Documentation
¶
Overview ¶
Package elements provides enhanced element processing functionality. This module handles code block processing, including syntax highlighting, language detection, and code formatting.
Package elements provides enhanced element processing functionality. This module handles footnote processing, including detection, linking, and accessibility improvements.
Package elements provides enhanced element processing functionality. This module handles heading processing, including navigation element removal, anchor link cleanup, and text content extraction.
Package elements provides enhanced element processing functionality. This module handles image processing: picture collapse, lazy-load resolution, span→figure conversion, and caption normalization — matching the TypeScript defuddle imageRules transforms.
Package elements provides enhanced element processing functionality. This module handles mathematical formula processing, including MathML extraction, LaTeX conversion, and math display normalization.
Index ¶
- Variables
- func ProcessCodeBlocks(doc *goquery.Document, options *CodeBlockProcessingOptions)
- func ProcessCodeBlocksInScope(scope *goquery.Selection, options *CodeBlockProcessingOptions)
- func ProcessHeadings(doc *goquery.Document, options *HeadingProcessingOptions)
- func ProcessHeadingsInScope(scope *goquery.Selection, options *HeadingProcessingOptions)
- func ProcessImages(doc *goquery.Document, options *ImageProcessingOptions)
- func ProcessImagesInScope(scope *goquery.Selection, options *ImageProcessingOptions)
- func ProcessMath(doc *goquery.Document, options *MathProcessingOptions)
- func ProcessMathInScope(scope *goquery.Selection, options *MathProcessingOptions)
- func StandardizeFootnotes(doc *goquery.Document)
- func StandardizeFootnotesInScope(doc *goquery.Document, scope *goquery.Selection)
- type CodeBlockProcessingOptions
- type CodeBlockProcessor
- type Footnote
- type FootnoteProcessingOptions
- type FootnoteProcessor
- func (p *FootnoteProcessor) CleanupFootnotes(footnotes []*Footnote) []*Footnote
- func (p *FootnoteProcessor) GetFootnotes() []*Footnote
- func (p *FootnoteProcessor) HasFootnotes() bool
- func (p *FootnoteProcessor) ProcessFootnotes(options *FootnoteProcessingOptions) []*Footnote
- func (p *FootnoteProcessor) StandardizeFootnotes(scope *goquery.Selection)
- type HeadingProcessingOptions
- type HeadingProcessor
- type ImageProcessingOptions
- type ImageProcessor
- type MathData
- type MathProcessingOptions
- type MathProcessor
- type RoleProcessingOptions
- type RoleProcessor
Constants ¶
This section is empty.
Variables ¶
var FootnoteInlineReferences = strings.Join([]string{
`sup.reference`,
`cite.ltx_cite`,
`sup[id^="fnr"]`,
`span[id^="fnr"]`,
`span[class*="footnote_ref"]`,
`span[class*="footnote-ref"]`,
`span.footnote-link`,
`a.citation`,
`a[id^="ref-link"]`,
`a[href^="#fn"]`,
`a[href^="#cite"]`,
`a[href^="#reference"]`,
`a[href^="#footnote"]`,
`a[href^="#r"]`,
`a[href^="#b"]`,
`a[href*="cite_note"]`,
`a[href*="cite_ref"]`,
`a.footnote-anchor`,
`a.footnote`,
`a[role="doc-biblioref"]`,
`a[id^="fnref"]`,
`.footnote-ref`,
`sup a[href^="#"]`,
}, ", ")
FootnoteInlineReferences matches inline footnote reference elements. Ported from TypeScript FOOTNOTE_INLINE_REFERENCES.
var FootnoteListSelectors = strings.Join([]string{
`div.footnote ol`,
`div.footnotes ol`,
`div[role="doc-endnotes"]`,
`div[role="doc-footnotes"]`,
`ol.footnotes-list`,
`ol.footnotes`,
`ol.references`,
`ol[class*="article-references"]`,
`section.footnotes ol`,
`section[role="doc-endnotes"]`,
`section[role="doc-footnotes"]`,
`section[role="doc-bibliography"]`,
`ul.footnotes-list`,
`ul.ltx_biblist`,
`div.footnote[data-component-name="FootnoteToDOM"]`,
}, ", ")
FootnoteListSelectors matches footnote definition list containers. Ported from TypeScript FOOTNOTE_LIST_SELECTORS.
Functions ¶
func ProcessCodeBlocks ¶
func ProcessCodeBlocks(doc *goquery.Document, options *CodeBlockProcessingOptions)
ProcessCodeBlocks processes all code blocks in the document (public interface). TypeScript original code:
export function processCodeBlocks(doc: Document, options?: CodeBlockOptions): void {
const processor = new CodeBlockProcessor(doc);
processor.processAllCodeBlocks(options || defaultOptions);
}
func ProcessCodeBlocksInScope ¶
func ProcessCodeBlocksInScope(scope *goquery.Selection, options *CodeBlockProcessingOptions)
ProcessCodeBlocksInScope processes code blocks within the given container element.
func ProcessHeadings ¶
func ProcessHeadings(doc *goquery.Document, options *HeadingProcessingOptions)
ProcessHeadings processes all headings in the document (public interface). TypeScript original code:
export function processHeadings(doc: Document, options?: HeadingOptions): void {
const processor = new HeadingProcessor(doc);
processor.processAllHeadings(options || defaultOptions);
}
func ProcessHeadingsInScope ¶
func ProcessHeadingsInScope(scope *goquery.Selection, options *HeadingProcessingOptions)
ProcessHeadingsInScope processes headings within the given container element.
func ProcessImages ¶
func ProcessImages(doc *goquery.Document, options *ImageProcessingOptions)
ProcessImages processes all images in the document.
func ProcessImagesInScope ¶
func ProcessImagesInScope(scope *goquery.Selection, options *ImageProcessingOptions)
ProcessImagesInScope processes images within the given container element, applying content-cleanup transforms matching the TypeScript imageRules.
func ProcessMath ¶
func ProcessMath(doc *goquery.Document, options *MathProcessingOptions)
ProcessMath processes all mathematical formulas in the document (public interface). TypeScript original code:
export function processMath(doc: Document, options?: MathOptions): void {
const processor = new MathProcessor(doc);
processor.processAllMath(options || defaultOptions);
}
func ProcessMathInScope ¶
func ProcessMathInScope(scope *goquery.Selection, options *MathProcessingOptions)
ProcessMathInScope processes mathematical formulas within the given container element.
func StandardizeFootnotes ¶
func StandardizeFootnotes(doc *goquery.Document)
StandardizeFootnotes is the public entry point that creates a FootnoteProcessor and runs StandardizeFootnotes on the document body (or the document root if there is no body element). TypeScript original code:
export function standardizeFootnotes(element: any): void {
const doc = element.ownerDocument;
const handler = new FootnoteHandler(doc);
handler.standardizeFootnotes(element);
}
func StandardizeFootnotesInScope ¶
func StandardizeFootnotesInScope(doc *goquery.Document, scope *goquery.Selection)
StandardizeFootnotesInScope runs footnote standardization on a pre-selected scope element rather than the entire document body. Use this when content has already been extracted to a specific subtree.
Types ¶
type CodeBlockProcessingOptions ¶
CodeBlockProcessingOptions contains options for code block processing. TypeScript original code:
interface CodeBlockOptions {
detectLanguage?: boolean;
formatCode?: boolean;
addLineNumbers?: boolean;
enableSyntaxHighlight?: boolean;
wrapInPre?: boolean;
}
func DefaultCodeBlockProcessingOptions ¶
func DefaultCodeBlockProcessingOptions() *CodeBlockProcessingOptions
DefaultCodeBlockProcessingOptions returns default options for code block processing. TypeScript original code:
const defaultOptions: CodeBlockOptions = {
detectLanguage: true,
formatCode: true,
addLineNumbers: false,
enableSyntaxHighlight: true,
wrapInPre: true
};
type CodeBlockProcessor ¶
type CodeBlockProcessor struct {
// contains filtered or unexported fields
}
CodeBlockProcessor handles code block processing and enhancement. TypeScript original code:
class CodeBlockProcessor {
constructor(private document: Document) {}
}
func NewCodeBlockProcessor ¶
func NewCodeBlockProcessor(doc *goquery.Document) *CodeBlockProcessor
NewCodeBlockProcessor creates a new code block processor. TypeScript original code:
constructor(private doc: Document) {}
func (*CodeBlockProcessor) ProcessCodeBlocks ¶
func (p *CodeBlockProcessor) ProcessCodeBlocks(options *CodeBlockProcessingOptions)
ProcessCodeBlocks processes all code blocks in the document. TypeScript original code:
export const codeBlockRules = [
{
selector: [
'pre',
'div[class*="prismjs"]',
'.syntaxhighlighter',
'.highlight',
'.highlight-source',
'.wp-block-syntaxhighlighter-code',
'.wp-block-code',
'div[class*="language-"]'
].join(', '),
element: 'pre',
transform: (el: Element, doc: Document): Element => {
// Processing logic here
}
}
];
type Footnote ¶
type Footnote struct {
ID string
Number int
Reference *goquery.Selection
Definition *goquery.Selection
Content string
RefText string
Linked bool
}
Footnote represents a footnote with its reference and definition. TypeScript original code:
interface FootnoteData {
content: any;
originalId: string;
refs: string[];
}
func ProcessFootnotes ¶
func ProcessFootnotes(doc *goquery.Document, options *FootnoteProcessingOptions) []*Footnote
ProcessFootnotes processes all footnotes in the document (public interface). TypeScript original code:
export function standardizeFootnotes(element: any): void {
const handler = new FootnoteHandler(element.ownerDocument);
handler.standardizeFootnotes(element);
}
type FootnoteProcessingOptions ¶
type FootnoteProcessingOptions struct {
DetectFootnotes bool
LinkFootnotes bool
ImproveAccessibility bool
GenerateSection bool
NumberFootnotes bool
FootnotePrefix string
SectionTitle string
SectionLocation string // "end", "after-content", "custom"
}
FootnoteProcessingOptions contains options for footnote processing. TypeScript original code:
interface FootnoteData {
content: any;
originalId: string;
refs: string[];
}
interface FootnoteCollection {
[footnoteNumber: number]: FootnoteData;
}
func DefaultFootnoteProcessingOptions ¶
func DefaultFootnoteProcessingOptions() *FootnoteProcessingOptions
DefaultFootnoteProcessingOptions returns default options for footnote processing. TypeScript original code:
const defaultOptions = {
detectFootnotes: true,
linkFootnotes: true,
improveAccessibility: true,
generateSection: true,
numberFootnotes: true
};
type FootnoteProcessor ¶
type FootnoteProcessor struct {
// contains filtered or unexported fields
}
FootnoteProcessor handles footnote processing and enhancement. TypeScript original code:
class FootnoteHandler {
private doc: any;
constructor(doc: any) {
this.doc = doc;
}
}
func NewFootnoteProcessor ¶
func NewFootnoteProcessor(doc *goquery.Document) *FootnoteProcessor
NewFootnoteProcessor creates a new footnote processor. TypeScript original code:
constructor(doc: any) {
this.doc = doc;
}
func (*FootnoteProcessor) CleanupFootnotes ¶
func (p *FootnoteProcessor) CleanupFootnotes(footnotes []*Footnote) []*Footnote
CleanupFootnotes removes duplicate and invalid footnotes. TypeScript original code:
cleanupFootnotes(footnotes: Footnote[]): Footnote[] {
const uniqueFootnotes = new Map();
const cleaned = [];
for (const footnote of footnotes) {
if (!uniqueFootnotes.has(footnote.id) && footnote.isValid()) {
uniqueFootnotes.set(footnote.id, footnote);
cleaned.push(footnote);
}
}
return cleaned;
}
func (*FootnoteProcessor) GetFootnotes ¶
func (p *FootnoteProcessor) GetFootnotes() []*Footnote
GetFootnotes returns all footnotes found in the document. TypeScript original code:
getFootnotes(): Footnote[] {
return this.footnotes;
}
func (*FootnoteProcessor) HasFootnotes ¶
func (p *FootnoteProcessor) HasFootnotes() bool
HasFootnotes checks if the document has footnotes. TypeScript original code:
hasFootnotes(): boolean {
return this.footnotes.length > 0;
}
func (*FootnoteProcessor) ProcessFootnotes ¶
func (p *FootnoteProcessor) ProcessFootnotes(options *FootnoteProcessingOptions) []*Footnote
ProcessFootnotes processes all footnotes in the document. TypeScript original code:
standardizeFootnotes(element: any) {
const footnotes = this.collectFootnotes(element);
// Standardize inline footnotes using the collected IDs
const footnoteInlineReferences = element.querySelectorAll(FOOTNOTE_INLINE_REFERENCES);
// Process all footnote references and definitions
}
func (*FootnoteProcessor) StandardizeFootnotes ¶
func (p *FootnoteProcessor) StandardizeFootnotes(scope *goquery.Selection)
StandardizeFootnotes rewrites all inline references and footnote definitions into the canonical form: <sup id="fnref:N"><a href="#fn:N">N</a></sup> for references and <div id="footnotes"><ol><li id="fn:N">…</li></ol></div> for definitions. It is the Go port of the TypeScript standardizeFootnotes method. TypeScript original code:
standardizeFootnotes(element: any) {
const footnotes = this.collectFootnotes(element);
const refs = element.querySelectorAll(FOOTNOTE_INLINE_REFERENCES);
const supGroups = new Map();
refs.forEach(el => { ... supGroups / replaceWith ... });
supGroups.forEach((refs, container) => { ... });
// rebuild list, remove originals, append new div#footnotes
}
type HeadingProcessingOptions ¶
type HeadingProcessingOptions struct {
PreserveStructure bool
}
HeadingProcessingOptions contains options for heading processing. TypeScript original code:
interface HeadingOptions {
removeNavigation?: boolean;
preserveStructure?: boolean;
allowedAttributes?: string[];
}
func DefaultHeadingProcessingOptions ¶
func DefaultHeadingProcessingOptions() *HeadingProcessingOptions
DefaultHeadingProcessingOptions returns default options for heading processing. TypeScript original code:
const defaultOptions: HeadingOptions = {
removeNavigation: true,
preserveStructure: true,
allowedAttributes: ['id', 'class', 'data-*']
};
type HeadingProcessor ¶
type HeadingProcessor struct {
// contains filtered or unexported fields
}
HeadingProcessor handles heading processing and enhancement. TypeScript original code:
export const headingRules = [
{
selector: 'h1, h2, h3, h4, h5, h6',
element: 'keep',
transform: (el: Element): Element => {
// Processing logic here
}
}
];
func NewHeadingProcessor ¶
func NewHeadingProcessor(doc *goquery.Document) *HeadingProcessor
NewHeadingProcessor creates a new heading processor. TypeScript original code:
class HeadingProcessor {
constructor(private document: Document) {}
}
func (*HeadingProcessor) ProcessHeadings ¶
func (p *HeadingProcessor) ProcessHeadings(options *HeadingProcessingOptions)
ProcessHeadings processes all headings in the document. TypeScript original code:
export const headingRules = [
{
selector: 'h1, h2, h3, h4, h5, h6',
element: 'keep',
transform: (el: Element): Element => {
// Processing logic
}
}
];
type ImageProcessingOptions ¶
type ImageProcessingOptions struct {
EnableLazyLoading bool
EnableResponsive bool
GenerateAltText bool
OptimizeImages bool
RemoveSmallImages bool
MinImageWidth int
MinImageHeight int
MaxImageWidth int
MaxImageHeight int
}
ImageProcessingOptions contains options for image processing.
func DefaultImageProcessingOptions ¶
func DefaultImageProcessingOptions() *ImageProcessingOptions
DefaultImageProcessingOptions returns default options for image processing.
type ImageProcessor ¶
type ImageProcessor struct {
// contains filtered or unexported fields
}
ImageProcessor handles image processing and enhancement.
func NewImageProcessor ¶
func NewImageProcessor(doc *goquery.Document) *ImageProcessor
NewImageProcessor creates a new image processor.
func (*ImageProcessor) ProcessImages ¶
func (p *ImageProcessor) ProcessImages(options *ImageProcessingOptions)
ProcessImages applies all image transforms to the document.
type MathData ¶
type MathData struct {
MathML string `json:"mathml,omitempty"`
LaTeX string `json:"latex,omitempty"`
Type string `json:"type,omitempty"`
Display string `json:"display,omitempty"`
}
MathData represents extracted mathematical content. TypeScript original code:
export interface MathData {
mathml?: string;
latex?: string;
type?: 'katex' | 'mathjax' | 'mathml' | 'latex';
display?: 'block' | 'inline';
}
type MathProcessingOptions ¶
type MathProcessingOptions struct {
ExtractMathML bool
ExtractLaTeX bool
CleanupScripts bool
PreserveDisplay bool
}
MathProcessingOptions contains options for math processing. TypeScript original code:
interface MathOptions {
extractMathML?: boolean;
extractLaTeX?: boolean;
cleanupScripts?: boolean;
preserveDisplay?: boolean;
}
func DefaultMathProcessingOptions ¶
func DefaultMathProcessingOptions() *MathProcessingOptions
DefaultMathProcessingOptions returns default options for math processing. TypeScript original code:
const defaultOptions: MathOptions = {
extractMathML: true,
extractLaTeX: true,
cleanupScripts: true,
preserveDisplay: true
};
type MathProcessor ¶
type MathProcessor struct {
// contains filtered or unexported fields
}
MathProcessor handles mathematical formula processing and enhancement. TypeScript original code:
export const mathRules = [
{
selector: mathSelectors,
element: 'math',
transform: (el: Element, doc: Document): Element => {
// Processing logic here
}
}
];
func NewMathProcessor ¶
func NewMathProcessor(doc *goquery.Document) *MathProcessor
NewMathProcessor creates a new math processor. TypeScript original code:
class MathProcessor {
constructor(private document: Document) {}
}
func (*MathProcessor) ProcessMath ¶
func (p *MathProcessor) ProcessMath(options *MathProcessingOptions)
ProcessMath processes all mathematical formulas in the document. TypeScript original code:
export const mathRules = [
{
selector: mathSelectors,
element: 'math',
transform: (el: Element, doc: Document): Element => {
const mathData = getMathMLFromElement(el);
const latex = getLatexFromElement(el);
const isBlock = isBlockDisplay(el);
const cleanMathEl = createCleanMathEl(doc, mathData, latex, isBlock);
// Cleanup logic...
}
}
];
type RoleProcessingOptions ¶
type RoleProcessingOptions struct {
ConvertParagraphs bool
ConvertLists bool
ConvertButtons bool
ConvertLinks bool
}
RoleProcessingOptions configures role processing behavior.
func DefaultRoleProcessingOptions ¶
func DefaultRoleProcessingOptions() *RoleProcessingOptions
DefaultRoleProcessingOptions returns default options for role processing.
type RoleProcessor ¶
type RoleProcessor struct {
// contains filtered or unexported fields
}
RoleProcessor handles conversion of ARIA roles to semantic HTML elements.
func NewRoleProcessor ¶
func NewRoleProcessor(doc *goquery.Document) *RoleProcessor
NewRoleProcessor creates a new role processor.
func (*RoleProcessor) ProcessRoles ¶
func (p *RoleProcessor) ProcessRoles(options *RoleProcessingOptions)
ProcessRoles processes all role-based elements in the document.