Documentation
¶
Index ¶
- type ExtractedLine
- type ExtractedRectangle
- type GraphicsExtractor
- func (ge *GraphicsExtractor) ClassifyLines() LineClassification
- func (ge *GraphicsExtractor) Clear()
- func (ge *GraphicsExtractor) Extract(operations []contentstream.Operation) error
- func (ge *GraphicsExtractor) ExtractFromBytes(data []byte) error
- func (ge *GraphicsExtractor) GetFilteredLines() []ExtractedLine
- func (ge *GraphicsExtractor) GetFilteredRectangles() []ExtractedRectangle
- func (ge *GraphicsExtractor) GetGraphicsState() *GraphicsState
- func (ge *GraphicsExtractor) GetGridLines() GridLines
- func (ge *GraphicsExtractor) GetHorizontalLines() []ExtractedLine
- func (ge *GraphicsExtractor) GetLines() []ExtractedLine
- func (ge *GraphicsExtractor) GetRectangles() []ExtractedRectangle
- func (ge *GraphicsExtractor) GetStatistics() GraphicsStatistics
- func (ge *GraphicsExtractor) GetVerticalLines() []ExtractedLine
- func (ge *GraphicsExtractor) ToModelLines() []model.Line
- func (ge *GraphicsExtractor) ToModelRectangles() []model.Line
- type GraphicsState
- func (gs *GraphicsState) BeginText()
- func (gs *GraphicsState) Clone() *GraphicsState
- func (gs *GraphicsState) EndText()
- func (gs *GraphicsState) GetEffectiveFontSize() float64
- func (gs *GraphicsState) GetFontName() string
- func (gs *GraphicsState) GetFontSize() float64
- func (gs *GraphicsState) GetTextMatrix() model.Matrix
- func (gs *GraphicsState) GetTextPosition() (x, y float64)
- func (gs *GraphicsState) NextLine()
- func (gs *GraphicsState) Restore() error
- func (gs *GraphicsState) Save()
- func (gs *GraphicsState) SetCharSpacing(spacing float64)
- func (gs *GraphicsState) SetFillColorRGB(r, g, b float64)
- func (gs *GraphicsState) SetFont(name string, size float64)
- func (gs *GraphicsState) SetHorizontalScaling(scale float64)
- func (gs *GraphicsState) SetLeading(leading float64)
- func (gs *GraphicsState) SetLineWidth(width float64)
- func (gs *GraphicsState) SetRenderingMode(mode int)
- func (gs *GraphicsState) SetStrokeColorRGB(r, g, b float64)
- func (gs *GraphicsState) SetTextMatrix(m model.Matrix)
- func (gs *GraphicsState) SetTextRise(rise float64)
- func (gs *GraphicsState) SetWordSpacing(spacing float64)
- func (gs *GraphicsState) ShowText(text string) (dx, dy float64)
- func (gs *GraphicsState) ShowTextArray(array []interface{}) (dx, dy float64)
- func (gs *GraphicsState) ShowTextWithWidth(text string, width float64) (dx, dy float64)
- func (gs *GraphicsState) Transform(m model.Matrix)
- func (gs *GraphicsState) TranslateText(tx, ty float64)
- func (gs *GraphicsState) TranslateTextSetLeading(tx, ty float64)
- type GraphicsStatistics
- type GridLines
- type LineClassification
- type Path
- func (p *Path) Clear()
- func (p *Path) ClosePath()
- func (p *Path) CurveTo(x1, y1, x2, y2, x3, y3 float64)
- func (p *Path) CurveToV(x2, y2, x3, y3 float64)
- func (p *Path) CurveToY(x1, y1, x3, y3 float64)
- func (p *Path) IsEmpty() bool
- func (p *Path) LineTo(x, y float64)
- func (p *Path) MoveTo(x, y float64)
- func (p *Path) Rectangle(x, y, width, height float64)
- type PathExtractor
- func (pe *PathExtractor) Clear()
- func (pe *PathExtractor) CloseAndStroke()
- func (pe *PathExtractor) CloseFillAndStroke()
- func (pe *PathExtractor) CloseFillAndStrokeEvenOdd()
- func (pe *PathExtractor) ClosePath()
- func (pe *PathExtractor) CurveTo(x1, y1, x2, y2, x3, y3 float64)
- func (pe *PathExtractor) CurveToV(x2, y2, x3, y3 float64)
- func (pe *PathExtractor) CurveToY(x1, y1, x3, y3 float64)
- func (pe *PathExtractor) EndPath()
- func (pe *PathExtractor) Fill()
- func (pe *PathExtractor) FillAndStroke()
- func (pe *PathExtractor) FillAndStrokeEvenOdd()
- func (pe *PathExtractor) FillEvenOdd()
- func (pe *PathExtractor) FilterLinesByLength(minLength float64) []ExtractedLine
- func (pe *PathExtractor) FilterRectanglesBySize(minWidth, minHeight float64) []ExtractedRectangle
- func (pe *PathExtractor) GetHorizontalLines() []ExtractedLine
- func (pe *PathExtractor) GetLines() []ExtractedLine
- func (pe *PathExtractor) GetRectangles() []ExtractedRectangle
- func (pe *PathExtractor) GetVerticalLines() []ExtractedLine
- func (pe *PathExtractor) LineTo(x, y float64)
- func (pe *PathExtractor) MoveTo(x, y float64)
- func (pe *PathExtractor) Rectangle(x, y, width, height float64)
- func (pe *PathExtractor) Stroke()
- type PathSegment
- type PathSegmentType
- type TextState
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ExtractedLine ¶
type ExtractedLine struct {
// Start and end points in device space
Start model.Point
End model.Point
// Line attributes
Width float64
Color [3]float64
// Classification
IsHorizontal bool
IsVertical bool
// Original bounding box
BBox model.BBox
}
ExtractedLine represents a line extracted from PDF graphics
type ExtractedRectangle ¶
type ExtractedRectangle struct {
// Bounding box in device space
BBox model.BBox
// Rectangle attributes
StrokeWidth float64
StrokeColor [3]float64
FillColor [3]float64
IsFilled bool
IsStroked bool
}
ExtractedRectangle represents a rectangle extracted from PDF graphics
type GraphicsExtractor ¶
type GraphicsExtractor struct {
// Minimum dimensions for filtering
MinLineLength float64
MinRectWidth float64
MinRectHeight float64
// contains filtered or unexported fields
}
GraphicsExtractor extracts lines and rectangles from content streams
func NewGraphicsExtractor ¶
func NewGraphicsExtractor() *GraphicsExtractor
NewGraphicsExtractor creates a new graphics extractor
func (*GraphicsExtractor) ClassifyLines ¶
func (ge *GraphicsExtractor) ClassifyLines() LineClassification
ClassifyLines classifies lines by orientation
func (*GraphicsExtractor) Clear ¶
func (ge *GraphicsExtractor) Clear()
Clear resets the extractor for reuse
func (*GraphicsExtractor) Extract ¶
func (ge *GraphicsExtractor) Extract(operations []contentstream.Operation) error
Extract extracts graphics from content stream operations
func (*GraphicsExtractor) ExtractFromBytes ¶
func (ge *GraphicsExtractor) ExtractFromBytes(data []byte) error
ExtractFromBytes parses and extracts graphics from raw content stream data
func (*GraphicsExtractor) GetFilteredLines ¶
func (ge *GraphicsExtractor) GetFilteredLines() []ExtractedLine
GetFilteredLines returns lines meeting the minimum length requirement
func (*GraphicsExtractor) GetFilteredRectangles ¶
func (ge *GraphicsExtractor) GetFilteredRectangles() []ExtractedRectangle
GetFilteredRectangles returns rectangles meeting the minimum size requirements
func (*GraphicsExtractor) GetGraphicsState ¶
func (ge *GraphicsExtractor) GetGraphicsState() *GraphicsState
GetGraphicsState returns the current graphics state (useful for debugging)
func (*GraphicsExtractor) GetGridLines ¶
func (ge *GraphicsExtractor) GetGridLines() GridLines
GetGridLines returns horizontal and vertical lines suitable for table detection
func (*GraphicsExtractor) GetHorizontalLines ¶
func (ge *GraphicsExtractor) GetHorizontalLines() []ExtractedLine
GetHorizontalLines returns only horizontal lines
func (*GraphicsExtractor) GetLines ¶
func (ge *GraphicsExtractor) GetLines() []ExtractedLine
GetLines returns all extracted lines
func (*GraphicsExtractor) GetRectangles ¶
func (ge *GraphicsExtractor) GetRectangles() []ExtractedRectangle
GetRectangles returns all extracted rectangles
func (*GraphicsExtractor) GetStatistics ¶
func (ge *GraphicsExtractor) GetStatistics() GraphicsStatistics
GetStatistics returns statistics about extracted graphics
func (*GraphicsExtractor) GetVerticalLines ¶
func (ge *GraphicsExtractor) GetVerticalLines() []ExtractedLine
GetVerticalLines returns only vertical lines
func (*GraphicsExtractor) ToModelLines ¶
func (ge *GraphicsExtractor) ToModelLines() []model.Line
ToModelLines converts extracted lines to model.Line objects
func (*GraphicsExtractor) ToModelRectangles ¶
func (ge *GraphicsExtractor) ToModelRectangles() []model.Line
ToModelRectangles converts extracted rectangles to model.Line objects (with IsRect=true)
type GraphicsState ¶
type GraphicsState struct {
// Current Transformation Matrix
CTM model.Matrix
// Text state
Text TextState
// Line attributes
LineWidth float64
// Color (simplified - just RGB for now)
StrokeColor [3]float64
FillColor [3]float64
// contains filtered or unexported fields
}
GraphicsState represents the PDF graphics state
func NewGraphicsState ¶
func NewGraphicsState() *GraphicsState
NewGraphicsState creates a new graphics state with default values
func (*GraphicsState) BeginText ¶
func (gs *GraphicsState) BeginText()
BeginText initializes text state (BT operator)
func (*GraphicsState) Clone ¶
func (gs *GraphicsState) Clone() *GraphicsState
Clone creates a deep copy of the graphics state
func (*GraphicsState) EndText ¶
func (gs *GraphicsState) EndText()
EndText does nothing for now (ET operator)
func (*GraphicsState) GetEffectiveFontSize ¶
func (gs *GraphicsState) GetEffectiveFontSize() float64
GetEffectiveFontSize returns the font size accounting for text matrix transformations. The text matrix can scale the font even when the Tf operator uses size=1.
func (*GraphicsState) GetFontName ¶
func (gs *GraphicsState) GetFontName() string
GetFontName returns the current font name
func (*GraphicsState) GetFontSize ¶
func (gs *GraphicsState) GetFontSize() float64
GetFontSize returns the current font size
func (*GraphicsState) GetTextMatrix ¶
func (gs *GraphicsState) GetTextMatrix() model.Matrix
GetTextMatrix returns the current text matrix
func (*GraphicsState) GetTextPosition ¶
func (gs *GraphicsState) GetTextPosition() (x, y float64)
GetTextPosition returns the current text position in device space
func (*GraphicsState) NextLine ¶
func (gs *GraphicsState) NextLine()
NextLine moves to next line (T* operator)
func (*GraphicsState) Restore ¶
func (gs *GraphicsState) Restore() error
Restore pops a graphics state from the stack (Q operator)
func (*GraphicsState) Save ¶
func (gs *GraphicsState) Save()
Save pushes the current graphics state onto the stack (q operator)
func (*GraphicsState) SetCharSpacing ¶
func (gs *GraphicsState) SetCharSpacing(spacing float64)
SetCharSpacing sets character spacing (Tc operator)
func (*GraphicsState) SetFillColorRGB ¶
func (gs *GraphicsState) SetFillColorRGB(r, g, b float64)
SetFillColorRGB sets the fill color (rg operator)
func (*GraphicsState) SetFont ¶
func (gs *GraphicsState) SetFont(name string, size float64)
SetFont sets the current font (Tf operator)
func (*GraphicsState) SetHorizontalScaling ¶
func (gs *GraphicsState) SetHorizontalScaling(scale float64)
SetHorizontalScaling sets horizontal scaling (Tz operator)
func (*GraphicsState) SetLeading ¶
func (gs *GraphicsState) SetLeading(leading float64)
SetLeading sets text leading (TL operator)
func (*GraphicsState) SetLineWidth ¶
func (gs *GraphicsState) SetLineWidth(width float64)
SetLineWidth sets the line width (w operator)
func (*GraphicsState) SetRenderingMode ¶
func (gs *GraphicsState) SetRenderingMode(mode int)
SetRenderingMode sets text rendering mode (Tr operator)
func (*GraphicsState) SetStrokeColorRGB ¶
func (gs *GraphicsState) SetStrokeColorRGB(r, g, b float64)
SetStrokeColorRGB sets the stroke color (RG operator)
func (*GraphicsState) SetTextMatrix ¶
func (gs *GraphicsState) SetTextMatrix(m model.Matrix)
SetTextMatrix sets the text matrix (Tm operator)
func (*GraphicsState) SetTextRise ¶
func (gs *GraphicsState) SetTextRise(rise float64)
SetTextRise sets text rise (Ts operator)
func (*GraphicsState) SetWordSpacing ¶
func (gs *GraphicsState) SetWordSpacing(spacing float64)
SetWordSpacing sets word spacing (Tw operator)
func (*GraphicsState) ShowText ¶
func (gs *GraphicsState) ShowText(text string) (dx, dy float64)
ShowText updates position after showing text (Tj operator). Returns the displacement caused by the text.
func (*GraphicsState) ShowTextArray ¶
func (gs *GraphicsState) ShowTextArray(array []interface{}) (dx, dy float64)
ShowTextArray shows text with positioning adjustments (TJ operator). Returns the displacement caused by the text.
func (*GraphicsState) ShowTextWithWidth ¶
func (gs *GraphicsState) ShowTextWithWidth(text string, width float64) (dx, dy float64)
ShowTextWithWidth updates position after showing text with a known width. The width argument should be the total width of the text glyphs in user space units.
func (*GraphicsState) Transform ¶
func (gs *GraphicsState) Transform(m model.Matrix)
Transform applies a transformation matrix to CTM (cm operator)
func (*GraphicsState) TranslateText ¶
func (gs *GraphicsState) TranslateText(tx, ty float64)
TranslateText translates the text matrix (Td operator)
func (*GraphicsState) TranslateTextSetLeading ¶
func (gs *GraphicsState) TranslateTextSetLeading(tx, ty float64)
TranslateTextSetLeading translates text and sets leading (TD operator)
type GraphicsStatistics ¶
type GraphicsStatistics struct {
TotalLines int
HorizontalLines int
VerticalLines int
DiagonalLines int
TotalRectangles int
FilledRectangles int
StrokedRectangles int
}
GraphicsStatistics provides statistics about extracted graphics
type GridLines ¶
type GridLines struct {
Horizontals []ExtractedLine
Verticals []ExtractedLine
}
GridLines represents horizontal and vertical lines that could form a table grid
type LineClassification ¶
type LineClassification struct {
HorizontalLines []ExtractedLine
VerticalLines []ExtractedLine
DiagonalLines []ExtractedLine
}
LineClassification provides classification of extracted lines
type Path ¶
type Path struct {
// Segments contains all the path segments
Segments []PathSegment
// CurrentPoint is the current point in user space
CurrentPoint model.Point
// SubpathStart is the start of the current subpath (for closepath)
SubpathStart model.Point
// HasCurrentPoint indicates if a current point has been set
HasCurrentPoint bool
}
Path represents a graphics path being constructed
func (*Path) ClosePath ¶
func (p *Path) ClosePath()
ClosePath closes the current subpath (h operator)
func (*Path) CurveTo ¶
func (p *Path) CurveTo(x1, y1, x2, y2, x3, y3 float64)
CurveTo appends a cubic Bézier curve (c operator). The control points are (x1, y1) and (x2, y2); the end point is (x3, y3).
func (*Path) CurveToV ¶
func (p *Path) CurveToV(x2, y2, x3, y3 float64)
CurveToV appends a cubic Bézier curve with first control point = current point (v operator)
func (*Path) CurveToY ¶
func (p *Path) CurveToY(x1, y1, x3, y3 float64)
CurveToY appends a cubic Bézier curve with second control point = end point (y operator)
type PathExtractor ¶
type PathExtractor struct {
// Collected graphics elements
Lines []ExtractedLine
Rectangles []ExtractedRectangle
// Tolerance for horizontal/vertical classification (in points)
AngleTolerance float64
// contains filtered or unexported fields
}
PathExtractor extracts lines and rectangles from paths
func NewPathExtractor ¶
func NewPathExtractor(gs *GraphicsState) *PathExtractor
NewPathExtractor creates a new path extractor
func (*PathExtractor) Clear ¶
func (pe *PathExtractor) Clear()
Clear clears all extracted elements and the current path
func (*PathExtractor) CloseAndStroke ¶
func (pe *PathExtractor) CloseAndStroke()
CloseAndStroke handles the s operator (close path and stroke)
func (*PathExtractor) CloseFillAndStroke ¶
func (pe *PathExtractor) CloseFillAndStroke()
CloseFillAndStroke handles the b operator
func (*PathExtractor) CloseFillAndStrokeEvenOdd ¶
func (pe *PathExtractor) CloseFillAndStrokeEvenOdd()
CloseFillAndStrokeEvenOdd handles the b* operator
func (*PathExtractor) ClosePath ¶
func (pe *PathExtractor) ClosePath()
ClosePath handles the h operator
func (*PathExtractor) CurveTo ¶
func (pe *PathExtractor) CurveTo(x1, y1, x2, y2, x3, y3 float64)
CurveTo handles the c operator
func (*PathExtractor) CurveToV ¶
func (pe *PathExtractor) CurveToV(x2, y2, x3, y3 float64)
CurveToV handles the v operator
func (*PathExtractor) CurveToY ¶
func (pe *PathExtractor) CurveToY(x1, y1, x3, y3 float64)
CurveToY handles the y operator
func (*PathExtractor) EndPath ¶
func (pe *PathExtractor) EndPath()
EndPath handles the n operator (end path without filling or stroking)
func (*PathExtractor) Fill ¶
func (pe *PathExtractor) Fill()
Fill handles the f/F operator (fill path)
func (*PathExtractor) FillAndStroke ¶
func (pe *PathExtractor) FillAndStroke()
FillAndStroke handles the B operator (fill and stroke)
func (*PathExtractor) FillAndStrokeEvenOdd ¶
func (pe *PathExtractor) FillAndStrokeEvenOdd()
FillAndStrokeEvenOdd handles the B* operator
func (*PathExtractor) FillEvenOdd ¶
func (pe *PathExtractor) FillEvenOdd()
FillEvenOdd handles the f* operator (fill with even-odd rule)
func (*PathExtractor) FilterLinesByLength ¶
func (pe *PathExtractor) FilterLinesByLength(minLength float64) []ExtractedLine
FilterLinesByLength filters lines by minimum length
func (*PathExtractor) FilterRectanglesBySize ¶
func (pe *PathExtractor) FilterRectanglesBySize(minWidth, minHeight float64) []ExtractedRectangle
FilterRectanglesBySize filters rectangles by minimum dimensions
func (*PathExtractor) GetHorizontalLines ¶
func (pe *PathExtractor) GetHorizontalLines() []ExtractedLine
GetHorizontalLines returns only horizontal lines
func (*PathExtractor) GetLines ¶
func (pe *PathExtractor) GetLines() []ExtractedLine
GetLines returns all extracted lines
func (*PathExtractor) GetRectangles ¶
func (pe *PathExtractor) GetRectangles() []ExtractedRectangle
GetRectangles returns all extracted rectangles
func (*PathExtractor) GetVerticalLines ¶
func (pe *PathExtractor) GetVerticalLines() []ExtractedLine
GetVerticalLines returns only vertical lines
func (*PathExtractor) LineTo ¶
func (pe *PathExtractor) LineTo(x, y float64)
LineTo handles the l operator
func (*PathExtractor) MoveTo ¶
func (pe *PathExtractor) MoveTo(x, y float64)
MoveTo handles the m operator
func (*PathExtractor) Rectangle ¶
func (pe *PathExtractor) Rectangle(x, y, width, height float64)
Rectangle handles the re operator
func (*PathExtractor) Stroke ¶
func (pe *PathExtractor) Stroke()
Stroke handles the S operator (stroke path)
type PathSegment ¶
type PathSegment struct {
Type PathSegmentType
// For MoveTo and LineTo: single point
// For CurveTo: control point 1, control point 2, end point
Points []model.Point
}
PathSegment represents a single segment of a path
type PathSegmentType ¶
type PathSegmentType int
PathSegmentType defines the type of path segment
const (
// PathMoveTo starts a new subpath
PathMoveTo PathSegmentType = iota
// PathLineTo draws a line to a point
PathLineTo
// PathCurveTo draws a cubic Bézier curve
PathCurveTo
// PathClosePath closes the current subpath
PathClosePath
)
type TextState ¶
type TextState struct {
// Font and size
FontName string
FontSize float64
// Character and word spacing
CharSpacing float64
WordSpacing float64
// Horizontal scaling (percentage)
HorizontalScaling float64
// Leading (line spacing)
Leading float64
// Text rendering mode
RenderingMode int
// Text rise
Rise float64
// Text matrices
TextMatrix model.Matrix
TextLineMatrix model.Matrix
}
TextState represents text-specific state