Documentation
¶
Overview ¶
Package graphicsstate provides PDF graphics state management.
The PDF graphics state controls how content is rendered, including transformation matrices, colors, line properties, and text state. This package implements the state stack used during content stream processing.
Graphics State ¶
The main type is GraphicsState, which tracks:
- CTM (Current Transformation Matrix) for coordinate transformations
- Line properties (width, cap, join)
- Colors (stroke and fill)
- Text state (font, size, spacing, matrices)
Example usage:
gs := graphicsstate.NewGraphicsState()
gs.Save() // Push state (q operator)
gs.Transform(matrix) // Modify CTM (cm operator)
gs.SetFont("F1", 12) // Set font (Tf operator)
gs.Restore() // Pop state (Q operator)
Text State ¶
Text rendering uses a separate TextState structure that tracks:
- Font name and size (Tf operator)
- Character and word spacing (Tc, Tw operators)
- Horizontal scaling (Tz operator)
- Leading for line spacing (TL operator)
- Text and text line matrices (Tm, Td operators)
Path Operations ¶
The package also includes path construction and painting support for extracting line graphics used in table detection:
- MoveTo, LineTo, CurveTo for path construction
- Rectangle for the re operator
- Stroke, Fill for path painting
Index ¶
- type ExtractedLine
- type ExtractedRectangle
- type GraphicsExtractor
- func (ge *GraphicsExtractor) ClassifyLines() LineClassification
- func (ge *GraphicsExtractor) Clear()
- func (ge *GraphicsExtractor) Extract(operations []contentstream.Operation) error
- func (ge *GraphicsExtractor) ExtractFromBytes(data []byte) error
- func (ge *GraphicsExtractor) GetFilteredLines() []ExtractedLine
- func (ge *GraphicsExtractor) GetFilteredRectangles() []ExtractedRectangle
- func (ge *GraphicsExtractor) GetGraphicsState() *GraphicsState
- func (ge *GraphicsExtractor) GetGridLines() GridLines
- func (ge *GraphicsExtractor) GetHorizontalLines() []ExtractedLine
- func (ge *GraphicsExtractor) GetLines() []ExtractedLine
- func (ge *GraphicsExtractor) GetRectangles() []ExtractedRectangle
- func (ge *GraphicsExtractor) GetStatistics() GraphicsStatistics
- func (ge *GraphicsExtractor) GetVerticalLines() []ExtractedLine
- func (ge *GraphicsExtractor) ToModelLines() []model.Line
- func (ge *GraphicsExtractor) ToModelRectangles() []model.Line
- type GraphicsState
- func (gs *GraphicsState) BeginText()
- func (gs *GraphicsState) Clone() *GraphicsState
- func (gs *GraphicsState) EndText()
- func (gs *GraphicsState) GetEffectiveFontSize() float64
- func (gs *GraphicsState) GetFontName() string
- func (gs *GraphicsState) GetFontSize() float64
- func (gs *GraphicsState) GetTextMatrix() model.Matrix
- func (gs *GraphicsState) GetTextPosition() (x, y float64)
- func (gs *GraphicsState) NextLine()
- func (gs *GraphicsState) Restore() error
- func (gs *GraphicsState) Save()
- func (gs *GraphicsState) SetCharSpacing(spacing float64)
- func (gs *GraphicsState) SetFillColorRGB(r, g, b float64)
- func (gs *GraphicsState) SetFont(name string, size float64)
- func (gs *GraphicsState) SetHorizontalScaling(scale float64)
- func (gs *GraphicsState) SetLeading(leading float64)
- func (gs *GraphicsState) SetLineWidth(width float64)
- func (gs *GraphicsState) SetRenderingMode(mode int)
- func (gs *GraphicsState) SetStrokeColorRGB(r, g, b float64)
- func (gs *GraphicsState) SetTextMatrix(m model.Matrix)
- func (gs *GraphicsState) SetTextRise(rise float64)
- func (gs *GraphicsState) SetWordSpacing(spacing float64)
- func (gs *GraphicsState) ShowText(text string) (dx, dy float64)
- func (gs *GraphicsState) ShowTextArray(array []interface{}) (dx, dy float64)
- func (gs *GraphicsState) ShowTextWithWidth(text string, width float64) (dx, dy float64)
- func (gs *GraphicsState) Transform(m model.Matrix)
- func (gs *GraphicsState) TranslateText(tx, ty float64)
- func (gs *GraphicsState) TranslateTextSetLeading(tx, ty float64)
- type GraphicsStatistics
- type GridLines
- type LineClassification
- type Path
- func (p *Path) Clear()
- func (p *Path) ClosePath()
- func (p *Path) CurveTo(x1, y1, x2, y2, x3, y3 float64)
- func (p *Path) CurveToV(x2, y2, x3, y3 float64)
- func (p *Path) CurveToY(x1, y1, x3, y3 float64)
- func (p *Path) IsEmpty() bool
- func (p *Path) LineTo(x, y float64)
- func (p *Path) MoveTo(x, y float64)
- func (p *Path) Rectangle(x, y, width, height float64)
- type PathExtractor
- func (pe *PathExtractor) Clear()
- func (pe *PathExtractor) CloseAndStroke()
- func (pe *PathExtractor) CloseFillAndStroke()
- func (pe *PathExtractor) CloseFillAndStrokeEvenOdd()
- func (pe *PathExtractor) ClosePath()
- func (pe *PathExtractor) CurveTo(x1, y1, x2, y2, x3, y3 float64)
- func (pe *PathExtractor) CurveToV(x2, y2, x3, y3 float64)
- func (pe *PathExtractor) CurveToY(x1, y1, x3, y3 float64)
- func (pe *PathExtractor) EndPath()
- func (pe *PathExtractor) Fill()
- func (pe *PathExtractor) FillAndStroke()
- func (pe *PathExtractor) FillAndStrokeEvenOdd()
- func (pe *PathExtractor) FillEvenOdd()
- func (pe *PathExtractor) FilterLinesByLength(minLength float64) []ExtractedLine
- func (pe *PathExtractor) FilterRectanglesBySize(minWidth, minHeight float64) []ExtractedRectangle
- func (pe *PathExtractor) GetHorizontalLines() []ExtractedLine
- func (pe *PathExtractor) GetLines() []ExtractedLine
- func (pe *PathExtractor) GetRectangles() []ExtractedRectangle
- func (pe *PathExtractor) GetVerticalLines() []ExtractedLine
- func (pe *PathExtractor) LineTo(x, y float64)
- func (pe *PathExtractor) MoveTo(x, y float64)
- func (pe *PathExtractor) Rectangle(x, y, width, height float64)
- func (pe *PathExtractor) Stroke()
- type PathSegment
- type PathSegmentType
- type TextState
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ExtractedLine ¶
type ExtractedLine struct {
// Start and end points in device space
Start model.Point
End model.Point
// Line attributes
Width float64
Color [3]float64
// Classification
IsHorizontal bool
IsVertical bool
// Original bounding box
BBox model.BBox
}
ExtractedLine represents a line extracted from PDF graphics
type ExtractedRectangle ¶
type ExtractedRectangle struct {
// Bounding box in device space
BBox model.BBox
// Rectangle attributes
StrokeWidth float64
StrokeColor [3]float64
FillColor [3]float64
IsFilled bool
IsStroked bool
}
ExtractedRectangle represents a rectangle extracted from PDF graphics
type GraphicsExtractor ¶
type GraphicsExtractor struct {
// Minimum dimensions for filtering
MinLineLength float64
MinRectWidth float64
MinRectHeight float64
// contains filtered or unexported fields
}
GraphicsExtractor extracts lines and rectangles from content streams
func NewGraphicsExtractor ¶
func NewGraphicsExtractor() *GraphicsExtractor
NewGraphicsExtractor creates a new graphics extractor
func (*GraphicsExtractor) ClassifyLines ¶
func (ge *GraphicsExtractor) ClassifyLines() LineClassification
ClassifyLines classifies lines by orientation
func (*GraphicsExtractor) Clear ¶
func (ge *GraphicsExtractor) Clear()
Clear resets the extractor for reuse
func (*GraphicsExtractor) Extract ¶
func (ge *GraphicsExtractor) Extract(operations []contentstream.Operation) error
Extract extracts graphics from content stream operations
func (*GraphicsExtractor) ExtractFromBytes ¶
func (ge *GraphicsExtractor) ExtractFromBytes(data []byte) error
ExtractFromBytes parses and extracts graphics from raw content stream data
func (*GraphicsExtractor) GetFilteredLines ¶
func (ge *GraphicsExtractor) GetFilteredLines() []ExtractedLine
GetFilteredLines returns lines meeting the minimum length requirement
func (*GraphicsExtractor) GetFilteredRectangles ¶
func (ge *GraphicsExtractor) GetFilteredRectangles() []ExtractedRectangle
GetFilteredRectangles returns rectangles meeting the minimum size requirements
func (*GraphicsExtractor) GetGraphicsState ¶
func (ge *GraphicsExtractor) GetGraphicsState() *GraphicsState
GetGraphicsState returns the current graphics state (useful for debugging)
func (*GraphicsExtractor) GetGridLines ¶
func (ge *GraphicsExtractor) GetGridLines() GridLines
GetGridLines returns horizontal and vertical lines suitable for table detection
func (*GraphicsExtractor) GetHorizontalLines ¶
func (ge *GraphicsExtractor) GetHorizontalLines() []ExtractedLine
GetHorizontalLines returns only horizontal lines
func (*GraphicsExtractor) GetLines ¶
func (ge *GraphicsExtractor) GetLines() []ExtractedLine
GetLines returns all extracted lines
func (*GraphicsExtractor) GetRectangles ¶
func (ge *GraphicsExtractor) GetRectangles() []ExtractedRectangle
GetRectangles returns all extracted rectangles
func (*GraphicsExtractor) GetStatistics ¶
func (ge *GraphicsExtractor) GetStatistics() GraphicsStatistics
GetStatistics returns statistics about extracted graphics
func (*GraphicsExtractor) GetVerticalLines ¶
func (ge *GraphicsExtractor) GetVerticalLines() []ExtractedLine
GetVerticalLines returns only vertical lines
func (*GraphicsExtractor) ToModelLines ¶
func (ge *GraphicsExtractor) ToModelLines() []model.Line
ToModelLines converts extracted lines to model.Line objects
func (*GraphicsExtractor) ToModelRectangles ¶
func (ge *GraphicsExtractor) ToModelRectangles() []model.Line
ToModelRectangles converts extracted rectangles to model.Line objects (with IsRect=true)
type GraphicsState ¶
type GraphicsState struct {
// Current Transformation Matrix
CTM model.Matrix
// Text state
Text TextState
// Line attributes
LineWidth float64
// Color (simplified - just RGB for now)
StrokeColor [3]float64
FillColor [3]float64
// contains filtered or unexported fields
}
GraphicsState represents the PDF graphics state
func NewGraphicsState ¶
func NewGraphicsState() *GraphicsState
NewGraphicsState creates a new graphics state with default values
func (*GraphicsState) BeginText ¶
func (gs *GraphicsState) BeginText()
BeginText initializes text state (BT operator)
func (*GraphicsState) Clone ¶
func (gs *GraphicsState) Clone() *GraphicsState
Clone creates a deep copy of the graphics state
func (*GraphicsState) EndText ¶
func (gs *GraphicsState) EndText()
EndText does nothing for now (ET operator)
func (*GraphicsState) GetEffectiveFontSize ¶
func (gs *GraphicsState) GetEffectiveFontSize() float64
GetEffectiveFontSize returns the font size, accounting for text matrix transformations. The text matrix can scale the font even when the Tf operator uses size=1.
func (*GraphicsState) GetFontName ¶
func (gs *GraphicsState) GetFontName() string
GetFontName returns the current font name
func (*GraphicsState) GetFontSize ¶
func (gs *GraphicsState) GetFontSize() float64
GetFontSize returns the current font size
func (*GraphicsState) GetTextMatrix ¶
func (gs *GraphicsState) GetTextMatrix() model.Matrix
GetTextMatrix returns the current text matrix
func (*GraphicsState) GetTextPosition ¶
func (gs *GraphicsState) GetTextPosition() (x, y float64)
GetTextPosition returns the current text position in device space
func (*GraphicsState) NextLine ¶
func (gs *GraphicsState) NextLine()
NextLine moves to next line (T* operator)
func (*GraphicsState) Restore ¶
func (gs *GraphicsState) Restore() error
Restore pops a graphics state from the stack (Q operator)
func (*GraphicsState) Save ¶
func (gs *GraphicsState) Save()
Save pushes the current graphics state onto the stack (q operator)
func (*GraphicsState) SetCharSpacing ¶
func (gs *GraphicsState) SetCharSpacing(spacing float64)
SetCharSpacing sets character spacing (Tc operator)
func (*GraphicsState) SetFillColorRGB ¶
func (gs *GraphicsState) SetFillColorRGB(r, g, b float64)
SetFillColorRGB sets the fill color (rg operator)
func (*GraphicsState) SetFont ¶
func (gs *GraphicsState) SetFont(name string, size float64)
SetFont sets the current font (Tf operator)
func (*GraphicsState) SetHorizontalScaling ¶
func (gs *GraphicsState) SetHorizontalScaling(scale float64)
SetHorizontalScaling sets horizontal scaling (Tz operator)
func (*GraphicsState) SetLeading ¶
func (gs *GraphicsState) SetLeading(leading float64)
SetLeading sets text leading (TL operator)
func (*GraphicsState) SetLineWidth ¶
func (gs *GraphicsState) SetLineWidth(width float64)
SetLineWidth sets the line width (w operator)
func (*GraphicsState) SetRenderingMode ¶
func (gs *GraphicsState) SetRenderingMode(mode int)
SetRenderingMode sets text rendering mode (Tr operator)
func (*GraphicsState) SetStrokeColorRGB ¶
func (gs *GraphicsState) SetStrokeColorRGB(r, g, b float64)
SetStrokeColorRGB sets the stroke color (RG operator)
func (*GraphicsState) SetTextMatrix ¶
func (gs *GraphicsState) SetTextMatrix(m model.Matrix)
SetTextMatrix sets the text matrix (Tm operator)
func (*GraphicsState) SetTextRise ¶
func (gs *GraphicsState) SetTextRise(rise float64)
SetTextRise sets text rise (Ts operator)
func (*GraphicsState) SetWordSpacing ¶
func (gs *GraphicsState) SetWordSpacing(spacing float64)
SetWordSpacing sets word spacing (Tw operator)
func (*GraphicsState) ShowText ¶
func (gs *GraphicsState) ShowText(text string) (dx, dy float64)
ShowText updates the text position after showing text (Tj operator). It returns the displacement caused by the text.
func (*GraphicsState) ShowTextArray ¶
func (gs *GraphicsState) ShowTextArray(array []interface{}) (dx, dy float64)
ShowTextArray shows text with positioning adjustments (TJ operator). It returns the displacement caused by the text.
func (*GraphicsState) ShowTextWithWidth ¶
func (gs *GraphicsState) ShowTextWithWidth(text string, width float64) (dx, dy float64)
ShowTextWithWidth updates the text position after showing text with a known width. The width argument should be the total width of the text glyphs in user space units.
func (*GraphicsState) Transform ¶
func (gs *GraphicsState) Transform(m model.Matrix)
Transform applies a transformation matrix to CTM (cm operator)
func (*GraphicsState) TranslateText ¶
func (gs *GraphicsState) TranslateText(tx, ty float64)
TranslateText translates the text matrix (Td operator)
func (*GraphicsState) TranslateTextSetLeading ¶
func (gs *GraphicsState) TranslateTextSetLeading(tx, ty float64)
TranslateTextSetLeading translates text and sets leading (TD operator)
type GraphicsStatistics ¶
type GraphicsStatistics struct {
TotalLines int
HorizontalLines int
VerticalLines int
DiagonalLines int
TotalRectangles int
FilledRectangles int
StrokedRectangles int
}
GraphicsStatistics provides statistics about extracted graphics
type GridLines ¶
type GridLines struct {
Horizontals []ExtractedLine
Verticals []ExtractedLine
}
GridLines represents horizontal and vertical lines that could form a table grid
type LineClassification ¶
type LineClassification struct {
HorizontalLines []ExtractedLine
VerticalLines []ExtractedLine
DiagonalLines []ExtractedLine
}
LineClassification provides classification of extracted lines
type Path ¶
type Path struct {
// Segments contains all the path segments
Segments []PathSegment
// CurrentPoint is the current point in user space
CurrentPoint model.Point
// SubpathStart is the start of the current subpath (for closepath)
SubpathStart model.Point
// HasCurrentPoint indicates if a current point has been set
HasCurrentPoint bool
}
Path represents a graphics path being constructed
func (*Path) ClosePath ¶
func (p *Path) ClosePath()
ClosePath closes the current subpath (h operator)
func (*Path) CurveTo ¶
func (p *Path) CurveTo(x1, y1, x2, y2, x3, y3 float64)
CurveTo appends a cubic Bézier curve (c operator). The control points are (x1, y1) and (x2, y2); the end point is (x3, y3).
func (*Path) CurveToV ¶
func (p *Path) CurveToV(x2, y2, x3, y3 float64)
CurveToV appends a cubic Bézier curve with first control point = current point (v operator)
func (*Path) CurveToY ¶
func (p *Path) CurveToY(x1, y1, x3, y3 float64)
CurveToY appends a cubic Bézier curve with second control point = end point (y operator)
type PathExtractor ¶
type PathExtractor struct {
// Collected graphics elements
Lines []ExtractedLine
Rectangles []ExtractedRectangle
// Tolerance for horizontal/vertical classification (in points)
AngleTolerance float64
// contains filtered or unexported fields
}
PathExtractor extracts lines and rectangles from paths
func NewPathExtractor ¶
func NewPathExtractor(gs *GraphicsState) *PathExtractor
NewPathExtractor creates a new path extractor
func (*PathExtractor) Clear ¶
func (pe *PathExtractor) Clear()
Clear clears all extracted elements and the current path
func (*PathExtractor) CloseAndStroke ¶
func (pe *PathExtractor) CloseAndStroke()
CloseAndStroke handles the s operator (close path and stroke)
func (*PathExtractor) CloseFillAndStroke ¶
func (pe *PathExtractor) CloseFillAndStroke()
CloseFillAndStroke handles the b operator
func (*PathExtractor) CloseFillAndStrokeEvenOdd ¶
func (pe *PathExtractor) CloseFillAndStrokeEvenOdd()
CloseFillAndStrokeEvenOdd handles the b* operator
func (*PathExtractor) ClosePath ¶
func (pe *PathExtractor) ClosePath()
ClosePath handles the h operator
func (*PathExtractor) CurveTo ¶
func (pe *PathExtractor) CurveTo(x1, y1, x2, y2, x3, y3 float64)
CurveTo handles the c operator
func (*PathExtractor) CurveToV ¶
func (pe *PathExtractor) CurveToV(x2, y2, x3, y3 float64)
CurveToV handles the v operator
func (*PathExtractor) CurveToY ¶
func (pe *PathExtractor) CurveToY(x1, y1, x3, y3 float64)
CurveToY handles the y operator
func (*PathExtractor) EndPath ¶
func (pe *PathExtractor) EndPath()
EndPath handles the n operator (end path without filling or stroking)
func (*PathExtractor) Fill ¶
func (pe *PathExtractor) Fill()
Fill handles the f/F operator (fill path)
func (*PathExtractor) FillAndStroke ¶
func (pe *PathExtractor) FillAndStroke()
FillAndStroke handles the B operator (fill and stroke)
func (*PathExtractor) FillAndStrokeEvenOdd ¶
func (pe *PathExtractor) FillAndStrokeEvenOdd()
FillAndStrokeEvenOdd handles the B* operator
func (*PathExtractor) FillEvenOdd ¶
func (pe *PathExtractor) FillEvenOdd()
FillEvenOdd handles the f* operator (fill with even-odd rule)
func (*PathExtractor) FilterLinesByLength ¶
func (pe *PathExtractor) FilterLinesByLength(minLength float64) []ExtractedLine
FilterLinesByLength filters lines by minimum length
func (*PathExtractor) FilterRectanglesBySize ¶
func (pe *PathExtractor) FilterRectanglesBySize(minWidth, minHeight float64) []ExtractedRectangle
FilterRectanglesBySize filters rectangles by minimum dimensions
func (*PathExtractor) GetHorizontalLines ¶
func (pe *PathExtractor) GetHorizontalLines() []ExtractedLine
GetHorizontalLines returns only horizontal lines
func (*PathExtractor) GetLines ¶
func (pe *PathExtractor) GetLines() []ExtractedLine
GetLines returns all extracted lines
func (*PathExtractor) GetRectangles ¶
func (pe *PathExtractor) GetRectangles() []ExtractedRectangle
GetRectangles returns all extracted rectangles
func (*PathExtractor) GetVerticalLines ¶
func (pe *PathExtractor) GetVerticalLines() []ExtractedLine
GetVerticalLines returns only vertical lines
func (*PathExtractor) LineTo ¶
func (pe *PathExtractor) LineTo(x, y float64)
LineTo handles the l operator
func (*PathExtractor) MoveTo ¶
func (pe *PathExtractor) MoveTo(x, y float64)
MoveTo handles the m operator
func (*PathExtractor) Rectangle ¶
func (pe *PathExtractor) Rectangle(x, y, width, height float64)
Rectangle handles the re operator
func (*PathExtractor) Stroke ¶
func (pe *PathExtractor) Stroke()
Stroke handles the S operator (stroke path)
type PathSegment ¶
type PathSegment struct {
Type PathSegmentType
// For MoveTo and LineTo: single point
// For CurveTo: control point 1, control point 2, end point
Points []model.Point
}
PathSegment represents a single segment of a path
type PathSegmentType ¶
type PathSegmentType int
PathSegmentType defines the type of path segment
const (
// PathMoveTo starts a new subpath
PathMoveTo PathSegmentType = iota
// PathLineTo draws a line to a point
PathLineTo
// PathCurveTo draws a cubic Bézier curve
PathCurveTo
// PathClosePath closes the current subpath
PathClosePath
)
type TextState ¶
type TextState struct {
// Font and size
FontName string
FontSize float64
// Character and word spacing
CharSpacing float64
WordSpacing float64
// Horizontal scaling (percentage)
HorizontalScaling float64
// Leading (line spacing)
Leading float64
// Text rendering mode
RenderingMode int
// Text rise
Rise float64
// Text matrices
TextMatrix model.Matrix
TextLineMatrix model.Matrix
}
TextState represents text-specific state