Documentation ¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ListDetectors ¶
func ListDetectors() []string
ListDetectors returns all registered detector names
func RegisterDetector ¶
func RegisterDetector(detector Detector)
RegisterDetector registers a detector globally
Types ¶
type AlignedLineGroup ¶
type AlignedLineGroup struct {
// Position on the alignment axis (X for vertical lines, Y for horizontal)
Position float64
// Lines in this group
Lines []graphicsstate.ExtractedLine
// Total coverage (sum of line lengths)
TotalLength float64
// Span of the lines (min to max on the perpendicular axis)
MinExtent float64
MaxExtent float64
}
AlignedLineGroup represents a group of lines aligned on an axis
type Config ¶
type Config struct {
// Minimum rows for a valid table
MinRows int
// Minimum columns for a valid table
MinCols int
// Minimum confidence threshold (0-1)
MinConfidence float64
// Whether to use line-based detection
UseLines bool
// Whether to use whitespace-based detection
UseWhitespace bool
// Maximum gap between text fragments to consider them in the same cell (points)
MaxCellGap float64
// Tolerance for row/column alignment (points)
AlignmentTolerance float64
// Whether to detect merged cells
DetectMergedCells bool
}
Config holds detector configuration
type Detector ¶
type Detector interface {
// Detect finds tables in a page
Detect(page *model.Page) ([]*model.Table, error)
// Name returns the detector name
Name() string
// Configure sets detector parameters
Configure(config Config) error
}
Detector is the interface for table detection algorithms
type DetectorRegistry ¶
type DetectorRegistry struct {
// contains filtered or unexported fields
}
DetectorRegistry holds registered detectors
func (*DetectorRegistry) Get ¶
func (r *DetectorRegistry) Get(name string) Detector
Get retrieves a detector by name
func (*DetectorRegistry) List ¶
func (r *DetectorRegistry) List() []string
List returns all registered detector names
func (*DetectorRegistry) Register ¶
func (r *DetectorRegistry) Register(detector Detector)
Register registers a detector
type GeometricDetector ¶
type GeometricDetector struct {
// contains filtered or unexported fields
}
GeometricDetector implements table detection using geometric heuristics
func NewGeometricDetector ¶
func NewGeometricDetector() *GeometricDetector
NewGeometricDetector creates a new geometric detector
func (*GeometricDetector) Configure ¶
func (d *GeometricDetector) Configure(config Config) error
func (*GeometricDetector) Name ¶
func (d *GeometricDetector) Name() string
type GridDetectionResult ¶
type GridDetectionResult struct {
// All detected grid hypotheses, sorted by confidence
Hypotheses []*GridHypothesis
// Statistics about the detection
TotalHorizontalLines int
TotalVerticalLines int
AlignedHGroups int
AlignedVGroups int
}
GridDetectionResult contains the result of grid detection
func DetectGrids ¶
func DetectGrids(ge *graphicsstate.GraphicsExtractor) *GridDetectionResult
DetectGrids is a convenience function for grid detection
type GridDetector ¶
type GridDetector struct {
// Tolerance for considering lines aligned (in points)
AlignmentTolerance float64
// Minimum number of aligned lines to form a grid axis
MinAlignedLines int
// Minimum line length to consider (in points)
MinLineLength float64
// Maximum gap between lines to be considered part of the same grid
MaxLineGap float64
}
GridDetector detects table grids from extracted graphics lines
func NewGridDetector ¶
func NewGridDetector() *GridDetector
NewGridDetector creates a new grid detector with default settings
func (*GridDetector) DetectFromExtractor ¶
func (gd *GridDetector) DetectFromExtractor(ge *graphicsstate.GraphicsExtractor) []*GridHypothesis
DetectFromExtractor detects grid hypotheses from a graphics extractor
func (*GridDetector) DetectFromLines ¶
func (gd *GridDetector) DetectFromLines(horizontals, verticals []graphicsstate.ExtractedLine) []*GridHypothesis
DetectFromLines detects grid hypotheses from horizontal and vertical lines
type GridHypothesis ¶
type GridHypothesis struct {
// Bounding box of the grid
BBox model.BBox
// Horizontal line positions (Y coordinates, sorted descending)
HorizontalLines []float64
// Vertical line positions (X coordinates, sorted ascending)
VerticalLines []float64
// Confidence score (0-1)
Confidence float64
// Number of rows and columns
Rows int
Cols int
// Whether the grid has complete borders
HasTopBorder bool
HasBottomBorder bool
HasLeftBorder bool
HasRightBorder bool
}
GridHypothesis represents a potential table grid detected from lines
func (*GridHypothesis) ToTableGrid ¶
func (h *GridHypothesis) ToTableGrid() *model.TableGrid
ToTableGrid converts a grid hypothesis to a model.TableGrid