tables

package
v1.2.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 28, 2025 License: MIT Imports: 5 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ListDetectors

func ListDetectors() []string

ListDetectors returns all registered detector names

func RegisterDetector

func RegisterDetector(detector Detector)

RegisterDetector registers a detector globally

Types

type AlignedLineGroup

// AlignedLineGroup represents a group of lines aligned on an axis.
type AlignedLineGroup struct {
	// Position is the group's coordinate on the alignment axis:
	// X for vertical lines, Y for horizontal lines.
	Position float64

	// Lines holds the lines that belong to this group.
	Lines []graphicsstate.ExtractedLine

	// TotalLength is the total coverage of the group, i.e. the sum of
	// the lengths of its lines.
	TotalLength float64

	// MinExtent and MaxExtent give the span of the lines, from minimum
	// to maximum, on the axis perpendicular to the alignment axis.
	MinExtent float64
	MaxExtent float64
}

AlignedLineGroup represents a group of lines aligned on an axis

type Config

// Config holds detector configuration.
type Config struct {
	// MinRows is the minimum number of rows for a valid table.
	MinRows int

	// MinCols is the minimum number of columns for a valid table.
	MinCols int

	// MinConfidence is the minimum confidence threshold, in the range 0-1.
	MinConfidence float64

	// UseLines enables line-based detection.
	UseLines bool

	// UseWhitespace enables whitespace-based detection.
	UseWhitespace bool

	// MaxCellGap is the maximum gap, in points, between text fragments
	// for them to be considered part of the same cell.
	MaxCellGap float64

	// AlignmentTolerance is the tolerance, in points, for row/column
	// alignment.
	AlignmentTolerance float64

	// DetectMergedCells enables detection of merged cells.
	DetectMergedCells bool
}

Config holds detector configuration

func DefaultConfig

func DefaultConfig() Config

DefaultConfig returns default configuration

type Detector

// Detector is the interface for table detection algorithms.
type Detector interface {
	// Detect finds the tables in page.
	Detect(page *model.Page) ([]*model.Table, error)

	// Name returns the detector name.
	// NOTE(review): presumably the name used by GetDetector and
	// ListDetectors — confirm against the registry implementation.
	Name() string

	// Configure sets the detector parameters from config.
	Configure(config Config) error
}

Detector is the interface for table detection algorithms

func GetDetector

func GetDetector(name string) Detector

GetDetector retrieves a detector by name

type DetectorRegistry

// DetectorRegistry holds registered detectors.
type DetectorRegistry struct {
	// contains filtered or unexported fields
}

DetectorRegistry holds registered detectors

func NewRegistry

func NewRegistry() *DetectorRegistry

NewRegistry creates a new detector registry

func (*DetectorRegistry) Get

func (r *DetectorRegistry) Get(name string) Detector

Get retrieves a detector by name

func (*DetectorRegistry) List

func (r *DetectorRegistry) List() []string

List returns all registered detector names

func (*DetectorRegistry) Register

func (r *DetectorRegistry) Register(detector Detector)

Register registers a detector

type GeometricDetector

// GeometricDetector implements table detection using geometric heuristics.
type GeometricDetector struct {
	// contains filtered or unexported fields
}

GeometricDetector implements table detection using geometric heuristics

func NewGeometricDetector

func NewGeometricDetector() *GeometricDetector

NewGeometricDetector creates a new geometric detector

func (*GeometricDetector) Configure

func (d *GeometricDetector) Configure(config Config) error

Configure sets detector parameters.

func (*GeometricDetector) Detect

func (d *GeometricDetector) Detect(page *model.Page) ([]*model.Table, error)

Detect finds tables using geometric heuristics

func (*GeometricDetector) Name

func (d *GeometricDetector) Name() string

Name returns the detector name.

type GridDetectionResult

// GridDetectionResult contains the result of grid detection.
type GridDetectionResult struct {
	// Hypotheses holds all detected grid hypotheses, sorted by confidence.
	// NOTE(review): the sort direction is not stated here — confirm
	// (presumably highest confidence first).
	Hypotheses []*GridHypothesis

	// Statistics about the detection.
	// NOTE(review): judging by the names, these are counts of the
	// horizontal/vertical lines seen and of the aligned horizontal/vertical
	// groups formed — confirm against the implementation.
	TotalHorizontalLines int
	TotalVerticalLines   int
	AlignedHGroups       int
	AlignedVGroups       int
}

GridDetectionResult contains the result of grid detection

func DetectGrids

DetectGrids is a convenience function for grid detection

type GridDetector

// GridDetector detects table grids from extracted graphics lines.
type GridDetector struct {
	// AlignmentTolerance is the tolerance, in points, for considering
	// lines aligned.
	AlignmentTolerance float64

	// MinAlignedLines is the minimum number of aligned lines needed to
	// form a grid axis.
	MinAlignedLines int

	// MinLineLength is the minimum line length, in points, to consider.
	MinLineLength float64

	// MaxLineGap is the maximum gap between lines for them to be
	// considered part of the same grid.
	// NOTE(review): unit is not stated; presumably points — confirm.
	MaxLineGap float64
}

GridDetector detects table grids from extracted graphics lines

func NewGridDetector

func NewGridDetector() *GridDetector

NewGridDetector creates a new grid detector with default settings

func (*GridDetector) DetectFromExtractor

func (gd *GridDetector) DetectFromExtractor(ge *graphicsstate.GraphicsExtractor) []*GridHypothesis

DetectFromExtractor detects grid hypotheses from a graphics extractor

func (*GridDetector) DetectFromLines

func (gd *GridDetector) DetectFromLines(horizontals, verticals []graphicsstate.ExtractedLine) []*GridHypothesis

DetectFromLines detects grid hypotheses from horizontal and vertical lines

type GridHypothesis

// GridHypothesis represents a potential table grid detected from lines.
type GridHypothesis struct {
	// BBox is the bounding box of the grid.
	BBox model.BBox

	// HorizontalLines holds the horizontal line positions as Y
	// coordinates, sorted in descending order.
	HorizontalLines []float64

	// VerticalLines holds the vertical line positions as X coordinates,
	// sorted in ascending order.
	VerticalLines []float64

	// Confidence is the confidence score, in the range 0-1.
	Confidence float64

	// Rows and Cols are the number of rows and columns in the grid.
	Rows int
	Cols int

	// HasTopBorder, HasBottomBorder, HasLeftBorder and HasRightBorder
	// report whether the grid has a complete border on the
	// corresponding side.
	HasTopBorder    bool
	HasBottomBorder bool
	HasLeftBorder   bool
	HasRightBorder  bool
}

GridHypothesis represents a potential table grid detected from lines

func (*GridHypothesis) ToTableGrid

func (h *GridHypothesis) ToTableGrid() *model.TableGrid

ToTableGrid converts a grid hypothesis to a model.TableGrid

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL