core

package

v1.2.0 (Latest) | Go to latest | Published: Nov 28, 2025 | License: MIT | Imports: 7 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/tsawler/tabula

Links

Open Source Insights

Documentation ¶

Index ¶

type Array
- func (a Array) Get(index int) Object
- func (a Array) GetInt(index int) (Int, bool)
- func (a Array) GetName(index int) (Name, bool)
- func (a Array) GetReal(index int) (Real, bool)
- func (a Array) Len() int
- func (a Array) String() string
- func (a Array) Type() ObjectType
type Bool
- func (b Bool) String() string
- func (b Bool) Type() ObjectType
type Dict
- func (d Dict) Delete(key string)
- func (d Dict) Get(key string) Object
- func (d Dict) GetArray(key string) (Array, bool)
- func (d Dict) GetBool(key string) (Bool, bool)
- func (d Dict) GetDict(key string) (Dict, bool)
- func (d Dict) GetIndirectRef(key string) (IndirectRef, bool)
- func (d Dict) GetInt(key string) (Int, bool)
- func (d Dict) GetName(key string) (Name, bool)
- func (d Dict) GetReal(key string) (Real, bool)
- func (d Dict) GetStream(key string) (*Stream, bool)
- func (d Dict) GetString(key string) (String, bool)
- func (d Dict) Has(key string) bool
- func (d Dict) Keys() []string
- func (d Dict) Set(key string, value Object)
- func (d Dict) String() string
- func (d Dict) Type() ObjectType
type IndirectObject
type IndirectRef
- func (r IndirectRef) String() string
- func (r IndirectRef) Type() ObjectType
type Int
- func (i Int) String() string
- func (i Int) Type() ObjectType
type Lexer
- func NewLexer(r io.Reader) *Lexer
- func (l *Lexer) NextToken() (*Token, error)
- func (l *Lexer) Peek() (byte, error)
- func (l *Lexer) ReadByte() (byte, error)
- func (l *Lexer) ReadBytes(n int) ([]byte, error)
- func (l *Lexer) SkipBytes(n int) error
type Name
- func (n Name) String() string
- func (n Name) Type() ObjectType
type Null
- func (n Null) String() string
- func (n Null) Type() ObjectType
type Object
type ObjectStream
- func NewObjectStream(stream *Stream) (*ObjectStream, error)
- func (os *ObjectStream) ContainsObject(objNum int) (bool, error)
- func (os *ObjectStream) Extends() *IndirectRef
- func (os *ObjectStream) First() int
- func (os *ObjectStream) GetObjectByIndex(index int) (Object, int, error)
- func (os *ObjectStream) GetObjectByNumber(objNum int) (Object, int, error)
- func (os *ObjectStream) N() int
- func (os *ObjectStream) ObjectNumbers() ([]int, error)
type ObjectType
- func (t ObjectType) String() string
type Parser
- func NewParser(r io.Reader) *Parser
- func (p *Parser) ParseIndirectObject() (*IndirectObject, error)
- func (p *Parser) ParseObject() (Object, error)
type Real
- func (r Real) String() string
- func (r Real) Type() ObjectType
type Stream
- func (s *Stream) Decode() ([]byte, error)
- func (s *Stream) Decoded() ([]byte, error)
- func (s *Stream) String() string
- func (s *Stream) Type() ObjectType
type String
- func (s String) String() string
- func (s String) Type() ObjectType
type Token
type TokenType
type XRefEntry
type XRefEntryType
- func (t XRefEntryType) String() string
type XRefParser
- func NewXRefParser(r io.ReadSeeker) *XRefParser
- func (x *XRefParser) FindXRef() (int64, error)
- func (x *XRefParser) ParseAllXRefs() ([]*XRefTable, error)
- func (x *XRefParser) ParsePrevXRef(table *XRefTable) (*XRefTable, error)
- func (x *XRefParser) ParseXRef(offset int64) (*XRefTable, error)
- func (x *XRefParser) ParseXRefFromEOF() (*XRefTable, error)
type XRefTable
- func MergeXRefTables(tables ...*XRefTable) *XRefTable
- func NewXRefTable() *XRefTable
- func (x *XRefTable) Get(objNum int) (*XRefEntry, bool)
- func (x *XRefTable) Set(objNum int, entry *XRefEntry)
- func (x *XRefTable) Size() int

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type Array ¶

type Array []Object

Array represents a PDF array

func (Array) Get ¶

func (a Array) Get(index int) Object

Get retrieves an element at the given index

func (Array) GetInt ¶

func (a Array) GetInt(index int) (Int, bool)

GetInt retrieves an integer at the given index

func (Array) GetName ¶

func (a Array) GetName(index int) (Name, bool)

GetName retrieves a name at the given index

func (Array) GetReal ¶

func (a Array) GetReal(index int) (Real, bool)

GetReal retrieves a real number at the given index

func (Array) Len ¶

func (a Array) Len() int

Len returns the length of the array

func (Array) String ¶

func (a Array) String() string

func (Array) Type ¶

func (a Array) Type() ObjectType

type Bool ¶

type Bool bool

Bool represents a PDF boolean

func (Bool) String ¶

func (b Bool) String() string

func (Bool) Type ¶

func (b Bool) Type() ObjectType

type Dict ¶

type Dict map[string]Object

Dict represents a PDF dictionary

func (Dict) Delete ¶

func (d Dict) Delete(key string)

Delete removes a key from the dictionary

func (Dict) Get ¶

func (d Dict) Get(key string) Object

Get retrieves a value from the dictionary

func (Dict) GetArray ¶

func (d Dict) GetArray(key string) (Array, bool)

GetArray retrieves an array value

func (Dict) GetBool ¶

func (d Dict) GetBool(key string) (Bool, bool)

GetBool retrieves a boolean value

func (Dict) GetDict ¶

func (d Dict) GetDict(key string) (Dict, bool)

GetDict retrieves a dictionary value

func (Dict) GetIndirectRef ¶

func (d Dict) GetIndirectRef(key string) (IndirectRef, bool)

GetIndirectRef retrieves an indirect reference

func (Dict) GetInt ¶

func (d Dict) GetInt(key string) (Int, bool)

GetInt retrieves an integer value

func (Dict) GetName ¶

func (d Dict) GetName(key string) (Name, bool)

GetName retrieves a name value

func (Dict) GetReal ¶

func (d Dict) GetReal(key string) (Real, bool)

GetReal retrieves a real number value

func (Dict) GetStream ¶

func (d Dict) GetStream(key string) (*Stream, bool)

GetStream retrieves a stream value

func (Dict) GetString ¶

func (d Dict) GetString(key string) (String, bool)

GetString retrieves a string value

func (Dict) Has ¶

func (d Dict) Has(key string) bool

Has checks if a key exists in the dictionary

func (Dict) Keys ¶

func (d Dict) Keys() []string

Keys returns all keys in the dictionary

func (Dict) Set ¶

func (d Dict) Set(key string, value Object)

Set sets a value in the dictionary

func (Dict) String ¶

func (d Dict) String() string

func (Dict) Type ¶

func (d Dict) Type() ObjectType

type IndirectObject ¶

type IndirectObject struct {
	Ref    IndirectRef
	Object Object
}

IndirectObject represents an indirect object with its reference

type IndirectRef ¶

type IndirectRef struct {
	Number     int
	Generation int
}

IndirectRef represents an indirect object reference

func (IndirectRef) String ¶

func (r IndirectRef) String() string

func (IndirectRef) Type ¶

func (r IndirectRef) Type() ObjectType

type Int ¶

type Int int64

Int represents a PDF integer

func (Int) String ¶

func (i Int) String() string

func (Int) Type ¶

func (i Int) Type() ObjectType

type Lexer ¶

type Lexer struct {
	// contains filtered or unexported fields
}

Lexer performs lexical analysis of PDF content

func NewLexer ¶

func NewLexer(r io.Reader) *Lexer

NewLexer creates a new lexer

func (*Lexer) NextToken ¶

func (l *Lexer) NextToken() (*Token, error)

NextToken returns the next token from the input

func (*Lexer) Peek ¶

func (l *Lexer) Peek() (byte, error)

Peek returns the next byte without consuming it (public wrapper for peek)

func (*Lexer) ReadByte ¶

func (l *Lexer) ReadByte() (byte, error)

ReadByte reads and returns a single byte (public wrapper for readByte)

func (*Lexer) ReadBytes ¶

func (l *Lexer) ReadBytes(n int) ([]byte, error)

ReadBytes reads exactly n bytes from the underlying reader. This is used for reading binary stream data.

func (*Lexer) SkipBytes ¶

func (l *Lexer) SkipBytes(n int) error

SkipBytes skips exactly n bytes from the underlying reader

type Name ¶

type Name string

Name represents a PDF name

func (Name) String ¶

func (n Name) String() string

func (Name) Type ¶

func (n Name) Type() ObjectType

type Null ¶

type Null struct{}

Null represents a PDF null object

func (Null) String ¶

func (n Null) String() string

func (Null) Type ¶

func (n Null) Type() ObjectType

type Object ¶

type Object interface {
	Type() ObjectType
	String() string
}

Object represents a PDF object

type ObjectStream ¶

type ObjectStream struct {
	// contains filtered or unexported fields
}

ObjectStream represents a PDF Object Stream (Type /ObjStm). Object streams (PDF 1.5+) store multiple objects in a single compressed stream.

func NewObjectStream ¶

func NewObjectStream(stream *Stream) (*ObjectStream, error)

NewObjectStream creates an ObjectStream from a Stream object. It returns an error if the stream is not a valid object stream.

func (*ObjectStream) ContainsObject ¶

func (os *ObjectStream) ContainsObject(objNum int) (bool, error)

ContainsObject checks if an object number is stored in this stream

func (*ObjectStream) Extends ¶

func (os *ObjectStream) Extends() *IndirectRef

Extends returns the reference to another object stream this one extends, or nil

func (*ObjectStream) First ¶

func (os *ObjectStream) First() int

First returns the byte offset to the first object in the decoded data

func (*ObjectStream) GetObjectByIndex ¶

func (os *ObjectStream) GetObjectByIndex(index int) (Object, int, error)

GetObjectByIndex extracts an object by its index within the stream (0-based). The index corresponds to the position in the header, not the object number.

func (*ObjectStream) GetObjectByNumber ¶

func (os *ObjectStream) GetObjectByNumber(objNum int) (Object, int, error)

GetObjectByNumber finds and extracts an object by its object number. It returns the object and its index, or an error if not found.

func (*ObjectStream) N ¶

func (os *ObjectStream) N() int

N returns the number of objects in the stream

func (*ObjectStream) ObjectNumbers ¶

func (os *ObjectStream) ObjectNumbers() ([]int, error)

ObjectNumbers returns a slice of all object numbers stored in this stream

type ObjectType ¶

type ObjectType int

ObjectType represents the type of PDF object

const (
	ObjNull ObjectType = iota
	ObjBool
	ObjInt
	ObjReal
	ObjString
	ObjName
	ObjArray
	ObjDict
	ObjStream
	ObjIndirect
)

func (ObjectType) String ¶

func (t ObjectType) String() string

String returns the string representation of the object type

type Parser ¶

type Parser struct {
	// contains filtered or unexported fields
}

Parser parses PDF objects using the Lexer

func NewParser ¶

func NewParser(r io.Reader) *Parser

NewParser creates a new PDF parser

func (*Parser) ParseIndirectObject ¶

func (p *Parser) ParseIndirectObject() (*IndirectObject, error)

ParseIndirectObject parses an indirect object (num gen obj ... endobj)

func (*Parser) ParseObject ¶

func (p *Parser) ParseObject() (Object, error)

ParseObject parses a PDF object

type Real ¶

type Real float64

Real represents a PDF real number

func (Real) String ¶

func (r Real) String() string

func (Real) Type ¶

func (r Real) Type() ObjectType

type Stream ¶

type Stream struct {
	Dict Dict
	Data []byte
	// contains filtered or unexported fields
}

Stream represents a PDF stream object

func (*Stream) Decode ¶

func (s *Stream) Decode() ([]byte, error)

Decode decodes the stream data according to its Filter(s). It returns the decoded data or an error.

func (*Stream) Decoded ¶

func (s *Stream) Decoded() ([]byte, error)

Decoded returns the decoded stream data

func (*Stream) String ¶

func (s *Stream) String() string

func (*Stream) Type ¶

func (s *Stream) Type() ObjectType

type String ¶

type String string

String represents a PDF string

func (String) String ¶

func (s String) String() string

func (String) Type ¶

func (s String) Type() ObjectType

type Token ¶

type Token struct {
	Type  TokenType
	Value []byte
	Pos   int64 // Position in stream
}

Token represents a lexical token

type TokenType ¶

type TokenType int

TokenType represents the type of token

const (
	TokenEOF TokenType = iota
	TokenWhitespace
	TokenComment
	TokenKeyword     // true, false, null, obj, endobj, stream, endstream, etc.
	TokenInteger     // 123
	TokenReal        // 3.14
	TokenString      // (hello)
	TokenHexString   // <48656C6C6F>
	TokenName        // /Type
	TokenArrayStart  // [
	TokenArrayEnd    // ]
	TokenDictStart   // <<
	TokenDictEnd     // >>
	TokenIndirectRef // R (after two numbers)
)

type XRefEntry ¶

type XRefEntry struct {
	Type       XRefEntryType // Entry type (free, uncompressed, or compressed)
	Offset     int64         // Byte offset (uncompressed) or object stream number (compressed)
	Generation int           // Generation number (uncompressed) or index within object stream (compressed)
	InUse      bool          // true if object is in use (Type != XRefEntryFree)
}

XRefEntry represents a single cross-reference table entry

type XRefEntryType ¶

type XRefEntryType int

XRefEntryType represents the type of an XRef entry

const (
	// XRefEntryFree indicates a free (deleted) object entry
	XRefEntryFree XRefEntryType = 0
	// XRefEntryUncompressed indicates an in-use object at a byte offset in the file
	XRefEntryUncompressed XRefEntryType = 1
	// XRefEntryCompressed indicates an object stored in an object stream (PDF 1.5+)
	XRefEntryCompressed XRefEntryType = 2
)

func (XRefEntryType) String ¶

func (t XRefEntryType) String() string

String returns a human-readable representation of the entry type

type XRefParser ¶

type XRefParser struct {
	// contains filtered or unexported fields
}

XRefParser parses PDF cross-reference tables

func NewXRefParser ¶

func NewXRefParser(r io.ReadSeeker) *XRefParser

NewXRefParser creates a new XRef parser

func (*XRefParser) FindXRef ¶

func (x *XRefParser) FindXRef() (int64, error)

FindXRef finds the byte offset of the XRef table by scanning from EOF. PDFs end with "startxref\n<offset>\n%%EOF".

func (*XRefParser) ParseAllXRefs ¶

func (x *XRefParser) ParseAllXRefs() ([]*XRefTable, error)

ParseAllXRefs parses the main XRef table and all previous ones (incremental updates). It returns them in order from oldest to newest.

func (*XRefParser) ParsePrevXRef ¶

func (x *XRefParser) ParsePrevXRef(table *XRefTable) (*XRefTable, error)

ParsePrevXRef checks if the trailer has a /Prev entry and parses that XRef table. This handles incremental updates in PDFs.

func (*XRefParser) ParseXRef ¶

func (x *XRefParser) ParseXRef(offset int64) (*XRefTable, error)

ParseXRef parses the XRef table at the given byte offset. It handles both traditional XRef tables (PDF 1.0-1.4) and XRef streams (PDF 1.5+).

func (*XRefParser) ParseXRefFromEOF ¶

func (x *XRefParser) ParseXRefFromEOF() (*XRefTable, error)

ParseXRefFromEOF finds and parses the XRef table by scanning from EOF

type XRefTable ¶

type XRefTable struct {
	Entries  map[int]*XRefEntry // Map from object number to XRef entry
	Trailer  Dict               // Trailer dictionary
	IsStream bool               // true if this XRef came from a stream (PDF 1.5+)
}

XRefTable represents a PDF cross-reference table

func MergeXRefTables ¶

func MergeXRefTables(tables ...*XRefTable) *XRefTable

MergeXRefTables merges multiple XRef tables (from incremental updates). Later entries override earlier ones.

func NewXRefTable ¶

func NewXRefTable() *XRefTable

NewXRefTable creates a new empty XRef table

func (*XRefTable) Get ¶

func (x *XRefTable) Get(objNum int) (*XRefEntry, bool)

Get retrieves an XRef entry by object number

func (*XRefTable) Set ¶

func (x *XRefTable) Set(objNum int, entry *XRefEntry)

Set adds or updates an XRef entry

func (*XRefTable) Size ¶

func (x *XRefTable) Size() int

Size returns the number of entries in the table

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL