Documentation
¶
Overview ¶
Package odt provides ODT (OpenDocument Text) document parsing.
Index ¶
- type ListParser
- type ListType
- type ParsedList
- type ParsedListItem
- type ParsedTable
- type ParsedTableCell
- type ParsedTableRow
- type Reader
- func (r *Reader) Close() error
- func (r *Reader) Document() (*model.Document, error)
- func (r *Reader) Lists() []ParsedList
- func (r *Reader) Markdown() (string, error)
- func (r *Reader) Metadata() model.Metadata
- func (r *Reader) ModelTables() []*model.Table
- func (r *Reader) PageCount() (int, error)
- func (r *Reader) Tables() []ParsedTable
- func (r *Reader) Text() (string, error)
- type ResolvedListLevel
- type ResolvedStyle
- type StyleResolver
- type TableParser
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ListParser ¶
type ListParser struct {
// contains filtered or unexported fields
}
ListParser handles parsing of ODT lists.
func NewListParser ¶
func NewListParser(resolver *StyleResolver) *ListParser
NewListParser creates a new list parser.
func (*ListParser) ParseList ¶
func (lp *ListParser) ParseList(list listXML, level int) ParsedList
ParseList parses a list XML element into a ParsedList.
type ParsedList ¶
type ParsedList struct {
Items []ParsedListItem
Type ListType
StyleName string
StartAt int // Starting number for ordered lists
}
ParsedList represents a parsed list with its items.
func (*ParsedList) ToModelList ¶
func (pl *ParsedList) ToModelList() *model.List
ToModelList converts a ParsedList to a model.List.
func (*ParsedList) ToText ¶
func (pl *ParsedList) ToText() string
ToText returns a plain text representation of the list.
type ParsedListItem ¶
type ParsedListItem struct {
Text string
Level int // Indentation level (0-based)
Bullet string // The bullet character or number prefix
}
ParsedListItem represents a single list item.
type ParsedTable ¶
type ParsedTable struct {
Rows []ParsedTableRow
ColWidths []float64 // Column widths in points
HasBorders bool
StyleName string
}
ParsedTable represents a parsed table with resolved structure.
func (*ParsedTable) ColCount ¶
func (pt *ParsedTable) ColCount() int
ColCount returns the number of columns in the table.
func (*ParsedTable) ToMarkdown ¶
func (pt *ParsedTable) ToMarkdown() string
ToMarkdown returns a markdown table representation.
func (*ParsedTable) ToModelTable ¶
func (pt *ParsedTable) ToModelTable() *model.Table
ToModelTable converts a ParsedTable to a model.Table.
func (*ParsedTable) ToText ¶
func (pt *ParsedTable) ToText() string
ToText returns a plain text representation of the table.
type ParsedTableCell ¶
type ParsedTableCell struct {
// Content
Paragraphs []parsedParagraph
Text string // Combined text from all paragraphs
// Structure
ColSpan int // Number of columns spanned
RowSpan int // Number of rows spanned
IsCovered bool // True if this is a covered cell (part of a merge)
// Dimensions
Width float64 // Cell width in points
// Styling
VerticalAlign string // top, middle, bottom
Background string // Background color (hex)
HasBorders bool
StyleName string
}
ParsedTableCell represents a parsed table cell.
type ParsedTableRow ¶
type ParsedTableRow struct {
Cells []ParsedTableCell
Height float64 // Row height in points (0 = auto)
StyleName string
}
ParsedTableRow represents a parsed table row.
type Reader ¶
type Reader struct {
// contains filtered or unexported fields
}
Reader provides access to ODT document content.
func (*Reader) Lists ¶
func (r *Reader) Lists() []ParsedList
Lists returns all parsed lists from the document.
func (*Reader) ModelTables ¶
func (r *Reader) ModelTables() []*model.Table
ModelTables returns the document's tables converted to model.Table format.
func (*Reader) PageCount ¶
func (r *Reader) PageCount() (int, error)
PageCount returns the number of "pages" in the document. Because ODT has no fixed pagination, the entire document is treated as a single page and the count returned is 1.
func (*Reader) Tables ¶
func (r *Reader) Tables() []ParsedTable
Tables returns all parsed tables from the document.
type ResolvedListLevel ¶
type ResolvedListLevel struct {
Level int
IsBullet bool
BulletChar string
NumFormat string // "1", "a", "A", "i", "I"
NumPrefix string
NumSuffix string
StartValue int
}
ResolvedListLevel contains resolved list level properties.
type ResolvedStyle ¶
type ResolvedStyle struct {
// Identity
Name string
Family string // paragraph, text, table, table-cell, etc.
// Heading info
IsHeading bool
HeadingLevel int // 1-9, 0 if not a heading
// Paragraph properties
Alignment string // left, center, right, justify
SpaceBefore float64 // points
SpaceAfter float64 // points
LineSpacing float64 // points (0 = auto)
IndentLeft float64 // points
IndentRight float64 // points
IndentFirst float64 // points (first line indent, can be negative for hanging)
// Run/character properties
FontName string
FontSize float64 // points
Bold bool
Italic bool
Underline bool
Strike bool
Color string // hex color like "#FF0000"
}
ResolvedStyle contains the fully resolved properties for a style.
type StyleResolver ¶
type StyleResolver struct {
// contains filtered or unexported fields
}
StyleResolver resolves styles with inheritance support.
func NewStyleResolver ¶
func NewStyleResolver(contentStyles *contentStylesXML, docStyles *stylesXML) *StyleResolver
NewStyleResolver creates a new style resolver from parsed styles.
func (*StyleResolver) Resolve ¶
func (sr *StyleResolver) Resolve(styleName string) *ResolvedStyle
Resolve returns the fully resolved style for the given style name. If the style doesn't exist, it returns a default style.
func (*StyleResolver) ResolveListLevel ¶
func (sr *StyleResolver) ResolveListLevel(listStyleName string, level int) *ResolvedListLevel
ResolveListLevel returns the resolved list level for a given list style and level.
type TableParser ¶
type TableParser struct {
// contains filtered or unexported fields
}
TableParser handles parsing of ODT tables.
func NewTableParser ¶
func NewTableParser(resolver *StyleResolver) *TableParser
NewTableParser creates a new table parser.
func (*TableParser) ParseTable ¶
func (tp *TableParser) ParseTable(tbl tableXML) ParsedTable
ParseTable parses a table XML element into a ParsedTable.