pptx

package
v1.6.4 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 4, 2026 License: MIT Imports: 9 Imported by: 0

Documentation

Overview

Package pptx provides PPTX (Office Open XML Presentation) document parsing.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type ExtractOptions

// ExtractOptions holds options for text extraction. It is the options type
// accepted by Reader.TextWithOptions, Reader.MarkdownWithOptions, and
// Reader.MarkdownWithRAGOptions.
//
// NOTE(review): IncludeTitles is described as defaulting to true, but the zero
// value of a Go bool is false, so a literal ExtractOptions{} has
// IncludeTitles == false. Presumably the "default" is what the option-less
// Text/Markdown methods use — confirm against the implementation.
type ExtractOptions struct {
	IncludeNotes   bool  // Include speaker notes
	IncludeTitles  bool  // Include slide titles (default: true)
	SlideNumbers   []int // Which slides to include (0-indexed, empty = all)
	ExcludeHeaders bool  // Exclude header placeholders
	ExcludeFooters bool  // Exclude footer placeholders (footer, date, slide number)
}

ExtractOptions holds options for text extraction.

type Paragraph

// Paragraph represents a paragraph within a text block (see TextBlock.Paragraphs).
//
// NOTE(review): the Alignment codes look like the OOXML paragraph "algn"
// values — presumably l = left, ctr = center, r = right, just = justified;
// confirm against the parser.
type Paragraph struct {
	Text       string // Paragraph text; presumably the combined text of Runs — TODO confirm
	Level      int    // Bullet/indent level (0 = top level)
	IsBullet   bool   // Has bullet point
	IsNumbered bool   // Is numbered list
	BulletChar string // Bullet character (if custom)
	Alignment  string // l, ctr, r, just
	Runs       []Run  // Text runs with formatting
}

Paragraph represents a paragraph within a text block.

type Reader

// Reader provides access to PPTX document content. A *Reader is obtained from
// Open, and its resources are released by calling Close.
type Reader struct {
	// contains filtered or unexported fields
}

Reader provides access to PPTX document content.

func Open

func Open(filename string) (*Reader, error)

Open opens a PPTX file for reading.

func (*Reader) Close

func (r *Reader) Close() error

Close releases resources associated with the Reader.

func (*Reader) Document

func (r *Reader) Document() (*model.Document, error)

Document returns a model.Document representation of the PPTX content.

func (*Reader) Markdown

func (r *Reader) Markdown() (string, error)

Markdown returns the presentation content as Markdown.

func (*Reader) MarkdownWithOptions

func (r *Reader) MarkdownWithOptions(opts ExtractOptions) (string, error)

MarkdownWithOptions returns the presentation content as Markdown, applying the given ExtractOptions.

func (*Reader) MarkdownWithRAGOptions

func (r *Reader) MarkdownWithRAGOptions(extractOpts ExtractOptions, mdOpts rag.MarkdownOptions) (string, error)

MarkdownWithRAGOptions returns the presentation content as Markdown, applying the given ExtractOptions and rag.MarkdownOptions.

func (*Reader) Metadata

func (r *Reader) Metadata() model.Metadata

Metadata returns document metadata.

func (*Reader) PageCount

func (r *Reader) PageCount() (int, error)

PageCount returns the number of slides. It reports the same count as SlideCount, but unlike SlideCount its signature also returns an error.

func (*Reader) Slide

func (r *Reader) Slide(index int) (*Slide, error)

Slide returns the slide at the given index (0-indexed).

func (*Reader) SlideCount

func (r *Reader) SlideCount() int

SlideCount returns the number of slides.

func (*Reader) Text

func (r *Reader) Text() (string, error)

Text extracts and returns all text content from the presentation.

func (*Reader) TextWithOptions

func (r *Reader) TextWithOptions(opts ExtractOptions) (string, error)

TextWithOptions extracts text content with the specified options.

type Run

// Run represents a text run with consistent formatting. Runs are the elements
// of Paragraph.Runs.
type Run struct {
	// Text is the text content of the run.
	Text     string
	// NOTE(review): presumably true when the run is bold-formatted — confirm.
	Bold     bool
	// NOTE(review): presumably true when the run is italic-formatted — confirm.
	Italic   bool
	// FontSize is expressed in hundredths of a point, so 1800 would mean 18 pt.
	FontSize int // In hundredths of a point
}

Run represents a text run with consistent formatting.

type Slide

// Slide represents a parsed slide, as returned by Reader.Slide. Its text can
// be rendered with the GetText and GetMarkdown methods.
type Slide struct {
	Index   int         // 0-indexed slide number
	Title   string      // Slide title (from title placeholder)
	Content []TextBlock // Text content in reading order
	Tables  []Table     // Tables on the slide
	Notes   string      // Speaker notes
}

Slide represents a parsed slide.

func (*Slide) GetMarkdown

func (s *Slide) GetMarkdown() string

GetMarkdown returns the slide content as markdown.

func (*Slide) GetText

func (s *Slide) GetText() string

GetText returns all text from the slide as a single string.

type Table

// Table represents a table on a slide (see Slide.Tables). Position and size
// are given in EMUs (English Metric Units, the Office Open XML length unit;
// 914400 EMUs per inch). ToMarkdown renders the table as Markdown.
type Table struct {
	// Rows holds the table's cells; presumably one inner slice per row, in
	// top-to-bottom order — TODO confirm.
	Rows    [][]TableCell
	// NOTE(review): presumably the number of columns in the table; confirm how
	// merged cells are counted.
	Columns int
	X, Y    int // Position in EMUs
	Width   int // Width in EMUs
	Height  int // Height in EMUs
}

Table represents a table on a slide.

func (*Table) ToMarkdown

func (t *Table) ToMarkdown() string

ToMarkdown converts a table to markdown format.

type TableCell

// TableCell represents a cell in a table; cells are the elements of Table.Rows.
type TableCell struct {
	// Text is the cell's text content.
	Text     string
	// NOTE(review): presumably the number of rows this cell spans — confirm
	// whether an unmerged cell reports 0 or 1.
	RowSpan  int
	// NOTE(review): presumably the number of columns this cell spans — confirm
	// whether an unmerged cell reports 0 or 1.
	ColSpan  int
	IsMerged bool // Part of a merged cell (not the origin)
}

TableCell represents a cell in a table.

type TextBlock

// TextBlock represents a block of text on a slide (see Slide.Content).
// Position and size are given in EMUs (English Metric Units, the Office Open
// XML length unit).
type TextBlock struct {
	// Text is the block's text; presumably the combined text of Paragraphs —
	// TODO confirm.
	Text        string
	// Paragraphs holds the paragraphs within this text block.
	Paragraphs  []Paragraph
	IsTitle     bool   // Is this the slide title?
	IsSubtitle  bool   // Is this a subtitle?
	Placeholder string // Placeholder type (title, body, etc.)
	X, Y        int    // Position in EMUs
	Width       int    // Width in EMUs
	Height      int    // Height in EMUs
}

TextBlock represents a block of text on a slide.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL