epubdoc

package
v1.6.3 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 3, 2026 License: MIT Imports: 12 Imported by: 0

Documentation

Overview

Package epubdoc provides EPUB document parsing.

Index

Constants

This section is empty.

Variables

View Source
// Container-related errors.

// ErrNoContainer reports that META-INF/container.xml is missing.
var ErrNoContainer = errors.New("epub: missing META-INF/container.xml")

// ErrInvalidContainer reports that container.xml is invalid.
var ErrInvalidContainer = errors.New("epub: invalid container.xml")

// ErrNoRootfile reports that container.xml contains no rootfile.
var ErrNoRootfile = errors.New("epub: no rootfile found in container.xml")

Container-related errors.

View Source
// OPF-related errors.

// ErrNoOPF reports that the package document (OPF) is missing.
var ErrNoOPF = errors.New("epub: missing package document (OPF)")

// ErrInvalidOPF reports that the package document is invalid.
var ErrInvalidOPF = errors.New("epub: invalid package document")

// ErrEmptySpine reports that the spine has no content.
var ErrEmptySpine = errors.New("epub: no content in spine")

OPF-related errors.

View Source
// Reader-related errors.

// ErrInvalidArchive reports an invalid or corrupted archive.
var ErrInvalidArchive = errors.New("epub: invalid or corrupted archive")

// ErrInvalidMimetype reports an invalid mimetype, i.e. the input is not an
// EPUB.
var ErrInvalidMimetype = errors.New("epub: invalid mimetype (not an EPUB)")

// ErrMissingContent reports that a referenced content file was not found.
var ErrMissingContent = errors.New("epub: referenced content file not found")

Reader-related errors.

View Source
// DRM-related errors.

// ErrDRMProtected reports that the content is DRM-protected and cannot be
// processed.
var ErrDRMProtected = errors.New("epub: DRM-protected content cannot be processed")

DRM-related errors.

Functions

This section is empty.

Types

type Chapter

// Chapter represents extracted content from one spine item.
//
// Content holds the raw XHTML bytes of the chapter.
// NOTE(review): Index is presumably the chapter's position in spine order and
// Href the content file's path inside the archive — confirm against the
// implementation.
type Chapter struct {
	ID      string
	Title   string
	Index   int
	Href    string
	Content []byte // Raw XHTML content
}

Chapter represents extracted content from one spine item.

type ExtractOptions

// ExtractOptions configures content extraction. It is the options argument
// accepted by Reader.TextWithOptions and Reader.MarkdownWithOptions.
type ExtractOptions struct {
	// NavigationExclusion controls filtering of nav/header/footer elements.
	// Uses htmldoc.NavigationExclusionMode values.
	NavigationExclusion int
}

ExtractOptions configures content extraction.

type ManifestItem

// ManifestItem represents a file in the EPUB.
//
// Items are stored in Package.Manifest, which is keyed by ID.
type ManifestItem struct {
	ID         string
	Href       string
	MediaType  string
	Properties []string // "nav", "cover-image", etc.
}

ManifestItem represents a file in the EPUB.

type Metadata

// Metadata contains EPUB metadata (Dublin Core).
//
// Creator and Subjects are slices because a publication may carry several
// values. Date is kept as a plain string, whereas Modified is a time.Time.
type Metadata struct {
	Title       string
	Creator     []string // Multiple authors possible
	Language    string
	Identifier  string // ISBN, UUID, etc.
	Publisher   string
	Date        string
	Description string
	Subjects    []string
	Rights      string
	Modified    time.Time
}

Metadata contains EPUB metadata (Dublin Core).

type Package

// Package represents the parsed OPF document: its metadata, the manifest of
// files (keyed by item ID), the spine, and the declared version.
type Package struct {
	Metadata Metadata
	Manifest map[string]ManifestItem // keyed by ID
	Spine    []SpineItem
	Version  string // "2.0" or "3.0"
}

Package represents the parsed OPF document.

type Reader

// Reader provides access to EPUB content.
//
// A Reader is obtained from Open or OpenReader; call Close to release its
// resources when done.
type Reader struct {
	// contains filtered or unexported fields
}

Reader provides access to EPUB content.

func Open

func Open(filePath string) (*Reader, error)

Open opens an EPUB file from a path.

func OpenReader

func OpenReader(ra io.ReaderAt, size int64) (*Reader, error)

OpenReader opens an EPUB from an io.ReaderAt.

func (*Reader) ChapterCount

func (r *Reader) ChapterCount() int

ChapterCount returns the number of chapters.

func (*Reader) Chapters

func (r *Reader) Chapters() []*Chapter

Chapters returns all chapters.

func (*Reader) Close

func (r *Reader) Close() error

Close closes the reader and releases resources.

func (*Reader) Document

func (r *Reader) Document() (*model.Document, error)

Document returns the document model.

func (*Reader) Markdown

func (r *Reader) Markdown() (string, error)

Markdown extracts content as markdown from all chapters.

func (*Reader) MarkdownWithOptions

func (r *Reader) MarkdownWithOptions(opts ExtractOptions) (string, error)

MarkdownWithOptions extracts content as markdown with the given options.

func (*Reader) Metadata

func (r *Reader) Metadata() Metadata

Metadata returns the EPUB metadata.

func (*Reader) TableOfContents

func (r *Reader) TableOfContents() *TableOfContents

TableOfContents returns the parsed table of contents.

func (*Reader) Text

func (r *Reader) Text() (string, error)

Text extracts plain text from all chapters.

func (*Reader) TextWithOptions

func (r *Reader) TextWithOptions(opts ExtractOptions) (string, error)

TextWithOptions extracts plain text with the given options.

type SpineItem

// SpineItem represents a content document in reading order.
//
// NOTE(review): IDRef presumably matches the ID key of an entry in
// Package.Manifest — confirm against the OPF parser.
type SpineItem struct {
	IDRef  string
	Linear bool // true if part of main reading order
}

SpineItem represents a content document in reading order.

type TOCEntry

// TOCEntry represents a single navigation entry.
//
// Children holds nested entries, so a TOCEntry can be the root of a subtree.
type TOCEntry struct {
	Title    string
	Href     string
	Children []TOCEntry
}

TOCEntry represents a single navigation entry.

type TableOfContents

// TableOfContents represents the navigation structure, as returned by
// Reader.TableOfContents.
//
// Entries holds the navigation entries; each TOCEntry may carry its own
// Children.
type TableOfContents struct {
	Title   string
	Entries []TOCEntry
}

TableOfContents represents the navigation structure.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL