fitz

package module
v0.0.0-...-f0a07eb Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 16, 2021 License: AGPL-3.0 Imports: 9 Imported by: 0

README

go-fitz

TravisCI Build Status AppVeyor Build Status GoDoc Go Report Card

Go wrapper for the MuPDF fitz library that can extract pages from PDF and EPUB documents as images, text, HTML or SVG.

Install

go get -u github.com/gen2brain/go-fitz

Build tags

  • extlib - use external MuPDF library
  • static - build with static external MuPDF library (used with extlib)
  • nopie - use this with GCC older than 7

Example

package main

import (
	"fmt"
	"image/jpeg"
	"io/ioutil"
	"os"
	"path/filepath"

	"github.com/gen2brain/go-fitz"
)

// main opens test.pdf and writes every page into a freshly created temporary
// directory three times over: as a JPEG image, as plain text and as HTML.
// Output files are named test000.<ext>, test001.<ext>, ... after the page
// index. Any error, including a failure to close an output file, aborts the
// program with a panic.
func main() {
	doc, err := fitz.New("test.pdf")
	if err != nil {
		panic(err)
	}

	// Release the document when main returns.
	defer doc.Close()

	tmpDir, err := ioutil.TempDir(os.TempDir(), "fitz")
	if err != nil {
		panic(err)
	}

	// Extract pages as images
	for n := 0; n < doc.NumPage(); n++ {
		img, err := doc.Image(n)
		if err != nil {
			panic(err)
		}

		f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.jpg", n)))
		if err != nil {
			panic(err)
		}

		// Keyed field: unkeyed literals of imported structs are flagged by
		// go vet and break if the struct ever gains a field.
		err = jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality})
		if err != nil {
			panic(err)
		}

		// Close can report a delayed write error, so it is checked too.
		if err := f.Close(); err != nil {
			panic(err)
		}
	}

	// Extract pages as text
	for n := 0; n < doc.NumPage(); n++ {
		text, err := doc.Text(n)
		if err != nil {
			panic(err)
		}

		f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.txt", n)))
		if err != nil {
			panic(err)
		}

		_, err = f.WriteString(text)
		if err != nil {
			panic(err)
		}

		if err := f.Close(); err != nil {
			panic(err)
		}
	}

	// Extract pages as html
	for n := 0; n < doc.NumPage(); n++ {
		html, err := doc.HTML(n, true)
		if err != nil {
			panic(err)
		}

		f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.html", n)))
		if err != nil {
			panic(err)
		}

		_, err = f.WriteString(html)
		if err != nil {
			panic(err)
		}

		if err := f.Close(); err != nil {
			panic(err)
		}
	}
}

Documentation

Overview

Package fitz provides a wrapper for the [MuPDF](http://mupdf.com/) fitz library that can extract pages from PDF and EPUB documents as images, text, HTML or SVG.

Index

Examples

Constants

This section is empty.

Variables

View Source
// Sentinel errors exposed by the package. Every message carries the "fitz: "
// prefix. Which call returns which value is not visible in this listing, so
// the notes below only restate what each message says.
var (
	// ErrNoSuchFile signals that a file does not exist.
	ErrNoSuchFile    = errors.New("fitz: no such file")
	// ErrCreateContext signals that a context could not be created.
	ErrCreateContext = errors.New("fitz: cannot create context")
	// ErrOpenDocument signals that a document could not be opened.
	ErrOpenDocument  = errors.New("fitz: cannot open document")
	// ErrOpenMemory signals that memory could not be opened
	// (presumably the in-memory document buffer; confirm against the implementation).
	ErrOpenMemory    = errors.New("fitz: cannot open memory")
	// ErrPageMissing signals that a page is missing.
	ErrPageMissing   = errors.New("fitz: page missing")
	// ErrCreatePixmap signals that a pixmap could not be created.
	ErrCreatePixmap  = errors.New("fitz: cannot create pixmap")
	// ErrPixmapSamples signals that the samples of a pixmap could not be obtained.
	ErrPixmapSamples = errors.New("fitz: cannot get pixmap samples")
	// ErrNeedsPassword signals that the document needs a password.
	ErrNeedsPassword = errors.New("fitz: document needs password")
	// ErrLoadOutline signals that the outline could not be loaded.
	ErrLoadOutline   = errors.New("fitz: cannot load outline")
)

Errors.

Functions

This section is empty.

Types

type Document

type Document struct {
	// contains filtered or unexported fields
}

Document represents a fitz document.

func New

func New(filename string) (f *Document, err error)

New returns a new fitz document.

Example
// Open test.pdf and dump every page into a new temporary directory in four
// formats: JPEG image, plain text, HTML and SVG. Every error panics.
doc, err := New("test.pdf")
if err != nil {
	panic(err)
}

// Close the document when the surrounding function returns.
defer doc.Close()

// Create a directory with the "fitz" name prefix under the system temp dir.
tmpDir, err := ioutil.TempDir(os.TempDir(), "fitz")
if err != nil {
	panic(err)
}

// Extract pages as images; page indices start at 0 and files are named
// test000.jpg, test001.jpg, ...
for n := 0; n < doc.NumPage(); n++ {
	img, err := doc.Image(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.jpg", n)))
	if err != nil {
		panic(err)
	}

	// Encode with the default JPEG quality.
	err = jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality})
	if err != nil {
		panic(err)
	}

	// Closed explicitly per iteration (not deferred); the Close error is ignored.
	f.Close()
}

// Extract pages as text, one .txt file per page.
for n := 0; n < doc.NumPage(); n++ {
	text, err := doc.Text(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.txt", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(text)
	if err != nil {
		panic(err)
	}

	f.Close()
}

// Extract pages as html, one .html file per page; the second argument is
// the method's "header" flag.
for n := 0; n < doc.NumPage(); n++ {
	html, err := doc.HTML(n, true)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.html", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(html)
	if err != nil {
		panic(err)
	}

	f.Close()
}

// Extract pages as svg, one .svg file per page.
for n := 0; n < doc.NumPage(); n++ {
	svg, err := doc.SVG(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.svg", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(svg)
	if err != nil {
		panic(err)
	}

	f.Close()
}
Output:

func NewFromMemory

func NewFromMemory(b []byte) (f *Document, err error)

NewFromMemory returns a new fitz document from a byte slice.

func NewFromReader

func NewFromReader(r io.Reader) (f *Document, err error)

NewFromReader returns a new fitz document from an io.Reader.

func (*Document) Close

func (f *Document) Close() error

Close closes the underlying fitz document.

func (*Document) HTML

func (f *Document) HTML(pageNumber int, header bool) (string, error)

HTML returns the HTML for the given page number.

func (*Document) Image

func (f *Document) Image(pageNumber int) (image.Image, error)

Image returns the image for the given page number.

func (*Document) ImageDPI

func (f *Document) ImageDPI(pageNumber int, dpi float64) (image.Image, error)

ImageDPI returns the image for the given page number at the given DPI.

func (*Document) ImagePNG

func (f *Document) ImagePNG(pageNumber int, dpi float64) ([]byte, error)

ImagePNG returns the image for the given page number as PNG bytes.

func (*Document) Metadata

func (f *Document) Metadata() map[string]string

Metadata returns the map with standard metadata.

func (*Document) NumPage

func (f *Document) NumPage() int

NumPage returns the total number of pages in the document.

func (*Document) SVG

func (f *Document) SVG(pageNumber int) (string, error)

SVG returns the SVG document for the given page number.

func (*Document) Text

func (f *Document) Text(pageNumber int) (string, error)

Text returns the text for the given page number.

func (*Document) ToC

func (f *Document) ToC() ([]Outline, error)

ToC returns the table of contents (also known as outline).

type Outline

// Outline is a single entry of a document's table of contents; ToC returns
// a slice of these.
type Outline struct {
	// Hierarchy level of the entry (starting from 1).
	Level int
	// Title of outline item.
	Title string
	// Destination in the document to be displayed when this outline item is activated.
	URI string
	// The page number of an internal link.
	Page int
	// Top.
	// NOTE(review): presumably the vertical position of the destination on
	// the target page; units and origin are not shown here, so confirm.
	Top float64
}

Outline type.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL