fitz

package module
v0.0.0-...-4d4120b Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 15, 2019 License: AGPL-3.0 Imports: 11 Imported by: 0

README

go-fitz

TravisCI Build Status AppVeyor Build Status GoDoc Go Report Card

Go wrapper for MuPDF fitz library that can extract pages from PDF and EPUB documents as images, text, html or svg.

Install

go get -u github.com/gen2brain/go-fitz

Build tags

  • extlib - use external MuPDF library
  • static - build with static external MuPDF library (used with extlib)
  • nopie - use this with GCC older then 7

Example

package main

import (
	"fmt"
	"image/jpeg"
	"io/ioutil"
	"os"
	"path/filepath"

	"github.com/gen2brain/go-fitz"
)

func main() {
	doc, err := fitz.New("test.pdf")
	if err != nil {
		panic(err)
	}

	defer doc.Close()

	tmpDir, err := ioutil.TempDir(os.TempDir(), "fitz")
	if err != nil {
		panic(err)
	}

	// Extract pages as images
	for n := 0; n < doc.NumPage(); n++ {
		img, err := doc.Image(n)
		if err != nil {
			panic(err)
		}

		f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.jpg", n)))
		if err != nil {
			panic(err)
		}

		err = jpeg.Encode(f, img, &jpeg.Options{jpeg.DefaultQuality})
		if err != nil {
			panic(err)
		}

		f.Close()
	}

	// Extract pages as text
	for n := 0; n < doc.NumPage(); n++ {
		text, err := doc.Text(n)
		if err != nil {
			panic(err)
		}

		f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.txt", n)))
		if err != nil {
			panic(err)
		}

		_, err = f.WriteString(text)
		if err != nil {
			panic(err)
		}

		f.Close()
	}

	// Extract pages as html
	for n := 0; n < doc.NumPage(); n++ {
		html, err := doc.HTML(n, true)
		if err != nil {
			panic(err)
		}

		f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.html", n)))
		if err != nil {
			panic(err)
		}

		_, err = f.WriteString(html)
		if err != nil {
			panic(err)
		}

		f.Close()
	}
}

Documentation

Overview

Package fitz provides wrapper for the [MuPDF](http://mupdf.com/) fitz library that can extract pages from PDF and EPUB documents as images, text, html or svg.

Index

Examples

Constants

This section is empty.

Variables

View Source
var (
	ErrNoSuchFile    = errors.New("fitz: no such file")
	ErrCreateContext = errors.New("fitz: cannot create context")
	ErrOpenDocument  = errors.New("fitz: cannot open document")
	ErrOpenMemory    = errors.New("fitz: cannot open memory")
	ErrPageMissing   = errors.New("fitz: page missing")
	ErrObjMissing    = errors.New("fitz: obj missing")
	ErrNotImage      = errors.New("fitz: obj is not image, please ignore this obj")
	ErrCreatePixmap  = errors.New("fitz: cannot create pixmap")
	ErrPixmapSamples = errors.New("fitz: cannot get pixmap samples")
	ErrNeedsPassword = errors.New("fitz: document needs password")
	ErrLoadOutline   = errors.New("fitz: cannot load outline")
)

Errors.

Functions

This section is empty.

Types

type Document

type Document struct {
	// contains filtered or unexported fields
}

Document represents fitz document.

func New

func New(filename string) (f *Document, err error)

New returns new fitz document from filename. After process, please do Close() to release resource.

Example
doc, err := New("testdata/test.pdf")
if err != nil {
	panic(err)
}

defer doc.Close()

tmpDir, err := ioutil.TempDir(os.TempDir(), "fitz")
if err != nil {
	panic(err)
}

// Extract pages as images
for n := 1; n < doc.NumObj(); n++ {
	img, err := doc.Image(n)
	if err == ErrNotImage {
		continue
	} else if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.jpg", n)))
	if err != nil {
		panic(err)
	}

	err = jpeg.Encode(f, img, &jpeg.Options{Quality: jpeg.DefaultQuality})
	if err != nil {
		panic(err)
	}

	f.Close()
}

// Extract pages as text
for n := 0; n < doc.NumPage(); n++ {
	text, err := doc.Text(n)
	if err != nil {
		panic(err)
	}

	f, err := os.Create(filepath.Join(tmpDir, fmt.Sprintf("test%03d.txt", n)))
	if err != nil {
		panic(err)
	}

	_, err = f.WriteString(text)
	if err != nil {
		panic(err)
	}

	f.Close()
}
Output:

func NewFromMemory

func NewFromMemory(b []byte) (f *Document, err error)

NewFromMemory returns new fitz document from byte slice, please do Close() to release resource.

func NewFromReader

func NewFromReader(r io.Reader) (f *Document, err error)

NewFromReader returns new fitz document from io.Reader, please do Close() to release resource.

func (*Document) Close

func (f *Document) Close() error

Close closes the underlying fitz document.

func (*Document) Image

func (f *Document) Image(objNumber int) (image.Image, error)

Image returns image.Image encoded by png. The objNumber should between 1 ~ f.NumObj()

func (*Document) ImageBytes

func (f *Document) ImageBytes(objNumber int) ([]byte, error)

ImageBytes returns image.Image bytes encoded by png. The objNumber should between 1 ~ f.NumObj() ImageBytes will be faster than Image

func (*Document) Metadata

func (f *Document) Metadata() map[string]string

Metadata returns the map with standard metadata.

func (*Document) NumObj

func (f *Document) NumObj() int

NumObj returns total number of objects in document.

func (*Document) NumPage

func (f *Document) NumPage() int

NumPage returns total number of pages in document.

func (*Document) Text

func (f *Document) Text(pageNumber int) (string, error)

Text returns text for given page number. Index start at 0

func (*Document) ToC

func (f *Document) ToC() ([]Outline, error)

ToC returns the table of contents (also known as outline).

type Outline

type Outline struct {
	// Hierarchy level of the entry (starting from 1).
	Level int
	// Title of outline item.
	Title string
	// Destination in the document to be displayed when this outline item is activated.
	URI string
	// The page number of an internal link.
	Page int
	// Top.
	Top float64
}

Outline type.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL