Documentation

Overview

Package hocr contains structures and functions for parsing and analysing hOCR files.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func BoxCoords

func BoxCoords(s string) ([4]int, error)

BoxCoords parses bbox coordinate strings

func GetAvgConf

func GetAvgConf(hocrfn string) (float64, error)

GetAvgConf calculates the average confidence of a hOCR file from confidences embedded in each word

func GetLineBasics

func GetLineBasics(hocrfn string) (line.Details, error)

GetLineBasics parses a hOCR file and returns a corresponding line.Details, without any image extracts.

func GetLineDetails

func GetLineDetails(hocrfn string) (line.Details, error)

GetLineDetails parses a hOCR file and returns a corresponding line.Details, including image extracts for each line.

func GetText

func GetText(hocrfn string) (string, error)

GetText parses a hOCR file and extracts the text from it

func GetWordConfs

func GetWordConfs(hocrfn string) ([]float64, error)

GetWordConfs is a utility function that parses a hOCR file and returns a slice containing the confidence of each word therein.

func LineText

func LineText(l OcrLine) string

LineText extracts the text from an OcrLine

Types

type Hocr

// Hocr is the top-level structure a hOCR document is decoded into.
type Hocr struct {
	// Lines holds one OcrLine for each span element found at the XML path
	// body>div>div>p>span.
	Lines []OcrLine `xml:"body>div>div>p>span"`
}

func Parse

func Parse(b []byte) (Hocr, error)

Parse parses a hOCR file

type OcrChar

// OcrChar is decoded from a span element nested inside a word (see
// OcrWord.Chars) or inside another OcrChar.
type OcrChar struct {
	// Class is the value of the element's class attribute.
	Class string    `xml:"class,attr"`
	// Id is the value of the element's id attribute.
	Id    string    `xml:"id,attr"`
	// Title is the value of the element's title attribute.
	// NOTE(review): in hOCR this attribute usually carries properties such
	// as the bounding box — confirm against the callers.
	Title string    `xml:"title,attr"`
	// Chars holds any span elements nested directly inside this one.
	Chars []OcrChar `xml:"span"`
	// Text is the character data found directly inside the element.
	Text  string    `xml:",chardata"`
}

type OcrLine

// OcrLine is decoded from one of the span elements collected in Hocr.Lines.
type OcrLine struct {
	// Class is the value of the element's class attribute.
	Class string    `xml:"class,attr"`
	// Id is the value of the element's id attribute.
	Id    string    `xml:"id,attr"`
	// Title is the value of the element's title attribute.
	// NOTE(review): in hOCR this attribute usually carries properties such
	// as the bounding box — confirm against the callers.
	Title string    `xml:"title,attr"`
	// Words holds one OcrWord per span element nested directly inside the line.
	Words []OcrWord `xml:"span"`
	// Text is the character data found directly inside the line element.
	Text  string    `xml:",chardata"`
}

type OcrWord

// OcrWord is decoded from a span element nested inside a line (see
// OcrLine.Words).
type OcrWord struct {
	// Class is the value of the element's class attribute.
	Class string    `xml:"class,attr"`
	// Id is the value of the element's id attribute.
	Id    string    `xml:"id,attr"`
	// Title is the value of the element's title attribute.
	// NOTE(review): in hOCR this attribute usually carries properties such
	// as the bounding box and word confidence — confirm against the callers.
	Title string    `xml:"title,attr"`
	// Chars holds one OcrChar per span element nested directly inside the word.
	Chars []OcrChar `xml:"span"`
	// Text is the character data found directly inside the word element.
	Text  string    `xml:",chardata"`
}

Source Files