Versions in this module
Expand all Collapse all

v0
v0.0.0 Oct 28, 2019

Changes in this version
+ const BorderWidth
+ const ShadowWidth
+ var CheckConsistency = false
+ var Debug bool
+ var ErrNoMatch = errors.New("no match for hit")
+ var ErrNoPositions = errors.New("no match for hit")
+ var ExposeErrors bool
+ var Trace bool
+ func ExportBleveMem(index bleve.Index) ([]byte, error)
+ func ExtractPageTextMarks(page *model.PdfPage) (string, *extractor.TextMarkArray, error)
+ func ImportBleveMem(data []byte) (bleve.Index, error)
+ func PageSizePt(page *model.PdfPage) (width, height float64, err error)
+ func PdfOpenFile(inPath string) (*model.PdfReader, error)
+ func PdfOpenFileLazy(inPath string) (*os.File, *model.PdfReader, error)
+ func PdfOpenReader(rs io.ReadSeeker, lazy bool) (*model.PdfReader, error)
+ func ProcessPDFPagesFile(inPath string, processPage func(pageNum uint32, page *model.PdfPage) error) error
+ func ProcessPDFPagesReader(inPath string, rs io.ReadSeeker, ...) error
+ type BlevePdf struct
  + func BlevePdfFromHIPDs(hipds []serial.HashIndexPathDoc) (BlevePdf, error)
  + func IndexPdfFilesOrReaders(pathList []string, rsList []io.ReadSeeker, persistDir string, forceCreate bool, ...) (*BlevePdf, bleve.Index, int, int, time.Duration, time.Duration, error)
  + func IndexPdfFilesUsingReaders(pathList []string, persistDir string, forceCreate bool, report func(string)) (*BlevePdf, bleve.Index, int, int, time.Duration, time.Duration, error)
  + func (blevePdf *BlevePdf) Equals(other *BlevePdf) bool
  + func (blevePdf *BlevePdf) SearchBleveIndex(index bleve.Index, term0 string, maxResults int) (PdfMatchSet, error)
  + func (blevePdf BlevePdf) Len() int
  + func (blevePdf BlevePdf) String() string
  + func (blevePdf BlevePdf) ToHIPDs() ([]serial.HashIndexPathDoc, error)
+ type DocPageText struct
  + DocIdx uint64
  + PageIdx uint32
  + PageNum uint32
  + Text string
+ type DocPositions struct
  + func (docPos *DocPositions) AddDocPage(pageNum uint32, ppos PagePositions, text string) (uint32, error)
  + func (docPos *DocPositions) Close() error
  + func (docPos *DocPositions) Equals(e *DocPositions) bool
  + func (docPos *DocPositions) Save() error
  + func (docPos DocPositions) Len() int
  + func (docPos DocPositions) String() string
+ type ExtractList struct
  + func CreateExtractList(maxPages, maxPerPage int) *ExtractList
  + func (l *ExtractList) AddRect(inPath string, pageNum uint32, r model.PdfRectangle)
  + func (l *ExtractList) SaveOutputPdf(outPath string) error
  + func (l ExtractList) String() string
+ type IDText struct
  + ID string
  + Text string
+ type PDFPageProcessor struct
  + func CreatePDFPageProcessorFile(inPath string) (*PDFPageProcessor, error)
  + func CreatePDFPageProcessorReader(inPath string, rs io.ReadSeeker) (*PDFPageProcessor, error)
  + func (p *PDFPageProcessor) Close() error
  + func (p *PDFPageProcessor) Process(processPage func(pageNum uint32, page *model.PdfPage) error) (err error)
  + func (p PDFPageProcessor) NumPages() (uint32, error)
+ type PagePositions struct
  + func PagePositionsFromTextMarks(textMarks *extractor.TextMarkArray) PagePositions
  + func (ppos PagePositions) BBox(start, end uint32) (model.PdfRectangle, bool)
  + func (ppos PagePositions) Empty() bool
  + func (ppos PagePositions) Equals(epl PagePositions) bool
  + func (ppos PagePositions) String() string
+ type PdfMatchSet struct
  + Matches []PdfPageMatch
  + SearchDuration time.Duration
  + TotalMatches int
  + func SearchPersistentPdfIndex(persistDir, term string, maxResults int) (PdfMatchSet, error)
  + func (p PdfMatchSet) Best() PdfMatchSet
  + func (p PdfMatchSet) Equals(q PdfMatchSet) bool
  + func (s PdfMatchSet) Files() []string
  + func (s PdfMatchSet) String() string
+ type PdfPageMatch struct
  + InPath string
  + LineNums []int
  + Lines []string
  + PageNum uint32
  + func (p PdfPageMatch) String() string
+ type Phrase struct
+ type Span struct
  + End uint32
  + Score float64
  + Start uint32