Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Cache ¶
// Cache groups the operations for looking up and storing extracted documents.
// The lookups (GetMetadata, StreamText) take a url string; Save takes a whole ExtractedDocument.
type Cache interface {
// GetMetadata returns the DocumentMetadata for url, or an error.
// NOTE(review): what is returned on a cache miss is not visible here — confirm against the implementations.
GetMetadata(url string) (DocumentMetadata, error)
// StreamText takes a url and an io.Writer and returns an error.
// Presumably it writes the text stored for url to w, mirroring Document.StreamText — confirm.
StreamText(url string, w io.Writer) error
// Save takes an ExtractedDocument and returns a *jetstream.ObjectInfo or an error.
// NOTE(review): presumably the returned info describes the object that was stored — confirm.
Save(doc ExtractedDocument) (*jetstream.ObjectInfo, error)
}
type Document ¶
type Document interface {
// StreamText writes all text of the document to w.
StreamText(w io.Writer) error
// Pages returns the document's number of pages.
// It returns -1 if the concept of pages is not applicable to the file type.
Pages() int
// Text returns a single page's text, plus true if there is at least one image on that page.
// NOTE(review): the int argument presumably selects the page; whether it is zero- or one-based is not visible here — confirm.
Text(int) (string, bool)
// Data returns a pointer to the underlying byte slice, or nil if the document was loaded from disk.
Data() *[]byte
// Path returns the filesystem path the document was loaded from, or an empty string if it was not loaded from disk.
Path() string
// MetadataMap returns the document's properties (such as Author and Title) as a DocumentMetadata.
MetadataMap() DocumentMetadata
// HasNewlines reports whether dehyphenation is possible and sensible.
HasNewlines() bool
// Close releases resources associated with the document.
Close()
}
Document represents any kind of document this service can convert to plain text.
type DocumentMetadata ¶
type ExtractedDocument ¶
ExtractedDocument contains pointers to the metadata, the textual content and the URL of origin of a document.
type NopCache ¶
// NopCache is an empty struct with GetMetadata and Save methods on its pointer receiver.
// NOTE(review): judging by the name it is presumably a do-nothing Cache; no StreamText
// method is listed for it here, so confirm whether *NopCache actually satisfies Cache.
type NopCache struct{}
func (*NopCache) GetMetadata ¶
func (c *NopCache) GetMetadata(url string) (DocumentMetadata, error)
func (*NopCache) Save ¶
func (c *NopCache) Save(doc ExtractedDocument) (*jetstream.ObjectInfo, error)
type ObjectStoreCache ¶
// ObjectStoreCache embeds jetstream.ObjectStore, so the embedded type's methods are promoted
// onto it, and it adds GetMetadata, Save and StreamText with value receivers.
// NOTE(review): presumably this is the Cache implementation backed by a JetStream object store — confirm.
type ObjectStoreCache struct {
jetstream.ObjectStore
// contains filtered or unexported fields
}
func (ObjectStoreCache) GetMetadata ¶
func (store ObjectStoreCache) GetMetadata(url string) (DocumentMetadata, error)
func (ObjectStoreCache) Save ¶
func (store ObjectStoreCache) Save(doc ExtractedDocument) (*jetstream.ObjectInfo, error)
func (ObjectStoreCache) StreamText ¶
func (store ObjectStoreCache) StreamText(url string, w io.Writer) error
Click to show internal directories.
Click to hide internal directories.