Documentation
¶
Index ¶
- func FetchLines(ident string) (chan ProgressMessage, chan []OCRLine)
- func GetMetadata(ident string) (*simplejson.Json, error)
- func GetStartPageNumber(ident string) int
- func InitCache()
- func IsFraktur(ident string) (bool, error)
- func MakeLineIdentifier(volumeID string, line OCRLine) string
- func Sha1Digest(inp []byte) string
- type Document
- type DocumentStore
- type FileStatus
- type GitRepo
- func (r *GitRepo) Add(path string) error
- func (r *GitRepo) CleanUp() error
- func (r *GitRepo) Commit(message string, author string, email string) (string, error)
- func (r *GitRepo) Diff(cached bool) (map[string]FileStatus, error)
- func (r *GitRepo) Log(fpaths ...string) ([]LogEntry, error)
- func (r *GitRepo) Pull(remote string, branch string, rebase bool) error
- func (r *GitRepo) Push(remote string, branch string) error
- func (r *GitRepo) Remove(path string) error
- type IdentifierCache
- type IdentifierCacheEntry
- type LineImageCache
- type LogEntry
- type OCRLine
- type ProgressMessage
- type ProgressReader
- type Result
- type SubmitResult
- type TaskDefinition
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func FetchLines ¶
func FetchLines(ident string) (chan ProgressMessage, chan []OCRLine)
FetchLines fetches OCR lines for a given Archive.org identifier
func GetMetadata ¶
func GetMetadata(ident string) (*simplejson.Json, error)
GetMetadata fetches metadata for identifier from Archive.org
func GetStartPageNumber ¶
GetStartPageNumber determines whether an identifier's first page has index 0 or 1
func IsFraktur ¶
IsFraktur uses heuristics to determine whether a given identifier is set in a Fraktur typeface
func MakeLineIdentifier ¶
MakeLineIdentifier returns the unique identifier for a line
func Sha1Digest ¶
Sha1Digest generates the SHA1 digest for the given data
Types ¶
type Document ¶
type Document struct {
Identifier string `json:"id"`
Title string `json:"title"`
Year int `json:"year"`
Manifest string `json:"manifest"`
Lines []OCRLine `json:"lines,omitempty"`
History []LogEntry `json:"history,omitempty"`
NumLines int `json:"numLines,omitempty"`
Reviewed bool `json:"reviewed"`
}
Document holds all information about a transcription document
type DocumentStore ¶
type DocumentStore struct {
// contains filtered or unexported fields
}
DocumentStore offers an interface to the transcriptions
func NewDocumentStore ¶
func NewDocumentStore(path string) (*DocumentStore, error)
NewDocumentStore creates a new document store
func (*DocumentStore) Details ¶
func (s *DocumentStore) Details(ident string) *Document
Details retrieves a single Document by its identifier
type FileStatus ¶
type FileStatus rune
FileStatus encodes the status of a file
const ( StatusModified FileStatus = 'M' StatusAdded FileStatus = 'A' StatusDeleted FileStatus = 'D' )
Status constants from git diff output
type GitRepo ¶
type GitRepo struct {
// contains filtered or unexported fields
}
GitRepo represents a Git repository
func (*GitRepo) Diff ¶
func (r *GitRepo) Diff(cached bool) (map[string]FileStatus, error)
Diff lists modified files
type IdentifierCache ¶
type IdentifierCache struct {
// contains filtered or unexported fields
}
IdentifierCache stores suitable identifiers
var IDCache *IdentifierCache
IDCache is the global cache for suitable identifiers
func CacheIdentifiers ¶
func CacheIdentifiers(path string) (*IdentifierCache, error)
CacheIdentifiers scrapes the Archive.org API and caches information about relevant identifiers and their number of pages
func LoadIdentifierCache ¶
func LoadIdentifierCache(path string) *IdentifierCache
LoadIdentifierCache loads a cache from a JSON file
func NewIdentifierCache ¶
func NewIdentifierCache(path string) *IdentifierCache
NewIdentifierCache constructs a new cache
func (*IdentifierCache) Add ¶
func (c *IdentifierCache) Add(ident string, numPages int, year int)
Add adds a new entry to the cache
func (*IdentifierCache) Random ¶
func (c *IdentifierCache) Random(year int) IdentifierCacheEntry
Random returns a random identifier for a given year
type IdentifierCacheEntry ¶
IdentifierCacheEntry encodes cached information for a given Archive.org identifier
type LineImageCache ¶
type LineImageCache struct {
// contains filtered or unexported fields
}
LineImageCache handles cached line images on disk
var LineCache *LineImageCache
LineCache is the global cache for line images
func NewLineImageCache ¶
func NewLineImageCache(cacheDir string) *LineImageCache
NewLineImageCache creates a new line image cache
func (*LineImageCache) CacheLine ¶
func (c *LineImageCache) CacheLine(url string, id string) (string, error)
CacheLine downloads a line image and stores it on disk
func (*LineImageCache) CacheLines ¶
func (c *LineImageCache) CacheLines(lines []OCRLine, ident string)
CacheLines caches all passed lines
func (*LineImageCache) GetLinePath ¶
func (c *LineImageCache) GetLinePath(id string) string
GetLinePath returns the file path for a given line image
func (*LineImageCache) PurgeLines ¶
func (c *LineImageCache) PurgeLines(prefix string) error
PurgeLines removes all cached line images that match the prefix
type LogEntry ¶
type LogEntry struct {
Author struct {
Name string `json:"name"`
Email string `json:"email"`
} `json:"author"`
Date time.Time `json:"date"`
Commit string `json:"commit"`
Subject string `json:"subject"`
Body string `json:"body,omitempty"`
}
LogEntry encodes a git log entry
type OCRLine ¶
type OCRLine struct {
Identifier string `json:"id"`
ImageURL string `json:"line"`
PreviousImageURL string `json:"previous,omitempty"`
NextImageURL string `json:"next,omitempty"`
Transcription string `json:"transcription,omitempty"`
}
OCRLine contains information about an OCR line
type ProgressMessage ¶
type ProgressMessage struct {
Step string `json:"step"`
Progress float64 `json:"progress"`
BytesTotal int64 `json:"bytesTotal,omitempty"`
BytesRead int64 `json:"bytesRead,omitempty"`
PageNumber int `json:"pageNumber,omitempty"`
LineNumber int `json:"lineNumber,omitempty"`
Error error `json:"error,omitempty"`
}
ProgressMessage contains progress information for the ABBYY parsing task
type ProgressReader ¶
type ProgressReader struct {
BytesRead int64
// contains filtered or unexported fields
}
ProgressReader wraps another reader and exposes progress information
func NewProgressReader ¶
func NewProgressReader(proxied io.Reader) *ProgressReader
NewProgressReader creates a new ProgressReader from a given Reader
type Result ¶
type Result struct {
// contains filtered or unexported fields
}
Result stores a response from the Archive.org Scraping API
type SubmitResult ¶
SubmitResult holds the result of a submission
type TaskDefinition ¶
type TaskDefinition struct {
Document Document `json:"document"`
Author string `json:"author,omitempty"`
Email string `json:"email,omitempty"`
Comment string `json:"comment,omitempty"`
ResultChan chan SubmitResult `json:"-"`
}
TaskDefinition encodes a finished transcription along with author information