lib

package
v0.0.0-...-9e5351f Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 22, 2018 License: MIT Imports: 24 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func FetchLines

func FetchLines(ident string) (chan ProgressMessage, chan []OCRLine)

FetchLines fetches OCR lines for a given Archive.org identifier

func GetMetadata

func GetMetadata(ident string) (*simplejson.Json, error)

GetMetadata fetches metadata for identifier from Archive.org

func GetStartPageNumber

func GetStartPageNumber(ident string) int

GetStartPageNumber determines whether an identifier's first page has index 0 or 1

func InitCache

func InitCache()

InitCache initializes global identifier cache

func IsFraktur

func IsFraktur(ident string) (bool, error)

IsFraktur uses heuristics to determine whether a given identifier is set in a Fraktur typeface

func MakeLineIdentifier

func MakeLineIdentifier(volumeID string, line OCRLine) string

MakeLineIdentifier returns the unique identifier for a line

func Sha1Digest

func Sha1Digest(inp []byte) string

Sha1Digest generates the SHA1 digest for the given data

Types

type Document

// Document is the JSON-serializable record of one transcription document.
type Document struct {
	// Identifier is serialized under the JSON key "id" (not "identifier").
	Identifier string     `json:"id"`
	Title      string     `json:"title"`
	Year       int        `json:"year"`
	// NOTE(review): presumably a URL or path to a manifest for this document —
	// confirm against the code that populates it.
	Manifest   string     `json:"manifest"`
	// Lines is left out of the JSON output when empty (omitempty).
	Lines      []OCRLine  `json:"lines,omitempty"`
	// History is a list of LogEntry values; left out of the JSON output when empty.
	History    []LogEntry `json:"history,omitempty"`
	// NumLines is left out of the JSON output when it is 0 (omitempty), so a
	// consumer cannot tell "zero" from "not set".
	// NOTE(review): presumably the number of entries in Lines — confirm.
	NumLines   int        `json:"numLines,omitempty"`
	// Reviewed has no omitempty, so it is always serialized, including when false.
	Reviewed   bool       `json:"reviewed"`
}

Document holds all information about a transcription document

type DocumentStore

type DocumentStore struct {
	// contains filtered or unexported fields
}

DocumentStore offers an interface to the transcriptions

func NewDocumentStore

func NewDocumentStore(path string) (*DocumentStore, error)

NewDocumentStore creates a new document store

func (*DocumentStore) Details

func (s *DocumentStore) Details(ident string) *Document

Details retrieves a single Document by its identifier

func (*DocumentStore) List

func (s *DocumentStore) List() []*Document

List all documents

func (*DocumentStore) Save

func (s *DocumentStore) Save(doc Document, author string, email string, comment string) (*Document, error)

Save a document

type FileStatus

type FileStatus rune

FileStatus encodes the status of a file

// FileStatus values; each is the single status letter used in git diff output.
const (
	// StatusModified is the letter 'M'.
	StatusModified FileStatus = 'M'
	// StatusAdded is the letter 'A'.
	StatusAdded    FileStatus = 'A'
	// StatusDeleted is the letter 'D'.
	StatusDeleted  FileStatus = 'D'
)

Status constants from git diff output

type GitRepo

type GitRepo struct {
	// contains filtered or unexported fields
}

GitRepo represents a Git repository

func GitOpen

func GitOpen(path string) (*GitRepo, error)

GitOpen opens the Git repository at the given path

func (*GitRepo) Add

func (r *GitRepo) Add(path string) error

Add stages a new file

func (*GitRepo) CleanUp

func (r *GitRepo) CleanUp() error

CleanUp residual modifications

func (*GitRepo) Commit

func (r *GitRepo) Commit(message string, author string, email string) (string, error)

Commit the staged changes

func (*GitRepo) Diff

func (r *GitRepo) Diff(cached bool) (map[string]FileStatus, error)

Diff lists modified files

func (*GitRepo) Log

func (r *GitRepo) Log(fpaths ...string) ([]LogEntry, error)

Log returns the git log for the given file paths

func (*GitRepo) Pull

func (r *GitRepo) Pull(remote string, branch string, rebase bool) error

Pull from remote and optionally rebase

func (*GitRepo) Push

func (r *GitRepo) Push(remote string, branch string) error

Push changes to remote

func (*GitRepo) Remove

func (r *GitRepo) Remove(path string) error

Remove removes a file

type IdentifierCache

type IdentifierCache struct {
	// contains filtered or unexported fields
}

IdentifierCache stores suitable identifiers

var IDCache *IdentifierCache

IDCache is the global cache for suitable identifiers

func CacheIdentifiers

func CacheIdentifiers(path string) (*IdentifierCache, error)

CacheIdentifiers scrapes the Archive.org API and caches information about relevant identifiers and their number of pages

func LoadIdentifierCache

func LoadIdentifierCache(path string) *IdentifierCache

LoadIdentifierCache loads a cache from a JSON file

func NewIdentifierCache

func NewIdentifierCache(path string) *IdentifierCache

NewIdentifierCache constructs a new cache

func (*IdentifierCache) Add

func (c *IdentifierCache) Add(ident string, numPages int, year int)

Add a new entry to the cache

func (*IdentifierCache) Random

func (c *IdentifierCache) Random(year int) IdentifierCacheEntry

Random returns a random identifier for a given year

func (*IdentifierCache) Write

func (c *IdentifierCache) Write()

Write the cache to disk

type IdentifierCacheEntry

// IdentifierCacheEntry is the JSON-serializable cache record for one identifier.
type IdentifierCacheEntry struct {
	// Identifier is serialized under the JSON key "id".
	Identifier string `json:"id"`
	// NumPages is serialized under the JSON key "numPages".
	// NOTE(review): the entry carries no year field — presumably the year is
	// tracked by the enclosing cache rather than per entry; confirm.
	NumPages   int    `json:"numPages"`
}

IdentifierCacheEntry encodes cached information for a given Archive.org identifier

type LineImageCache

type LineImageCache struct {
	// contains filtered or unexported fields
}

LineImageCache handles cached line images on disk

var LineCache *LineImageCache

LineCache is the global cache for line images

func NewLineImageCache

func NewLineImageCache(cacheDir string) *LineImageCache

NewLineImageCache creates a new line image cache

func (*LineImageCache) CacheLine

func (c *LineImageCache) CacheLine(url string, id string) (string, error)

CacheLine downloads a line image and stores it on disk

func (*LineImageCache) CacheLines

func (c *LineImageCache) CacheLines(lines []OCRLine, ident string)

CacheLines caches all passed lines

func (*LineImageCache) GetLinePath

func (c *LineImageCache) GetLinePath(id string) string

GetLinePath returns the file path for a given line image

func (*LineImageCache) PurgeLines

func (c *LineImageCache) PurgeLines(prefix string) error

PurgeLines removes all cached line images that match the prefix

type LogEntry

// LogEntry is the JSON-serializable form of a single git log entry.
type LogEntry struct {
	// Author is an anonymous nested struct, emitted as an object under the
	// JSON key "author" with the keys "name" and "email".
	Author struct {
		Name  string `json:"name"`
		Email string `json:"email"`
	} `json:"author"`
	Date    time.Time `json:"date"`
	// NOTE(review): presumably the commit hash — confirm whether it is the
	// full or the abbreviated form.
	Commit  string    `json:"commit"`
	Subject string    `json:"subject"`
	// Body is left out of the JSON output when empty (omitempty).
	Body    string    `json:"body,omitempty"`
}

LogEntry encodes a git log entry

type OCRLine

// OCRLine is the JSON-serializable record for one OCR line.
type OCRLine struct {
	// Identifier is serialized under the JSON key "id".
	Identifier       string `json:"id"`
	// ImageURL is serialized under the JSON key "line", not "imageURL".
	ImageURL         string `json:"line"`
	// PreviousImageURL is serialized as "previous" and left out when empty.
	PreviousImageURL string `json:"previous,omitempty"`
	// NextImageURL is serialized as "next" and left out when empty.
	NextImageURL     string `json:"next,omitempty"`
	// Transcription is left out of the JSON output when empty (omitempty).
	Transcription    string `json:"transcription,omitempty"`
}

OCRLine contains information about an OCR line

type ProgressMessage

// ProgressMessage is the JSON-serializable progress report for a running task.
type ProgressMessage struct {
	// Step and Progress have no omitempty, so they are always serialized,
	// including as "" and 0.
	Step       string  `json:"step"`
	// NOTE(review): the scale of Progress (0-1 vs. 0-100) is not visible here — confirm.
	Progress   float64 `json:"progress"`
	// The byte, page and line counters below are left out of the JSON output
	// when they are 0 (omitempty).
	BytesTotal int64   `json:"bytesTotal,omitempty"`
	BytesRead  int64   `json:"bytesRead,omitempty"`
	PageNumber int     `json:"pageNumber,omitempty"`
	LineNumber int     `json:"lineNumber,omitempty"`
	// Error is left out of the JSON output when nil.
	// NOTE(review): encoding/json encodes an error interface value through its
	// concrete type; a type with no exported fields and no MarshalJSON or
	// MarshalText method (e.g. the result of errors.New) encodes as {} —
	// confirm that the error text actually reaches JSON consumers.
	Error      error   `json:"error,omitempty"`
}

ProgressMessage contains progress information for the ABBYY parsing task

type ProgressReader

type ProgressReader struct {
	BytesRead int64
	// contains filtered or unexported fields
}

ProgressReader wraps another reader and exposes progress information

func NewProgressReader

func NewProgressReader(proxied io.Reader) *ProgressReader

NewProgressReader creates a new ProgressReader from a given Reader

func (*ProgressReader) Read

func (r *ProgressReader) Read(p []byte) (n int, err error)

type Result

type Result struct {
	// contains filtered or unexported fields
}

Result stores a response from the Archive.org Scraping API

type SubmitResult

// SubmitResult pairs a Document with an error value for one submission.
// The fields carry no JSON tags.
type SubmitResult struct {
	// Document is held by value, not by pointer.
	Document Document
	// NOTE(review): presumably nil on success and non-nil on failure, in which
	// case Document may not be meaningful — confirm against the producer.
	Error    error
}

SubmitResult holds the result of a submission

type TaskDefinition

// TaskDefinition bundles a Document with author information and a result channel.
type TaskDefinition struct {
	Document   Document          `json:"document"`
	// Author, Email and Comment are left out of the JSON output when empty.
	// NOTE(review): they mirror the author/email/comment parameters of
	// DocumentStore.Save — presumably they are passed through to it; confirm.
	Author     string            `json:"author,omitempty"`
	Email      string            `json:"email,omitempty"`
	Comment    string            `json:"comment,omitempty"`
	// ResultChan is excluded from JSON entirely (tag "-"); it carries
	// SubmitResult values.
	ResultChan chan SubmitResult `json:"-"`
}

TaskDefinition encodes a finished transcription along with author information

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL