lib

package

Version: v0.0.0-...-9e5351f (latest) · Published: Feb 22, 2018 · License: MIT · Imports: 24 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/jbaiter/archiscribe

Links

Open Source Insights

Documentation ¶

Index ¶

func FetchLines(ident string) (chan ProgressMessage, chan []OCRLine)
func GetMetadata(ident string) (*simplejson.Json, error)
func GetStartPageNumber(ident string) int
func InitCache()
func IsFraktur(ident string) (bool, error)
func MakeLineIdentifier(volumeID string, line OCRLine) string
func Sha1Digest(inp []byte) string
type Document
type DocumentStore
- func NewDocumentStore(path string) (*DocumentStore, error)
- func (s *DocumentStore) Details(ident string) *Document
- func (s *DocumentStore) List() []*Document
- func (s *DocumentStore) Save(doc Document, author string, email string, comment string) (*Document, error)
type FileStatus
type GitRepo
- func GitOpen(path string) (*GitRepo, error)
- func (r *GitRepo) Add(path string) error
- func (r *GitRepo) CleanUp() error
- func (r *GitRepo) Commit(message string, author string, email string) (string, error)
- func (r *GitRepo) Diff(cached bool) (map[string]FileStatus, error)
- func (r *GitRepo) Log(fpaths ...string) ([]LogEntry, error)
- func (r *GitRepo) Pull(remote string, branch string, rebase bool) error
- func (r *GitRepo) Push(remote string, branch string) error
- func (r *GitRepo) Remove(path string) error
type IdentifierCache
- func CacheIdentifiers(path string) (*IdentifierCache, error)
- func LoadIdentifierCache(path string) *IdentifierCache
- func NewIdentifierCache(path string) *IdentifierCache
- func (c *IdentifierCache) Add(ident string, numPages int, year int)
- func (c *IdentifierCache) Random(year int) IdentifierCacheEntry
- func (c *IdentifierCache) Write()
type IdentifierCacheEntry
type LineImageCache
- func NewLineImageCache(cacheDir string) *LineImageCache
- func (c *LineImageCache) CacheLine(url string, id string) (string, error)
- func (c *LineImageCache) CacheLines(lines []OCRLine, ident string)
- func (c *LineImageCache) GetLinePath(id string) string
- func (c *LineImageCache) PurgeLines(prefix string) error
type LogEntry
type OCRLine
type ProgressMessage
type ProgressReader
- func NewProgressReader(proxied io.Reader) *ProgressReader
- func (r *ProgressReader) Read(p []byte) (n int, err error)
type Result
type SubmitResult
type TaskDefinition

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func FetchLines ¶

func FetchLines(ident string) (chan ProgressMessage, chan []OCRLine)

FetchLines fetches OCR lines for a given Archive.org identifier

func GetMetadata ¶

func GetMetadata(ident string) (*simplejson.Json, error)

GetMetadata fetches metadata for identifier from Archive.org

func GetStartPageNumber ¶

func GetStartPageNumber(ident string) int

GetStartPageNumber determines whether an identifier's first page has index 0 or 1

func InitCache ¶

func InitCache()

InitCache initializes global identifier cache

func IsFraktur ¶

func IsFraktur(ident string) (bool, error)

IsFraktur uses heuristics to determine whether a given identifier is set in a Fraktur typeface

func MakeLineIdentifier ¶

func MakeLineIdentifier(volumeID string, line OCRLine) string

MakeLineIdentifier returns the unique identifier for a line

func Sha1Digest ¶

func Sha1Digest(inp []byte) string

Sha1Digest generates the SHA1 digest for the given data

Types ¶

type Document ¶

type Document struct {
	Identifier string     `json:"id"`
	Title      string     `json:"title"`
	Year       int        `json:"year"`
	Manifest   string     `json:"manifest"`
	Lines      []OCRLine  `json:"lines,omitempty"`
	History    []LogEntry `json:"history,omitempty"`
	NumLines   int        `json:"numLines,omitempty"`
	Reviewed   bool       `json:"reviewed"`
}

Document holds all information about a transcription document

type DocumentStore ¶

type DocumentStore struct {
	// contains filtered or unexported fields
}

DocumentStore offers an interface to the transcriptions

func NewDocumentStore ¶

func NewDocumentStore(path string) (*DocumentStore, error)

NewDocumentStore creates a new document store

func (*DocumentStore) Details ¶

func (s *DocumentStore) Details(ident string) *Document

Details retrieves a single Document by its identifier

func (*DocumentStore) List ¶

func (s *DocumentStore) List() []*Document

List returns all documents

func (*DocumentStore) Save ¶

func (s *DocumentStore) Save(doc Document, author string, email string, comment string) (*Document, error)

Save a document

type FileStatus ¶

type FileStatus rune

FileStatus encodes the status of a file

const (
	StatusModified FileStatus = 'M'
	StatusAdded    FileStatus = 'A'
	StatusDeleted  FileStatus = 'D'
)

Status constants from git diff output

type GitRepo ¶

type GitRepo struct {
	// contains filtered or unexported fields
}

GitRepo represents a Git repository

func GitOpen ¶

func GitOpen(path string) (*GitRepo, error)

GitOpen opens a Git repository

func (*GitRepo) Add ¶

func (r *GitRepo) Add(path string) error

Add stages a new file

func (*GitRepo) CleanUp ¶

func (r *GitRepo) CleanUp() error

CleanUp residual modifications

func (*GitRepo) Commit ¶

func (r *GitRepo) Commit(message string, author string, email string) (string, error)

Commit the staged changes

func (*GitRepo) Diff ¶

func (r *GitRepo) Diff(cached bool) (map[string]FileStatus, error)

Diff lists modified files

func (*GitRepo) Log ¶

func (r *GitRepo) Log(fpaths ...string) ([]LogEntry, error)

Log returns the git log for the given file paths

func (*GitRepo) Pull ¶

func (r *GitRepo) Pull(remote string, branch string, rebase bool) error

Pull from remote and optionally rebase

func (*GitRepo) Push ¶

func (r *GitRepo) Push(remote string, branch string) error

Push changes to remote

func (*GitRepo) Remove ¶

func (r *GitRepo) Remove(path string) error

Remove removes a file

type IdentifierCache ¶

type IdentifierCache struct {
	// contains filtered or unexported fields
}

IdentifierCache stores suitable identifiers

var IDCache *IdentifierCache

IDCache is the global cache for suitable identifiers

func CacheIdentifiers ¶

func CacheIdentifiers(path string) (*IdentifierCache, error)

CacheIdentifiers scrapes the Archive.org API and caches information about relevant identifiers and their number of pages

func LoadIdentifierCache ¶

func LoadIdentifierCache(path string) *IdentifierCache

LoadIdentifierCache loads a cache from a JSON file

func NewIdentifierCache ¶

func NewIdentifierCache(path string) *IdentifierCache

NewIdentifierCache constructs a new cache

func (*IdentifierCache) Add ¶

func (c *IdentifierCache) Add(ident string, numPages int, year int)

Add a new entry to the cache

func (*IdentifierCache) Random ¶

func (c *IdentifierCache) Random(year int) IdentifierCacheEntry

Random returns a random identifier for a given year

func (*IdentifierCache) Write ¶

func (c *IdentifierCache) Write()

Write the cache to disk

type IdentifierCacheEntry ¶

type IdentifierCacheEntry struct {
	Identifier string `json:"id"`
	NumPages   int    `json:"numPages"`
}

IdentifierCacheEntry encodes cached information for a given Archive.org identifier

type LineImageCache ¶

type LineImageCache struct {
	// contains filtered or unexported fields
}

LineImageCache handles cached line images on disk

var LineCache *LineImageCache

LineCache is the global cache for line images

func NewLineImageCache ¶

func NewLineImageCache(cacheDir string) *LineImageCache

NewLineImageCache creates a new line image cache

func (*LineImageCache) CacheLine ¶

func (c *LineImageCache) CacheLine(url string, id string) (string, error)

CacheLine downloads a line image and stores it on disk

func (*LineImageCache) CacheLines ¶

func (c *LineImageCache) CacheLines(lines []OCRLine, ident string)

CacheLines caches all passed lines

func (*LineImageCache) GetLinePath ¶

func (c *LineImageCache) GetLinePath(id string) string

GetLinePath returns the file path for a given line image

func (*LineImageCache) PurgeLines ¶

func (c *LineImageCache) PurgeLines(prefix string) error

PurgeLines removes all cached line images that match the prefix

type LogEntry ¶

type LogEntry struct {
	Author struct {
		Name  string `json:"name"`
		Email string `json:"email"`
	} `json:"author"`
	Date    time.Time `json:"date"`
	Commit  string    `json:"commit"`
	Subject string    `json:"subject"`
	Body    string    `json:"body,omitempty"`
}

LogEntry encodes a git log entry

type OCRLine ¶

type OCRLine struct {
	Identifier       string `json:"id"`
	ImageURL         string `json:"line"`
	PreviousImageURL string `json:"previous,omitempty"`
	NextImageURL     string `json:"next,omitempty"`
	Transcription    string `json:"transcription,omitempty"`
}

OCRLine contains information about an OCR line

type ProgressMessage ¶

type ProgressMessage struct {
	Step       string  `json:"step"`
	Progress   float64 `json:"progress"`
	BytesTotal int64   `json:"bytesTotal,omitempty"`
	BytesRead  int64   `json:"bytesRead,omitempty"`
	PageNumber int     `json:"pageNumber,omitempty"`
	LineNumber int     `json:"lineNumber,omitempty"`
	Error      error   `json:"error,omitempty"`
}

ProgressMessage contains progress information for the ABBYY parsing task

type ProgressReader ¶

type ProgressReader struct {
	BytesRead int64
	// contains filtered or unexported fields
}

ProgressReader wraps another reader and exposes progress information

func NewProgressReader ¶

func NewProgressReader(proxied io.Reader) *ProgressReader

NewProgressReader creates a new ProgressReader from a given Reader

func (*ProgressReader) Read ¶

func (r *ProgressReader) Read(p []byte) (n int, err error)

type Result ¶

type Result struct {
	// contains filtered or unexported fields
}

Result stores a response from the Archive.org Scraping API

type SubmitResult ¶

type SubmitResult struct {
	Document Document
	Error    error
}

SubmitResult holds the result of a submission

type TaskDefinition ¶

type TaskDefinition struct {
	Document   Document          `json:"document"`
	Author     string            `json:"author,omitempty"`
	Email      string            `json:"email,omitempty"`
	Comment    string            `json:"comment,omitempty"`
	ResultChan chan SubmitResult `json:"-"`
}

TaskDefinition encodes a finished transcription along with author information

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL