Version: v0.17.0 Latest Latest

This package is not in the latest version of its module.

Go to latest
Published: Jan 7, 2022 License: MIT Imports: 11 Imported by: 2




This section is empty.


This section is empty.


func CSVHeader

func CSVHeader(withVerification bool, sep rune) string

CSVHeader returns the header string for CSV output format.


type Kingdom added in v0.17.0

type Kingdom struct {
	// NamesNum is the number of names resolved to the kingdom.
	NamesNum        int     `json:"namesNumber"`
	// Kingdom is the name of the kingdom.
	Kingdom         string  `json:"kingdom"`
	// NamesPercentage is the percentage of names resolved to the kingdom.
	NamesPercentage float32 `json:"namesPercentage"`

Kingdom contains names resolved to it and their percentage.

type Meta

type Meta struct {
	// InputFile is the name of the source file.
	InputFile string `json:"inputFile,omitempty"`

	// TextExtractionSec is the time spent on converting the file
	// into UTF8-encoded text.
	TextExtractionSec float32 `json:"textExtractSec,omitempty"`

	// NameFindingSec is the time spent on name-finding.
	NameFindingSec float32 `json:"nameFindingSec"`

	// NameVerifSec is the time spent on name-verification.
	NameVerifSec float32 `json:"nameVerifSec,omitempty"`

	// TotalSec is the time spent on the whole process.
	TotalSec float32 `json:"totalSec,omitempty"`

	// Date represents time when output was generated.
	Date time.Time `json:"date"`

	// FinderVersion is the version of gnfinder.
	FinderVersion string `json:"gnfinderVersion"`

	// WithBayes indicates the use of Bayes during name-finding.
	WithBayes bool `json:"withBayes"`

	// WithPositionInBytes indicates that names get start/end positions
	// in bytes instead of UTF-8 characters.
	WithPositionInBytes bool `json:"withPositionInBytes"`

	// WithOddsAdjustment indicates that prior odds are adjusted according
	// to the density of scientific names in the text.
	WithOddsAdjustment bool `json:"withOddsAdjustment"`

	// WithVerification is true if results are checked by verification service.
	WithVerification bool `json:"withVerification"`

	// WordsAround shows the number of tokens preserved before and after
	// a name-string candidate.
	WordsAround int `json:"wordsAround"`

	// Language is the language used inside the name-finding algorithm.
	Language string `json:"language"`

	// LanguageDetected is the language detected automatically for the text.
	LanguageDetected string `json:"languageDetected,omitempty"`

	// DetectLanguage presumably indicates that automatic language detection
	// was requested. NOTE(review): the previous comment described this field
	// as "LanguageForced by language option" — confirm the intended meaning.
	DetectLanguage bool `json:"detectLanguage"`

	// TotalTokens is the number of 'normalized' words in the text.
	TotalTokens int `json:"totalWords"`

	// TotalNameCandidates is the number of words that might be the start of
	// a scientific name.
	TotalNameCandidates int `json:"totalCandidates"`

	// TotalNames is the number of scientific names found.
	TotalNames int `json:"totalNames"`

	// Kingdoms are the kingdoms in which the names resolved by
	// the Catalogue of Life are placed.
	// Kingdoms are sorted by percentage in descending order.
	// The first kingdom contains the largest number of names.
	Kingdoms []Kingdom `json:"kingdoms,omitempty"`

	// MainClade is the clade containing the majority of names resolved by
	// the Catalogue of Life.
	MainClade string `json:"mainClade,omitempty"`

	// MainCladeRank is the rank of the MainClade.
	MainCladeRank string `json:"mainCladeRank,omitempty"`

	// MainCladePercentage is the percentage of names in Context.
	MainCladePercentage float32 `json:"mainCladePercentage,omitempty"`

Meta contains meta-information of name-finding result.

type Name

type Name struct {
	// Cardinality depicts the number of elements in a name. 0 - Cannot
	// determine cardinality, 1 - Uninomial, 2 - Binomial, 3 - Trinomial.
	Cardinality int `json:"cardinality"`

	// Verbatim shows the name the way it was in the text.
	Verbatim string `json:"verbatim,omitempty"`

	// Name is a normalized version of a name.
	Name string `json:"name"`

	// Odds shows the probability that name detection was correct.
	// It is excluded from JSON output.
	Odds float64 `json:"-"`

	// OddsLog10 is the Log10 of Odds.
	OddsLog10 float64 `json:"oddsLog10,omitempty"`

	// OddsDetails describes how Odds were calculated.
	OddsDetails token.OddsDetails `json:"oddsDetails,omitempty"`

	// OffsetStart is the start of the name on a page.
	OffsetStart int `json:"start"`

	// OffsetEnd is the end of the name on a page.
	OffsetEnd int `json:"end"`

	// AnnotNomen is a nomenclatural annotation for new species or combination.
	AnnotNomen string `json:"annotationNomen,omitempty"`

	// AnnotNomenType is a normalized nomenclatural annotation.
	AnnotNomenType string `json:"annotationNomenType,omitempty"`

	// Annotation is a placeholder to add more information about the name.
	Annotation string `json:"annotation,omitempty"`

	// WordsBefore are the words that occurred before the name.
	WordsBefore []string `json:"wordsBefore,omitempty"`

	// WordsAfter are the words that occurred right after the name.
	WordsAfter []string `json:"wordsAfter,omitempty"`

	// Verification gives the results of the verification process of the name.
	Verification *vlib.Name `json:"verification,omitempty"`

Name represents one found name.

type OddsDatum

type OddsDatum struct {
	// Name is the boolean decision: Name or NotName.
	Name bool
	// Odds is the odds corresponding to the name.
	Odds float64

OddsDatum is a simplified version of a name, that stores boolean decision (Name/NotName), and corresponding odds of the name.

type Output

type Output struct {
	// Meta is embedded and tagged with the "metadata" JSON key.
	Meta      `json:"metadata"`
	// InputText is omitted from JSON when empty.
	// NOTE(review): presumably the text in which names were searched — confirm.
	InputText string `json:"inputText,omitempty"`
	// Names is the list of found names.
	Names     []Name `json:"names"`

Output type is the result of name-finding.

func TokensToOutput

func TokensToOutput(
	ts []token.TokenSN,
	text []rune,
	version string,
	cfg config.Config) Output

TokensToOutput takes tagged tokens and assembles output out of them.

func (*Output) Format

func (o *Output) Format(f gnfmt.Format) string

func (*Output) MergeVerification

func (o *Output) MergeVerification(
	v map[string]vlib.Name,
	stats gncontext.Context,
	dur float32,

MergeVerification takes a map with verified names and incorporates them into the output.

func (*Output) UniqueNameStrings

func (o *Output) UniqueNameStrings() []string

UniqueNameStrings takes a list of names and returns a list of unique name-strings.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL