tokenize

package
v1.0.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 20, 2025 License: MIT Imports: 4 Imported by: 4

Documentation

Index

Constants

View Source
// Dependency edge labels. Each constant converts a
// v1beta2.DependencyEdge_* value to this package's DependencyEdgeLabel type.
//
// NOTE(review): DependencyEdgeLabelRDropP wraps DependencyEdge_RDROP (the
// trailing "P" has no counterpart in the wrapped name), and
// DependencyEdgeLabelPRonl, DependencyEdgeLabelMwE and DependencyEdgeLabelMwV
// use unusual casing. These names are exported, so confirm with callers
// before renaming any of them.
const (
	DependencyEdgeLabelUnknown      = DependencyEdgeLabel(v1beta2.DependencyEdge_UNKNOWN)
	DependencyEdgeLabelAbbrev       = DependencyEdgeLabel(v1beta2.DependencyEdge_ABBREV)
	DependencyEdgeLabelAComp        = DependencyEdgeLabel(v1beta2.DependencyEdge_ACOMP)
	DependencyEdgeLabelAdvCl        = DependencyEdgeLabel(v1beta2.DependencyEdge_ADVCL)
	DependencyEdgeLabelAdvMod       = DependencyEdgeLabel(v1beta2.DependencyEdge_ADVMOD)
	DependencyEdgeLabelAMod         = DependencyEdgeLabel(v1beta2.DependencyEdge_AMOD)
	DependencyEdgeLabelAppos        = DependencyEdgeLabel(v1beta2.DependencyEdge_APPOS)
	DependencyEdgeLabelAttr         = DependencyEdgeLabel(v1beta2.DependencyEdge_ATTR)
	DependencyEdgeLabelAux          = DependencyEdgeLabel(v1beta2.DependencyEdge_AUX)
	DependencyEdgeLabelAuxPass      = DependencyEdgeLabel(v1beta2.DependencyEdge_AUXPASS)
	DependencyEdgeLabelCC           = DependencyEdgeLabel(v1beta2.DependencyEdge_CC)
	DependencyEdgeLabelCComp        = DependencyEdgeLabel(v1beta2.DependencyEdge_CCOMP)
	DependencyEdgeLabelConj         = DependencyEdgeLabel(v1beta2.DependencyEdge_CONJ)
	DependencyEdgeLabelCSubj        = DependencyEdgeLabel(v1beta2.DependencyEdge_CSUBJ)
	DependencyEdgeLabelCSubjPass    = DependencyEdgeLabel(v1beta2.DependencyEdge_CSUBJPASS)
	DependencyEdgeLabelDep          = DependencyEdgeLabel(v1beta2.DependencyEdge_DEP)
	DependencyEdgeLabelDet          = DependencyEdgeLabel(v1beta2.DependencyEdge_DET)
	DependencyEdgeLabelDiscourse    = DependencyEdgeLabel(v1beta2.DependencyEdge_DISCOURSE)
	DependencyEdgeLabelDObj         = DependencyEdgeLabel(v1beta2.DependencyEdge_DOBJ)
	DependencyEdgeLabelExpl         = DependencyEdgeLabel(v1beta2.DependencyEdge_EXPL)
	DependencyEdgeLabelGoesWith     = DependencyEdgeLabel(v1beta2.DependencyEdge_GOESWITH)
	DependencyEdgeLabelIObj         = DependencyEdgeLabel(v1beta2.DependencyEdge_IOBJ)
	DependencyEdgeLabelMark         = DependencyEdgeLabel(v1beta2.DependencyEdge_MARK)
	DependencyEdgeLabelMwE          = DependencyEdgeLabel(v1beta2.DependencyEdge_MWE)
	DependencyEdgeLabelMwV          = DependencyEdgeLabel(v1beta2.DependencyEdge_MWV)
	DependencyEdgeLabelNeg          = DependencyEdgeLabel(v1beta2.DependencyEdge_NEG)
	DependencyEdgeLabelNN           = DependencyEdgeLabel(v1beta2.DependencyEdge_NN)
	DependencyEdgeLabelNPAdvMod     = DependencyEdgeLabel(v1beta2.DependencyEdge_NPADVMOD)
	DependencyEdgeLabelNSubj        = DependencyEdgeLabel(v1beta2.DependencyEdge_NSUBJ)
	DependencyEdgeLabelNSubjPass    = DependencyEdgeLabel(v1beta2.DependencyEdge_NSUBJPASS)
	DependencyEdgeLabelNum          = DependencyEdgeLabel(v1beta2.DependencyEdge_NUM)
	DependencyEdgeLabelNumber       = DependencyEdgeLabel(v1beta2.DependencyEdge_NUMBER)
	DependencyEdgeLabelP            = DependencyEdgeLabel(v1beta2.DependencyEdge_P)
	DependencyEdgeLabelParataxis    = DependencyEdgeLabel(v1beta2.DependencyEdge_PARATAXIS)
	DependencyEdgeLabelPartMod      = DependencyEdgeLabel(v1beta2.DependencyEdge_PARTMOD)
	DependencyEdgeLabelPComp        = DependencyEdgeLabel(v1beta2.DependencyEdge_PCOMP)
	DependencyEdgeLabelPObj         = DependencyEdgeLabel(v1beta2.DependencyEdge_POBJ)
	DependencyEdgeLabelPoss         = DependencyEdgeLabel(v1beta2.DependencyEdge_POSS)
	DependencyEdgeLabelPostNeg      = DependencyEdgeLabel(v1beta2.DependencyEdge_POSTNEG)
	DependencyEdgeLabelPreComp      = DependencyEdgeLabel(v1beta2.DependencyEdge_PRECOMP)
	DependencyEdgeLabelPreConj      = DependencyEdgeLabel(v1beta2.DependencyEdge_PRECONJ)
	DependencyEdgeLabelPreDet       = DependencyEdgeLabel(v1beta2.DependencyEdge_PREDET)
	DependencyEdgeLabelPref         = DependencyEdgeLabel(v1beta2.DependencyEdge_PREF)
	DependencyEdgeLabelPrep         = DependencyEdgeLabel(v1beta2.DependencyEdge_PREP)
	DependencyEdgeLabelPRonl        = DependencyEdgeLabel(v1beta2.DependencyEdge_PRONL)
	DependencyEdgeLabelPrt          = DependencyEdgeLabel(v1beta2.DependencyEdge_PRT)
	DependencyEdgeLabelPS           = DependencyEdgeLabel(v1beta2.DependencyEdge_PS)
	DependencyEdgeLabelQuantMod     = DependencyEdgeLabel(v1beta2.DependencyEdge_QUANTMOD)
	DependencyEdgeLabelRCMod        = DependencyEdgeLabel(v1beta2.DependencyEdge_RCMOD)
	DependencyEdgeLabelRCModRel     = DependencyEdgeLabel(v1beta2.DependencyEdge_RCMODREL)
	DependencyEdgeLabelRDropP       = DependencyEdgeLabel(v1beta2.DependencyEdge_RDROP)
	DependencyEdgeLabelRef          = DependencyEdgeLabel(v1beta2.DependencyEdge_REF)
	DependencyEdgeLabelRemnant      = DependencyEdgeLabel(v1beta2.DependencyEdge_REMNANT)
	DependencyEdgeLabelReparandum   = DependencyEdgeLabel(v1beta2.DependencyEdge_REPARANDUM)
	DependencyEdgeLabelRoot         = DependencyEdgeLabel(v1beta2.DependencyEdge_ROOT)
	DependencyEdgeLabelSNum         = DependencyEdgeLabel(v1beta2.DependencyEdge_SNUM)
	DependencyEdgeLabelSuff         = DependencyEdgeLabel(v1beta2.DependencyEdge_SUFF)
	DependencyEdgeLabelTMod         = DependencyEdgeLabel(v1beta2.DependencyEdge_TMOD)
	DependencyEdgeLabelTopic        = DependencyEdgeLabel(v1beta2.DependencyEdge_TOPIC)
	DependencyEdgeLabelVMod         = DependencyEdgeLabel(v1beta2.DependencyEdge_VMOD)
	DependencyEdgeLabelVocative     = DependencyEdgeLabel(v1beta2.DependencyEdge_VOCATIVE)
	DependencyEdgeLabelXComp        = DependencyEdgeLabel(v1beta2.DependencyEdge_XCOMP)
	DependencyEdgeLabelSuffix       = DependencyEdgeLabel(v1beta2.DependencyEdge_SUFFIX)
	DependencyEdgeLabelTitle        = DependencyEdgeLabel(v1beta2.DependencyEdge_TITLE)
	DependencyEdgeLabelAdvPhMod     = DependencyEdgeLabel(v1beta2.DependencyEdge_ADVPHMOD)
	DependencyEdgeLabelAuxCaus      = DependencyEdgeLabel(v1beta2.DependencyEdge_AUXCAUS)
	DependencyEdgeLabelAuxVV        = DependencyEdgeLabel(v1beta2.DependencyEdge_AUXVV)
	DependencyEdgeLabelDtMod        = DependencyEdgeLabel(v1beta2.DependencyEdge_DTMOD)
	DependencyEdgeLabelForeign      = DependencyEdgeLabel(v1beta2.DependencyEdge_FOREIGN)
	DependencyEdgeLabelKw           = DependencyEdgeLabel(v1beta2.DependencyEdge_KW)
	DependencyEdgeLabelList         = DependencyEdgeLabel(v1beta2.DependencyEdge_LIST)
	DependencyEdgeLabelNomC         = DependencyEdgeLabel(v1beta2.DependencyEdge_NOMC)
	DependencyEdgeLabelNomCSubj     = DependencyEdgeLabel(v1beta2.DependencyEdge_NOMCSUBJ)
	DependencyEdgeLabelNomCSubjPass = DependencyEdgeLabel(v1beta2.DependencyEdge_NOMCSUBJPASS)
	DependencyEdgeLabelNumC         = DependencyEdgeLabel(v1beta2.DependencyEdge_NUMC)
	DependencyEdgeLabelCop          = DependencyEdgeLabel(v1beta2.DependencyEdge_COP)
	DependencyEdgeLabelDislocated   = DependencyEdgeLabel(v1beta2.DependencyEdge_DISLOCATED)
	DependencyEdgeLabelAsp          = DependencyEdgeLabel(v1beta2.DependencyEdge_ASP)
	DependencyEdgeLabelGMod         = DependencyEdgeLabel(v1beta2.DependencyEdge_GMOD)
	DependencyEdgeLabelGObj         = DependencyEdgeLabel(v1beta2.DependencyEdge_GOBJ)
	DependencyEdgeLabelInfMod       = DependencyEdgeLabel(v1beta2.DependencyEdge_INFMOD)
	DependencyEdgeLabelMes          = DependencyEdgeLabel(v1beta2.DependencyEdge_MES)
	DependencyEdgeLabelNComp        = DependencyEdgeLabel(v1beta2.DependencyEdge_NCOMP)
)
View Source
// Part-of-speech constants. Each constant converts a v1beta2.PartOfSpeech_*
// value to one of this package's typed enums (PartOfSpeechTag,
// PartOfSpeechAspect, PartOfSpeechCase, ...), grouped below by attribute.
//
// The misspelled names PartOfSpechAspect* and PartOfSpeechCaseComplemantive
// are kept as deprecated aliases of the correctly spelled constants so that
// existing callers keep compiling; they have the same type and value.
const (
	// Tags
	PartOfSpeechTagUnknown = PartOfSpeechTag(v1beta2.PartOfSpeech_UNKNOWN)
	PartOfSpeechTagAdj     = PartOfSpeechTag(v1beta2.PartOfSpeech_ADJ)
	PartOfSpeechTagAdp     = PartOfSpeechTag(v1beta2.PartOfSpeech_ADP)
	PartOfSpeechTagAdv     = PartOfSpeechTag(v1beta2.PartOfSpeech_ADV)
	PartOfSpeechTagConj    = PartOfSpeechTag(v1beta2.PartOfSpeech_CONJ)
	PartOfSpeechTagDet     = PartOfSpeechTag(v1beta2.PartOfSpeech_DET)
	PartOfSpeechTagNoun    = PartOfSpeechTag(v1beta2.PartOfSpeech_NOUN)
	PartOfSpeechTagNum     = PartOfSpeechTag(v1beta2.PartOfSpeech_NUM)
	PartOfSpeechTagPron    = PartOfSpeechTag(v1beta2.PartOfSpeech_PRON)
	PartOfSpeechTagPrt     = PartOfSpeechTag(v1beta2.PartOfSpeech_PRT)
	PartOfSpeechTagPunct   = PartOfSpeechTag(v1beta2.PartOfSpeech_PUNCT)
	PartOfSpeechTagVerb    = PartOfSpeechTag(v1beta2.PartOfSpeech_VERB)
	PartOfSpeechTagX       = PartOfSpeechTag(v1beta2.PartOfSpeech_X)
	PartOfSpeechTagAffix   = PartOfSpeechTag(v1beta2.PartOfSpeech_AFFIX)
	// Aspect
	PartOfSpeechAspectUnknown      = PartOfSpeechAspect(v1beta2.PartOfSpeech_ASPECT_UNKNOWN)
	PartOfSpeechAspectPerfective   = PartOfSpeechAspect(v1beta2.PartOfSpeech_PERFECTIVE)
	PartOfSpeechAspectImperfective = PartOfSpeechAspect(v1beta2.PartOfSpeech_IMPERFECTIVE)
	PartOfSpeechAspectProgressive  = PartOfSpeechAspect(v1beta2.PartOfSpeech_PROGRESSIVE)
	// Deprecated: misspelled name; use PartOfSpeechAspectUnknown instead.
	PartOfSpechAspectUnknown = PartOfSpeechAspectUnknown
	// Deprecated: misspelled name; use PartOfSpeechAspectPerfective instead.
	PartOfSpechAspectPerfective = PartOfSpeechAspectPerfective
	// Deprecated: misspelled name; use PartOfSpeechAspectImperfective instead.
	PartOfSpechAspectImperfective = PartOfSpeechAspectImperfective
	// Deprecated: misspelled name; use PartOfSpeechAspectProgressive instead.
	PartOfSpechAspectProgressive = PartOfSpeechAspectProgressive
	// Case
	PartOfSpeechCaseUnknown       = PartOfSpeechCase(v1beta2.PartOfSpeech_CASE_UNKNOWN)
	PartOfSpeechCaseAccusative    = PartOfSpeechCase(v1beta2.PartOfSpeech_ACCUSATIVE)
	PartOfSpeechCaseAdverbial     = PartOfSpeechCase(v1beta2.PartOfSpeech_ADVERBIAL)
	PartOfSpeechCaseComplementive = PartOfSpeechCase(v1beta2.PartOfSpeech_COMPLEMENTIVE)
	PartOfSpeechCaseDative        = PartOfSpeechCase(v1beta2.PartOfSpeech_DATIVE)
	PartOfSpeechCaseGenitive      = PartOfSpeechCase(v1beta2.PartOfSpeech_GENITIVE)
	PartOfSpeechCaseInstrumental  = PartOfSpeechCase(v1beta2.PartOfSpeech_INSTRUMENTAL)
	PartOfSpeechCaseLocative      = PartOfSpeechCase(v1beta2.PartOfSpeech_LOCATIVE)
	PartOfSpeechCaseNominative    = PartOfSpeechCase(v1beta2.PartOfSpeech_NOMINATIVE)
	PartOfSpeechCaseOblique       = PartOfSpeechCase(v1beta2.PartOfSpeech_OBLIQUE)
	PartOfSpeechCasePartitive     = PartOfSpeechCase(v1beta2.PartOfSpeech_PARTITIVE)
	PartOfSpeechCasePrepositional = PartOfSpeechCase(v1beta2.PartOfSpeech_PREPOSITIONAL)
	PartOfSpeechCaseReflexive     = PartOfSpeechCase(v1beta2.PartOfSpeech_REFLEXIVE_CASE)
	PartOfSpeechCaseRelative      = PartOfSpeechCase(v1beta2.PartOfSpeech_RELATIVE_CASE)
	PartOfSpeechCaseVocative      = PartOfSpeechCase(v1beta2.PartOfSpeech_VOCATIVE)
	// Deprecated: misspelled name; use PartOfSpeechCaseComplementive instead.
	PartOfSpeechCaseComplemantive = PartOfSpeechCaseComplementive
	// Form
	PartOfSpeechFormUnknown        = PartOfSpeechForm(v1beta2.PartOfSpeech_FORM_UNKNOWN)
	PartOfSpeechFormAdnomial       = PartOfSpeechForm(v1beta2.PartOfSpeech_ADNOMIAL)
	PartOfSpeechFormAuxiliary      = PartOfSpeechForm(v1beta2.PartOfSpeech_AUXILIARY)
	PartOfSpeechFormComplementizer = PartOfSpeechForm(v1beta2.PartOfSpeech_COMPLEMENTIZER)
	PartOfSpeechFormFinalEnding    = PartOfSpeechForm(v1beta2.PartOfSpeech_FINAL_ENDING)
	PartOfSpeechFormGerund         = PartOfSpeechForm(v1beta2.PartOfSpeech_GERUND)
	PartOfSpeechFormRealis         = PartOfSpeechForm(v1beta2.PartOfSpeech_REALIS)
	PartOfSpeechFormIrrealis       = PartOfSpeechForm(v1beta2.PartOfSpeech_IRREALIS)
	PartOfSpeechFormShort          = PartOfSpeechForm(v1beta2.PartOfSpeech_SHORT)
	PartOfSpeechFormLong           = PartOfSpeechForm(v1beta2.PartOfSpeech_LONG)
	PartOfSpeechFormOrder          = PartOfSpeechForm(v1beta2.PartOfSpeech_ORDER)
	PartOfSpeechFormSpecific       = PartOfSpeechForm(v1beta2.PartOfSpeech_SPECIFIC)
	// Gender
	PartOfSpeechGenderUnknown   = PartOfSpeechGender(v1beta2.PartOfSpeech_GENDER_UNKNOWN)
	PartOfSpeechGenderFeminine  = PartOfSpeechGender(v1beta2.PartOfSpeech_FEMININE)
	PartOfSpeechGenderMasculine = PartOfSpeechGender(v1beta2.PartOfSpeech_MASCULINE)
	PartOfSpeechGenderNeuter    = PartOfSpeechGender(v1beta2.PartOfSpeech_NEUTER)
	// Mood
	PartOfSpeechMoodUnknown       = PartOfSpeechMood(v1beta2.PartOfSpeech_MOOD_UNKNOWN)
	PartOfSpeechMoodConditional   = PartOfSpeechMood(v1beta2.PartOfSpeech_CONDITIONAL_MOOD)
	PartOfSpeechMoodImperative    = PartOfSpeechMood(v1beta2.PartOfSpeech_IMPERATIVE)
	PartOfSpeechMoodIndicative    = PartOfSpeechMood(v1beta2.PartOfSpeech_INDICATIVE)
	PartOfSpeechMoodInterrogative = PartOfSpeechMood(v1beta2.PartOfSpeech_INTERROGATIVE)
	PartOfSpeechMoodJussive       = PartOfSpeechMood(v1beta2.PartOfSpeech_JUSSIVE)
	PartOfSpeechMoodSubjunctive   = PartOfSpeechMood(v1beta2.PartOfSpeech_SUBJUNCTIVE)
	// Number
	PartOfSpeechNumberUnknown  = PartOfSpeechNumber(v1beta2.PartOfSpeech_NUMBER_UNKNOWN)
	PartOfSpeechNumberSingular = PartOfSpeechNumber(v1beta2.PartOfSpeech_SINGULAR)
	PartOfSpeechNumberPlural   = PartOfSpeechNumber(v1beta2.PartOfSpeech_PLURAL)
	PartOfSpeechNumberDual     = PartOfSpeechNumber(v1beta2.PartOfSpeech_DUAL)
	// Person
	PartOfSpeechPersonUnknown   = PartOfSpeechPerson(v1beta2.PartOfSpeech_PERSON_UNKNOWN)
	PartOfSpeechPersonFirst     = PartOfSpeechPerson(v1beta2.PartOfSpeech_FIRST)
	PartOfSpeechPersonSecond    = PartOfSpeechPerson(v1beta2.PartOfSpeech_SECOND)
	PartOfSpeechPersonThird     = PartOfSpeechPerson(v1beta2.PartOfSpeech_THIRD)
	PartOfSpeechPersonReflexive = PartOfSpeechPerson(v1beta2.PartOfSpeech_REFLEXIVE_PERSON)
	// Proper
	PartOfSpeechProperUnknown = PartOfSpeechProper(v1beta2.PartOfSpeech_PROPER_UNKNOWN)
	PartOfSpeechIsProper      = PartOfSpeechProper(v1beta2.PartOfSpeech_PROPER)
	PartOfSpeechIsNotProper   = PartOfSpeechProper(v1beta2.PartOfSpeech_NOT_PROPER)
	// Reciprocity
	PartOfSpeechReciprocityUnknown       = PartOfSpeechReciprocity(v1beta2.PartOfSpeech_RECIPROCITY_UNKNOWN)
	PartOfSpeechReciprocityReciprocal    = PartOfSpeechReciprocity(v1beta2.PartOfSpeech_RECIPROCAL)
	PartOfSpeechReciprocityNonReciprocal = PartOfSpeechReciprocity(v1beta2.PartOfSpeech_NON_RECIPROCAL)
	// Tense
	PartOfSpeechTenseUnknown     = PartOfSpeechTense(v1beta2.PartOfSpeech_TENSE_UNKNOWN)
	PartOfSpeechTenseConditional = PartOfSpeechTense(v1beta2.PartOfSpeech_CONDITIONAL_TENSE)
	PartOfSpeechTenseFuture      = PartOfSpeechTense(v1beta2.PartOfSpeech_FUTURE)
	PartOfSpeechTensePast        = PartOfSpeechTense(v1beta2.PartOfSpeech_PAST)
	PartOfSpeechTensePresent     = PartOfSpeechTense(v1beta2.PartOfSpeech_PRESENT)
	PartOfSpeechTenseImperfect   = PartOfSpeechTense(v1beta2.PartOfSpeech_IMPERFECT)
	PartOfSpeechTensePluperfect  = PartOfSpeechTense(v1beta2.PartOfSpeech_PLUPERFECT)
	// Voice
	PartOfSpeechVoiceUnknown   = PartOfSpeechVoice(v1beta2.PartOfSpeech_VOICE_UNKNOWN)
	PartOfSpeechVoiceActive    = PartOfSpeechVoice(v1beta2.PartOfSpeech_ACTIVE)
	PartOfSpeechVoiceCausative = PartOfSpeechVoice(v1beta2.PartOfSpeech_CAUSATIVE)
	PartOfSpeechVoicePassive   = PartOfSpeechVoice(v1beta2.PartOfSpeech_PASSIVE)
)

Variables

View Source
// NilToken is a placeholder token. Every numeric field of its Text,
// PartOfSpeech and DependencyEdge is set to -1; Text.Content and Lemma are
// left at their zero value (the empty string).
//
// NilToken is a package-level pointer, so all users share one Token value:
// a write through NilToken is visible to every other user of it.
var NilToken = &Token{
	Text: &TextSpan{
		BeginOffset: -1, // Content is not set and stays ""
	},
	// NOTE(review): -1 is presumably outside the range of the wrapped
	// v1beta2 enum values, marking every attribute as "not set" — confirm.
	PartOfSpeech: &PartOfSpeech{
		Tag:         -1,
		Aspect:      -1,
		Case:        -1,
		Form:        -1,
		Gender:      -1,
		Mood:        -1,
		Number:      -1,
		Person:      -1,
		Proper:      -1,
		Reciprocity: -1,
		Tense:       -1,
		Voice:       -1,
	},
	// No head token and no label: the placeholder has no dependency relation.
	DependencyEdge: &DependencyEdge{
		HeadTokenIndex: -1,
		Label:          -1,
	},
}

NilToken can be used as a placeholder token to not lose positional properties. Note that the NilToken loses dependency relations.

Functions

This section is empty.

Types

type Analysis

// Analysis contains the sentences, tokens and sentiment of a tokenized text.
// It is the value returned by Tokenizer.Tokenize.
type Analysis struct {
	// Sentences contains each sentence's text and sentiment.
	Sentences []*Sentence
	// Tokens contains all document tokens.
	Tokens []*Token
	// Sentiment is the document's sentiment.
	Sentiment *Sentiment
}

Analysis contains the sentences, tokens and sentiment of a tokenized text.

func (Analysis) String

func (a Analysis) String() string

type DependencyEdge

// DependencyEdge is the dependency information attached to a Token through
// Token.DependencyEdge. NilToken sets both fields to -1.
//
// NOTE(review): HeadTokenIndex is presumably an index into Analysis.Tokens —
// confirm against the Tokenizer implementation.
type DependencyEdge struct {
	HeadTokenIndex int32               // index of the head token
	Label          DependencyEdgeLabel // one of the DependencyEdgeLabel* constants
}

type DependencyEdgeLabel

// DependencyEdgeLabel is the int32 type of the DependencyEdgeLabel*
// constants, which are converted from v1beta2.DependencyEdge_* values.
type DependencyEdgeLabel int32

type Features

// Features is an integer bit set selecting tokenization features; its flag
// values are the Feature* constants, which can be combined with |.
type Features int

Features is a bit set that selects which analysis features are enabled during the tokenization process.

// Feature flags for Tokenizer.Tokenize. The numeric values depend on the
// order of the specs in this block (see the iota notes below), so do not
// reorder them without checking callers that may rely on the values.
const (
	// FeatureAll enables every feature: syntax and sentiment analysis.
	FeatureAll Features = FeatureSyntax | FeatureSentiment

	// FeatureSyntax enables syntax analysis.
	// FeatureAll above occupies iota 0, so this is 1<<1 (2), not 1.
	FeatureSyntax Features = 1 << iota
	// FeatureSentiment enables sentiment analysis.
	// It repeats the expression above with iota 2, i.e. 1<<2 (4).
	FeatureSentiment
)

type PartOfSpeechAspect

// PartOfSpeechAspect is the int32 type of the aspect constants (unknown,
// perfective, imperfective, progressive). Note that those constants are
// declared with the prefix PartOfSpechAspect (sic).
type PartOfSpeechAspect int32

type PartOfSpeechCase

// PartOfSpeechCase is the int32 type of the PartOfSpeechCase* constants
// (grammatical case: accusative, dative, genitive, ...).
type PartOfSpeechCase int32

type PartOfSpeechForm

// PartOfSpeechForm is the int32 type of the PartOfSpeechForm* constants
// (adnomial, auxiliary, gerund, ...).
type PartOfSpeechForm int32

type PartOfSpeechGender

// PartOfSpeechGender is the int32 type of the PartOfSpeechGender* constants
// (unknown, feminine, masculine, neuter).
type PartOfSpeechGender int32

type PartOfSpeechMood

// PartOfSpeechMood is the int32 type of the PartOfSpeechMood* constants
// (conditional, imperative, indicative, ...).
type PartOfSpeechMood int32

type PartOfSpeechNumber

// PartOfSpeechNumber is the int32 type of the PartOfSpeechNumber* constants
// (unknown, singular, plural, dual).
type PartOfSpeechNumber int32

type PartOfSpeechPerson

// PartOfSpeechPerson is the int32 type of the PartOfSpeechPerson* constants
// (unknown, first, second, third, reflexive).
type PartOfSpeechPerson int32

type PartOfSpeechProper

// PartOfSpeechProper is the int32 type of the constants
// PartOfSpeechProperUnknown, PartOfSpeechIsProper and PartOfSpeechIsNotProper.
type PartOfSpeechProper int32

type PartOfSpeechReciprocity

// PartOfSpeechReciprocity is the int32 type of the PartOfSpeechReciprocity*
// constants (unknown, reciprocal, non-reciprocal).
type PartOfSpeechReciprocity int32

type PartOfSpeechTag

// PartOfSpeechTag is the int32 type of the PartOfSpeechTag* constants
// (adjective, noun, verb, punctuation, ...).
type PartOfSpeechTag int32

type PartOfSpeechTense

// PartOfSpeechTense is the int32 type of the PartOfSpeechTense* constants
// (conditional, future, past, present, imperfect, pluperfect).
type PartOfSpeechTense int32

type PartOfSpeechVoice

// PartOfSpeechVoice is the int32 type of the PartOfSpeechVoice* constants
// (unknown, active, causative, passive).
type PartOfSpeechVoice int32

type Sentence

// Sentence is one sentence of a tokenized text, as held in
// Analysis.Sentences. Both fields are pointers.
type Sentence struct {
	Text      *TextSpan  // the sentence's text
	Sentiment *Sentiment // the sentence's sentiment
}

type Sentiment

// Sentiment is a sentiment result, used both for a whole document
// (Analysis.Sentiment) and for a single sentence (Sentence.Sentiment).
//
// NOTE(review): the value ranges and exact meaning of Magnitude and Score
// are not visible here; presumably they are taken unchanged from the
// upstream sentiment API — confirm.
type Sentiment struct {
	Magnitude float32
	Score     float32
}

type TextSpan

// TextSpan is a piece of text together with an offset. It is used for the
// text of both sentences (Sentence.Text) and tokens (Token.Text).
//
// NOTE(review): BeginOffset is presumably the position of Content within the
// analyzed text; the unit (bytes, runes, ...) is not visible here — confirm.
// NilToken uses -1 as its BeginOffset.
type TextSpan struct {
	Content     string // the text itself
	BeginOffset int32  // offset at which Content begins
}

type Token

// Token is a single token of a tokenized text, as held in Analysis.Tokens.
// Text, PartOfSpeech and DependencyEdge are pointers; NilToken shows a fully
// populated placeholder value.
type Token struct {
	Text           *TextSpan       // the token's text and offset
	PartOfSpeech   *PartOfSpeech   // part-of-speech attributes (tag, aspect, case, ...)
	DependencyEdge *DependencyEdge // head token index and relation label
	Lemma          string          // NOTE(review): presumably the token's lemma (base form) — confirm
}

func (Token) Clone

func (t Token) Clone() (token *Token)

func (Token) DOTID

func (t Token) DOTID() string

func (Token) ID

func (t Token) ID() int64

func (Token) String

func (t Token) String() string

type Tokenizer

// Tokenizer is an interface for tokenizing text with the specified features.
type Tokenizer interface {
	// Tokenize tokenizes text, performing the analyses selected by feats
	// (a combination of the Feature* flags), and returns the resulting
	// Analysis or an error.
	Tokenize(ctx context.Context, text string, feats Features) (Analysis, error)
}

Tokenizer is an interface for tokenizing text with the specified features.

Directories

Path Synopsis
nlp
v2

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL