Versions in this module Expand all Collapse all v1 v1.2.2 May 26, 2022 v1.2.1 May 26, 2022 Changes in this version + const Default_ModelProto_SentencePiece_Type + const Default_NormalizerSpec_AddDummyPrefix + const Default_NormalizerSpec_EscapeWhitespaces + const Default_NormalizerSpec_RemoveExtraWhitespaces + const Default_TrainerSpec_AllowWhitespaceOnlyPieces + const Default_TrainerSpec_BosId + const Default_TrainerSpec_BosPiece + const Default_TrainerSpec_ByteFallback + const Default_TrainerSpec_CharacterCoverage + const Default_TrainerSpec_DifferentialPrivacyClippingThreshold + const Default_TrainerSpec_DifferentialPrivacyNoiseLevel + const Default_TrainerSpec_EnableDifferentialPrivacy + const Default_TrainerSpec_EosId + const Default_TrainerSpec_EosPiece + const Default_TrainerSpec_HardVocabLimit + const Default_TrainerSpec_InputSentenceSize + const Default_TrainerSpec_MaxSentenceLength + const Default_TrainerSpec_MaxSentencepieceLength + const Default_TrainerSpec_ModelType + const Default_TrainerSpec_NumSubIterations + const Default_TrainerSpec_NumThreads + const Default_TrainerSpec_PadId + const Default_TrainerSpec_PadPiece + const Default_TrainerSpec_SeedSentencepieceSize + const Default_TrainerSpec_SelfTestSampleSize + const Default_TrainerSpec_ShrinkingFactor + const Default_TrainerSpec_ShuffleInputSentence + const Default_TrainerSpec_SplitByNumber + const Default_TrainerSpec_SplitByUnicodeScript + const Default_TrainerSpec_SplitByWhitespace + const Default_TrainerSpec_SplitDigits + const Default_TrainerSpec_TrainExtremelyLargeCorpus + const Default_TrainerSpec_TreatWhitespaceAsSuffix + const Default_TrainerSpec_UnkId + const Default_TrainerSpec_UnkPiece + const Default_TrainerSpec_UnkSurface + const Default_TrainerSpec_UseAllVocab + const Default_TrainerSpec_VocabSize + const Default_TrainerSpec_VocabularyOutputPieceScore + var File_sentencepiece_model_proto protoreflect.FileDescriptor + var ModelProto_SentencePiece_Type_name = map[int32]string + var ModelProto_SentencePiece_Type_value = map[string]int32 + var TrainerSpec_ModelType_name = map[int32]string + var TrainerSpec_ModelType_value = map[string]int32 + type ModelProto struct + DenormalizerSpec *NormalizerSpec + NormalizerSpec *NormalizerSpec + Pieces []*ModelProto_SentencePiece + SelfTestData *SelfTestData + TrainerSpec *TrainerSpec + func (*ModelProto) Descriptor() ([]byte, []int) + func (*ModelProto) ProtoMessage() + func (x *ModelProto) GetDenormalizerSpec() *NormalizerSpec + func (x *ModelProto) GetNormalizerSpec() *NormalizerSpec + func (x *ModelProto) GetPieces() []*ModelProto_SentencePiece + func (x *ModelProto) GetSelfTestData() *SelfTestData + func (x *ModelProto) GetTrainerSpec() *TrainerSpec + func (x *ModelProto) ProtoReflect() protoreflect.Message + func (x *ModelProto) Reset() + func (x *ModelProto) String() string + type ModelProto_SentencePiece struct + Piece *string + Score *float32 + Type *ModelProto_SentencePiece_Type + func (*ModelProto_SentencePiece) Descriptor() ([]byte, []int) + func (*ModelProto_SentencePiece) ProtoMessage() + func (x *ModelProto_SentencePiece) GetPiece() string + func (x *ModelProto_SentencePiece) GetScore() float32 + func (x *ModelProto_SentencePiece) GetType() ModelProto_SentencePiece_Type + func (x *ModelProto_SentencePiece) ProtoReflect() protoreflect.Message + func (x *ModelProto_SentencePiece) Reset() + func (x *ModelProto_SentencePiece) String() string + type ModelProto_SentencePiece_Type int32 + const ModelProto_SentencePiece_BYTE + const ModelProto_SentencePiece_CONTROL + const ModelProto_SentencePiece_NORMAL + const ModelProto_SentencePiece_UNKNOWN + const ModelProto_SentencePiece_UNUSED + const ModelProto_SentencePiece_USER_DEFINED + func (ModelProto_SentencePiece_Type) Descriptor() protoreflect.EnumDescriptor + func (ModelProto_SentencePiece_Type) EnumDescriptor() ([]byte, []int) + func (ModelProto_SentencePiece_Type) Type() protoreflect.EnumType + func (x *ModelProto_SentencePiece_Type) UnmarshalJSON(b []byte) error + func (x ModelProto_SentencePiece_Type) Enum() *ModelProto_SentencePiece_Type + func (x ModelProto_SentencePiece_Type) Number() protoreflect.EnumNumber + func (x ModelProto_SentencePiece_Type) String() string + type NormalizerSpec struct + AddDummyPrefix *bool + EscapeWhitespaces *bool + Name *string + NormalizationRuleTsv *string + PrecompiledCharsmap []byte + RemoveExtraWhitespaces *bool + func (*NormalizerSpec) Descriptor() ([]byte, []int) + func (*NormalizerSpec) ProtoMessage() + func (x *NormalizerSpec) GetAddDummyPrefix() bool + func (x *NormalizerSpec) GetEscapeWhitespaces() bool + func (x *NormalizerSpec) GetName() string + func (x *NormalizerSpec) GetNormalizationRuleTsv() string + func (x *NormalizerSpec) GetPrecompiledCharsmap() []byte + func (x *NormalizerSpec) GetRemoveExtraWhitespaces() bool + func (x *NormalizerSpec) ProtoReflect() protoreflect.Message + func (x *NormalizerSpec) Reset() + func (x *NormalizerSpec) String() string + type SelfTestData struct + Samples []*SelfTestData_Sample + func (*SelfTestData) Descriptor() ([]byte, []int) + func (*SelfTestData) ProtoMessage() + func (x *SelfTestData) GetSamples() []*SelfTestData_Sample + func (x *SelfTestData) ProtoReflect() protoreflect.Message + func (x *SelfTestData) Reset() + func (x *SelfTestData) String() string + type SelfTestData_Sample struct + Expected *string + Input *string + func (*SelfTestData_Sample) Descriptor() ([]byte, []int) + func (*SelfTestData_Sample) ProtoMessage() + func (x *SelfTestData_Sample) GetExpected() string + func (x *SelfTestData_Sample) GetInput() string + func (x *SelfTestData_Sample) ProtoReflect() protoreflect.Message + func (x *SelfTestData_Sample) Reset() + func (x *SelfTestData_Sample) String() string + type Sentencepiece struct + func NewEmptySentencepiece(filename string, lowercase bool) (Sentencepiece, error) + func NewSentencepieceFromFile(filename string, lowercase bool) (Sentencepiece, error) + func (s *Sentencepiece) EncodeBySPM(text string) ([]int, error) + func (s *Sentencepiece) Free() + func (s *Sentencepiece) GetControlWord(word string) (int32, bool) + func (s *Sentencepiece) GetUnknownIndex() int32 + func (s *Sentencepiece) IdToToken(id int32) (string, bool) + func (s *Sentencepiece) IdsToTokens(ids []int32) []string + func (s *Sentencepiece) SetControlWord(word string, index int32) + func (s *Sentencepiece) SetUnknownIndex(index int32) + func (s *Sentencepiece) TokenToId(word string) (int32, bool) + func (s *Sentencepiece) Tokenize(text string) []Token + func (s *Sentencepiece) TokenizeToIDs(text string) []int32 + func (s *Sentencepiece) TokensToIds(tokens []string) []int32 + type Token struct + ID int32 + Text string + type TrainerSpec struct + AcceptLanguage []string + AllowWhitespaceOnlyPieces *bool + BosId *int32 + BosPiece *string + ByteFallback *bool + CharacterCoverage *float32 + ControlSymbols []string + DifferentialPrivacyClippingThreshold *uint64 + DifferentialPrivacyNoiseLevel *float32 + EnableDifferentialPrivacy *bool + EosId *int32 + EosPiece *string + HardVocabLimit *bool + Input []string + InputFormat *string + InputSentenceSize *uint64 + MaxSentenceLength *int32 + MaxSentencepieceLength *int32 + MiningSentenceSize *int32 + ModelPrefix *string + ModelType *TrainerSpec_ModelType + NumSubIterations *int32 + NumThreads *int32 + PadId *int32 + PadPiece *string + RequiredChars *string + SeedSentencepieceSize *int32 + SelfTestSampleSize *int32 + ShrinkingFactor *float32 + ShuffleInputSentence *bool + SplitByNumber *bool + SplitByUnicodeScript *bool + SplitByWhitespace *bool + SplitDigits *bool + TrainExtremelyLargeCorpus *bool + TrainingSentenceSize *int32 + TreatWhitespaceAsSuffix *bool + UnkId *int32 + UnkPiece *string + UnkSurface *string + UseAllVocab *bool + UserDefinedSymbols []string + VocabSize *int32 + VocabularyOutputPieceScore *bool + func (*TrainerSpec) Descriptor() ([]byte, []int) + func (*TrainerSpec) ProtoMessage() + func (x *TrainerSpec) GetAcceptLanguage() []string + func (x *TrainerSpec) GetAllowWhitespaceOnlyPieces() bool + func (x *TrainerSpec) GetBosId() int32 + func (x *TrainerSpec) GetBosPiece() string + func (x *TrainerSpec) GetByteFallback() bool + func (x *TrainerSpec) GetCharacterCoverage() float32 + func (x *TrainerSpec) GetControlSymbols() []string + func (x *TrainerSpec) GetDifferentialPrivacyClippingThreshold() uint64 + func (x *TrainerSpec) GetDifferentialPrivacyNoiseLevel() float32 + func (x *TrainerSpec) GetEnableDifferentialPrivacy() bool + func (x *TrainerSpec) GetEosId() int32 + func (x *TrainerSpec) GetEosPiece() string + func (x *TrainerSpec) GetHardVocabLimit() bool + func (x *TrainerSpec) GetInput() []string + func (x *TrainerSpec) GetInputFormat() string + func (x *TrainerSpec) GetInputSentenceSize() uint64 + func (x *TrainerSpec) GetMaxSentenceLength() int32 + func (x *TrainerSpec) GetMaxSentencepieceLength() int32 + func (x *TrainerSpec) GetMiningSentenceSize() int32 + func (x *TrainerSpec) GetModelPrefix() string + func (x *TrainerSpec) GetModelType() TrainerSpec_ModelType + func (x *TrainerSpec) GetNumSubIterations() int32 + func (x *TrainerSpec) GetNumThreads() int32 + func (x *TrainerSpec) GetPadId() int32 + func (x *TrainerSpec) GetPadPiece() string + func (x *TrainerSpec) GetRequiredChars() string + func (x *TrainerSpec) GetSeedSentencepieceSize() int32 + func (x *TrainerSpec) GetSelfTestSampleSize() int32 + func (x *TrainerSpec) GetShrinkingFactor() float32 + func (x *TrainerSpec) GetShuffleInputSentence() bool + func (x *TrainerSpec) GetSplitByNumber() bool + func (x *TrainerSpec) GetSplitByUnicodeScript() bool + func (x *TrainerSpec) GetSplitByWhitespace() bool + func (x *TrainerSpec) GetSplitDigits() bool + func (x *TrainerSpec) GetTrainExtremelyLargeCorpus() bool + func (x *TrainerSpec) GetTrainingSentenceSize() int32 + func (x *TrainerSpec) GetTreatWhitespaceAsSuffix() bool + func (x *TrainerSpec) GetUnkId() int32 + func (x *TrainerSpec) GetUnkPiece() string + func (x *TrainerSpec) GetUnkSurface() string + func (x *TrainerSpec) GetUseAllVocab() bool + func (x *TrainerSpec) GetUserDefinedSymbols() []string + func (x *TrainerSpec) GetVocabSize() int32 + func (x *TrainerSpec) GetVocabularyOutputPieceScore() bool + func (x *TrainerSpec) ProtoReflect() protoreflect.Message + func (x *TrainerSpec) Reset() + func (x *TrainerSpec) String() string + type TrainerSpec_ModelType int32 + const TrainerSpec_BPE + const TrainerSpec_CHAR + const TrainerSpec_UNIGRAM + const TrainerSpec_WORD + func (TrainerSpec_ModelType) Descriptor() protoreflect.EnumDescriptor + func (TrainerSpec_ModelType) EnumDescriptor() ([]byte, []int) + func (TrainerSpec_ModelType) Type() protoreflect.EnumType + func (x *TrainerSpec_ModelType) UnmarshalJSON(b []byte) error + func (x TrainerSpec_ModelType) Enum() *TrainerSpec_ModelType + func (x TrainerSpec_ModelType) Number() protoreflect.EnumNumber + func (x TrainerSpec_ModelType) String() string