Versions in this module
Expand all Collapse all

v0
  v0.2.0 Mar 16, 2026
  v0.1.0 Mar 16, 2026

Changes in this version
  + type BPETokenizer struct
      + func LoadFromJSON(path string) (*BPETokenizer, error)
      + func NewBPETokenizer(vocab map[string]int, merges []MergePair, special SpecialTokens, ...) *BPETokenizer
      + func (t *BPETokenizer) Decode(ids []int) (string, error)
      + func (t *BPETokenizer) Encode(text string) ([]int, error)
      + func (t *BPETokenizer) EncodeWithSpecialTokens(text string, addBOS bool, addEOS bool) ([]int, error)
      + func (t *BPETokenizer) GetID(token string) (int, bool)
      + func (t *BPETokenizer) GetToken(id int) (string, bool)
      + func (t *BPETokenizer) SetSentencePiece(enabled bool)
      + func (t *BPETokenizer) SetSpecialTokenStrings(tokens map[string]int)
      + func (t *BPETokenizer) SpecialTokens() SpecialTokens
      + func (t *BPETokenizer) VocabSize() int
  + type MergePair struct
      + Left string
      + Right string
  + type NormalizerFunc func(string) string
  + type SpecialTokens struct
      + BOS int
      + EOS int
      + PAD int
      + UNK int
  + type Tokenizer interface
      + Decode func(ids []int) (string, error)
      + Encode func(text string) ([]int, error)
      + GetID func(token string) (int, bool)
      + GetToken func(id int) (string, bool)
      + SpecialTokens func() SpecialTokens
      + VocabSize func() int
  + type WhitespaceTokenizer struct
      + func NewWhitespaceTokenizer() *WhitespaceTokenizer
      + func (t *WhitespaceTokenizer) AddToken(token string) int
      + func (t *WhitespaceTokenizer) Decode(ids []int) (string, error)
      + func (t *WhitespaceTokenizer) Encode(text string) ([]int, error)
      + func (t *WhitespaceTokenizer) GetID(token string) (int, bool)
      + func (t *WhitespaceTokenizer) GetToken(id int) (string, bool)
      + func (t *WhitespaceTokenizer) SpecialTokens() SpecialTokens
      + func (t *WhitespaceTokenizer) VocabSize() int