Documentation ¶
Index ¶
- Variables
- func CheckDictIsLoaded(dict *Dict) error
- func GetFrequency(result []string) map[string]int
- func Reverse(s []string) []string
- type BiDirectionalMaxMatch
- type BiDirectionalMinMatch
- type BigramDict
- type Dict
- type DictRecord
- type MaxMatch
- type MinMatch
- type NumAndLetterWordFilter
- type ReverseMaxMatch
- type ReverseMinMatch
- type StopTokens
- type Tokenizer
- type WordFilter
Constants ¶
This section is empty.
Variables ¶
var DefaultMinTokenLen = 2
DefaultMinTokenLen is the default minimum token length.
Functions ¶
func CheckDictIsLoaded ¶
CheckDictIsLoaded checks whether dict is loaded.
func GetFrequency ¶
GetFrequency returns frequency of tokens
Types ¶
type BiDirectionalMaxMatch ¶
type BiDirectionalMaxMatch struct { MMScore float64 RMMScore float64 MM *MaxMatch RMM *ReverseMaxMatch // contains filtered or unexported fields }
BiDirectionalMaxMatch records dict and bigramDict etc.
func NewBiDirectionalMaxMatch ¶
func NewBiDirectionalMaxMatch(dictPath, bigramDictPath string) *BiDirectionalMaxMatch
NewBiDirectionalMaxMatch returns a newly initialized BiDirectionalMaxMatch object
func (*BiDirectionalMaxMatch) Get ¶
func (bdmm *BiDirectionalMaxMatch) Get(text string) ([]string, error)
Get returns segmentation that implements the Tokenizer interface
func (*BiDirectionalMaxMatch) GetFrequency ¶
func (bdmm *BiDirectionalMaxMatch) GetFrequency(text string) (map[string]int, error)
GetFrequency returns token frequency that implements the Tokenizer interface
func (*BiDirectionalMaxMatch) LoadDict ¶
func (bdmm *BiDirectionalMaxMatch) LoadDict() error
LoadDict loads dict and bigramDict; it implements the Tokenizer interface
type BiDirectionalMinMatch ¶
type BiDirectionalMinMatch struct { MMScore float64 RMMScore float64 MM *MinMatch RMM *ReverseMinMatch // contains filtered or unexported fields }
BiDirectionalMinMatch records dict and bigramDict etc.
func NewBiDirectionalMinMatch ¶
func NewBiDirectionalMinMatch(dictPath, bigramDictPath string) *BiDirectionalMinMatch
NewBiDirectionalMinMatch returns a newly initialized BiDirectionalMinMatch object
func (*BiDirectionalMinMatch) Get ¶
func (bdmm *BiDirectionalMinMatch) Get(text string) ([]string, error)
Get returns segmentation that implements the Tokenizer interface
func (*BiDirectionalMinMatch) GetFrequency ¶
func (bdmm *BiDirectionalMinMatch) GetFrequency(text string) (map[string]int, error)
GetFrequency returns token frequency that implements the Tokenizer interface
func (*BiDirectionalMinMatch) LoadDict ¶
func (bdmm *BiDirectionalMinMatch) LoadDict() error
LoadDict loads dict and bigramDict; it implements the Tokenizer interface
type BigramDict ¶
type BigramDict struct {
// contains filtered or unexported fields
}
BigramDict records dictPath and bigram records
func NewBigramDict ¶
func NewBigramDict(dictPath string) *BigramDict
NewBigramDict returns a newly initialized BigramDict object
type Dict ¶
type Dict struct { Records map[string]DictRecord DictPath string // contains filtered or unexported fields }
Dict records Records and DictPath etc.
type DictRecord ¶
DictRecord records dict meta info
type MaxMatch ¶
type MaxMatch struct { WordFilter WordFilter EnabledFilterStopToken bool StopTokens *StopTokens // contains filtered or unexported fields }
MaxMatch records dict and dictPath
func NewMaxMatch ¶
NewMaxMatch returns a newly initialized MaxMatch object
func (*MaxMatch) GetFrequency ¶
GetFrequency returns token frequency that implements the Tokenizer interface
type MinMatch ¶
type MinMatch struct {
// contains filtered or unexported fields
}
MinMatch records dict and dictPath
func NewMinMatch ¶
NewMinMatch returns a newly initialized MinMatch object
func (*MinMatch) GetFrequency ¶
GetFrequency returns token frequency that implements the Tokenizer interface
type NumAndLetterWordFilter ¶ added in v1.1.0
type NumAndLetterWordFilter struct { }
NumAndLetterWordFilter implements the WordFilter interface
func (*NumAndLetterWordFilter) Filter ¶ added in v1.1.0
func (nlFilter *NumAndLetterWordFilter) Filter(text string) bool
Filter implements the WordFilter interface
type ReverseMaxMatch ¶
type ReverseMaxMatch struct { WordFilter WordFilter EnabledFilterStopToken bool StopTokens *StopTokens // contains filtered or unexported fields }
ReverseMaxMatch records dict and dictPath
func NewReverseMaxMatch ¶
func NewReverseMaxMatch(dictPath string) *ReverseMaxMatch
NewReverseMaxMatch returns a newly initialized ReverseMaxMatch object
func (*ReverseMaxMatch) Get ¶
func (rmm *ReverseMaxMatch) Get(text string) ([]string, error)
Get returns segmentation that implements the Tokenizer interface
func (*ReverseMaxMatch) GetFrequency ¶
func (rmm *ReverseMaxMatch) GetFrequency(text string) (map[string]int, error)
GetFrequency returns token frequency that implements the Tokenizer interface
func (*ReverseMaxMatch) LoadDict ¶
func (rmm *ReverseMaxMatch) LoadDict() error
LoadDict loads dict that implements the Tokenizer interface
type ReverseMinMatch ¶
type ReverseMinMatch struct {
// contains filtered or unexported fields
}
ReverseMinMatch records dict and dictPath
func NewReverseMinMatch ¶
func NewReverseMinMatch(dictPath string) *ReverseMinMatch
NewReverseMinMatch returns a newly initialized ReverseMinMatch object
func (*ReverseMinMatch) Get ¶
func (rmm *ReverseMinMatch) Get(text string) ([]string, error)
Get returns segmentation that implements the Tokenizer interface
func (*ReverseMinMatch) GetFrequency ¶
func (rmm *ReverseMinMatch) GetFrequency(text string) (map[string]int, error)
GetFrequency returns token frequency that implements the Tokenizer interface
func (*ReverseMinMatch) LoadDict ¶
func (rmm *ReverseMinMatch) LoadDict() error
LoadDict loads dict that implements the Tokenizer interface
type StopTokens ¶
type StopTokens struct { IsLoaded bool // contains filtered or unexported fields }
StopTokens records paths and records
func NewStopTokens ¶
func NewStopTokens() *StopTokens
NewStopTokens returns a newly initialized StopTokens object
func (*StopTokens) IsStopToken ¶
func (st *StopTokens) IsStopToken(token string) bool
IsStopToken reports whether token is a stop token
func (*StopTokens) Load ¶
func (st *StopTokens) Load(path string) error
Load loads the StopToken dict
type Tokenizer ¶
type Tokenizer interface { GetFrequency(text string) (map[string]int, error) Get(text string) ([]string, error) LoadDict() error }
Tokenizer defines interface of Tokenizer