Documentation ¶
Index ¶
- func NewCharacterTokenizerWithOptions(opts map[string]interface{}) (*tokenizer.CharacterTokenizer, error)
- func NewExceptionsTokenizerWithOptions(opts map[string]interface{}) (*tokenizer.ExceptionsTokenizer, error)
- func NewKagomeTokenizerWithOptions(opts map[string]interface{}) (analysis.Tokenizer, error)
- func NewRegexpTokenizerWithOptions(opts map[string]interface{}) (*tokenizer.RegexpTokenizer, error)
- type Tokenizer
- type TokenizerSetting
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func NewCharacterTokenizerWithOptions ¶
func NewCharacterTokenizerWithOptions(opts map[string]interface{}) (*tokenizer.CharacterTokenizer, error)
Create a new CharacterTokenizer with the given options. Options example:
{ "rune": "graphic" }
func NewExceptionsTokenizerWithOptions ¶
func NewExceptionsTokenizerWithOptions(opts map[string]interface{}) (*tokenizer.ExceptionsTokenizer, error)
Create a new ExceptionsTokenizer with the given options. Options example:
{ "patterns": [ "[hH][tT][tT][pP][sS]?://(\S)*", "[fF][iI][lL][eE]://(\S)*", "[fF][tT][pP]://(\S)*", "\S+@\S+" ] }
func NewKagomeTokenizerWithOptions ¶
func NewKagomeTokenizerWithOptions(opts map[string]interface{}) (analysis.Tokenizer, error)
Create a new KagomeTokenizer with the given options. Options example:
{ "dictionary": "IPADIC", "stop_tags": [ "接続詞", "助詞", "助詞-格助詞", "助詞-格助詞-一般", "助詞-格助詞-引用", "助詞-格助詞-連語", "助詞-接続助詞", "助詞-係助詞", "助詞-副助詞", "助詞-間投助詞", "助詞-並立助詞", "助詞-終助詞", "助詞-副助詞/並立助詞/終助詞", "助詞-連体化", "助詞-副詞化", "助詞-特殊", "助動詞", "記号", "記号-一般", "記号-読点", "記号-句点", "記号-空白", "記号-括弧開", "記号-括弧閉", "その他-間投", "フィラー", "非言語音" ], "base_forms": [ "動詞", "形容詞", "形容動詞" ] }
func NewRegexpTokenizerWithOptions ¶
func NewRegexpTokenizerWithOptions(opts map[string]interface{}) (*tokenizer.RegexpTokenizer, error)
Create a new RegexpTokenizer with the given options. Options example:
{ "pattern": "[0-9a-zA-Z_]*" }
Types ¶
type Tokenizer ¶
type Tokenizer string
const (
	CharacterTokenizer   Tokenizer = "character"
	ExceptionTokenizer   Tokenizer = "exception"
	KagomeTokenizer      Tokenizer = "kagome"
	LetterTokenizer      Tokenizer = "letter"
	RegexpTokenizer      Tokenizer = "regexp"
	SingleTokenTokenizer Tokenizer = "single_token"
	UnicodeTokenizer     Tokenizer = "unicode"
	WebTokenizer         Tokenizer = "web"
	WhitespaceTokenizer  Tokenizer = "whitespace"
)
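As a hedged illustration (not part of the package), the name constants above can be used to validate a tokenizer name read from configuration. The helper below is hypothetical and assumes it is compiled in the same package that declares Tokenizer and its constants:

// isSupportedTokenizer reports whether name matches one of the Tokenizer
// constants declared above. Hypothetical helper, shown for illustration only.
func isSupportedTokenizer(name string) bool {
	switch Tokenizer(name) {
	case CharacterTokenizer, ExceptionTokenizer, KagomeTokenizer,
		LetterTokenizer, RegexpTokenizer, SingleTokenTokenizer,
		UnicodeTokenizer, WebTokenizer, WhitespaceTokenizer:
		return true
	default:
		return false
	}
}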
type TokenizerSetting ¶