Versions in this module

Expand all | Collapse all

v1

- v1.1.0 Oct 3, 2017
- v1.0.0 Sep 24, 2017

Changes in this version

+ func TextToWords(text string) []string
+ type PragmaticSegmenter struct
    + func NewPragmaticSegmenter(lang string) (*PragmaticSegmenter, error)
    + func (p *PragmaticSegmenter) Tokenize(text string) []string
+ type ProseTokenizer interface
    + Tokenize func(text string) []string
+ type PunktSentenceTokenizer struct
    + func NewPunktSentenceTokenizer() *PunktSentenceTokenizer
    + func (p PunktSentenceTokenizer) Tokenize(text string) []string
+ type RegexpTokenizer struct
    + func NewBlanklineTokenizer() *RegexpTokenizer
    + func NewRegexpTokenizer(pattern string, gaps, discard bool) *RegexpTokenizer
    + func NewWordBoundaryTokenizer() *RegexpTokenizer
    + func NewWordPunctTokenizer() *RegexpTokenizer
    + func (r RegexpTokenizer) Tokenize(text string) []string
+ type TreebankWordTokenizer struct
    + func NewTreebankWordTokenizer() *TreebankWordTokenizer
    + func (t TreebankWordTokenizer) Tokenize(text string) []string