View Source

    the maximum rank to consider for the base language

    View Source
    const DEFAULT_NDEPTH = 4

      DEFAULT_NDEPTH is the depth of n-gram tokens that are created. If DEFAULT_NDEPTH=1, only 1-letter tokens are created.


      View Source
      var DefaultMinimumConfidence float32 = 0.7

        DefaultMinimumConfidence is the minimum confidence that a language-match must have to be returned as detected language


        func CreateOccurenceMap

        func CreateOccurenceMap(text string, gramDepth int) map[string]int

          CreateOccurenceMap creates a map[token]occurrence from a given text, up to a given gram depth. gramDepth=1 means only 1-letter tokens are created, gramDepth=2 means 1- and 2-letter tokens are created, etc.

          func CreateRankLookupMap

          func CreateRankLookupMap(input map[string]int) map[string]int

            CreateRankLookupMap creates the map[token]rank from a map[token]occurrence.

            func GetDistance

            func GetDistance(mapA, mapB map[string]int, maxDist int, maxRank int) int

              GetDistance calculates the out-of-place distance between two Profiles, taking into account only the items of mapA that have a rank from 1 to maxRank.


              type ByOccurrence

              type ByOccurrence []Token

                ByOccurrence represents an array of tokens which can be sorted by occurrences of the tokens.

                func (ByOccurrence) Len

                func (a ByOccurrence) Len() int

                func (ByOccurrence) Less

                func (a ByOccurrence) Less(i, j int) bool

                func (ByOccurrence) Swap

                func (a ByOccurrence) Swap(i, j int)

                type DetectionResult

                type DetectionResult struct {
                	Name       string
                	Confidence int

                  DetectionResult represents the result from comparing 2 Profiles. It includes the confidence, which is basically the relative distance between the two profiles.

                  type Detector

                  type Detector struct {
                  	Languages         []LanguageComparator
                  	MinimumConfidence float32
                  	NDepth            int
                  	MaxRank           int

                    Detector has an array of detectable Languages and methods to determine the closest Language to a text.

                    func NewDetector

                    func NewDetector() Detector

                      NewDetector returns a new Detector without any language. It can be used to add languages selectively.

                      func (*Detector) AddLanguage

                      func (d *Detector) AddLanguage(languages ...Language)

                        AddLanguage adds one or more languages to the list of detectable languages of this Detector instance.

                        func (*Detector) AddLanguageComparators

                        func (d *Detector) AddLanguageComparators(comparators ...LanguageComparator)

                          AddLanguageComparators adds one or more LanguageComparators to the list of detectable languages of this Detector instance.

                          func (*Detector) AddLanguageFromText

                          func (d *Detector) AddLanguageFromText(textToAnalyze, languageName string)

                            AddLanguageFromText analyzes a text and creates a new Language with the given name. The new language will be detectable afterwards by this Detector instance.

                            func (*Detector) GetClosestLanguage

                            func (d *Detector) GetClosestLanguage(text string) string

                              GetClosestLanguage returns the name of the language which is closest to the given text if it is confident enough. It returns undefined otherwise. Set the detector's MinimumConfidence to customize the required confidence.

                              func (*Detector) GetLanguages

                              func (d *Detector) GetLanguages(text string) []DetectionResult

                                GetLanguages analyzes a text and returns the DetectionResult of all languages of this detector.

                                type Language

                                type Language struct {
                                	Profile map[string]int
                                	Name    string

                                func Analyze

                                func Analyze(text, name string) Language

                                func AnalyzeWithNDepth

                                func AnalyzeWithNDepth(text, name string, nDepth int) Language

                                  AnalyzeWithNDepth creates the language profile from a given text and returns it in a Language struct.

                                  func (*Language) CompareTo

                                  func (l *Language) CompareTo(lazyLookupMap func() map[string]int, originaltext string, maxRank int) DetectionResult

                                  func (*Language) GetName

                                  func (l *Language) GetName() string

                                  type LanguageComparator

                                  type LanguageComparator interface {
                                  	CompareTo(lazyLookupMap func() map[string]int, originalText string, maxRank int) DetectionResult
                                  	GetName() string

                                    Language represents a language by its name and the profile ( map[token]OccurrenceRank )

                                    type ResByConf

                                    type ResByConf []DetectionResult

                                      ResByConf represents an array of DetectionResult and can be sorted by Confidence.

                                      func (ResByConf) Len

                                      func (a ResByConf) Len() int

                                      func (ResByConf) Less

                                      func (a ResByConf) Less(i, j int) bool

                                      func (ResByConf) Swap

                                      func (a ResByConf) Swap(i, j int)

                                      type Token

                                      type Token struct {
                                      	Occurrence int
                                      	Key        string

                                        Token represents a text token and its occurrence in an analyzed text.

                                        type UnicodeRangeLanguageComparator

                                        type UnicodeRangeLanguageComparator struct {
                                        	Name       string
                                        	RangeTable *unicode.RangeTable

                                          Chinese, Japanese, Korean Charset

                                          func (*UnicodeRangeLanguageComparator) CompareTo

                                          func (u *UnicodeRangeLanguageComparator) CompareTo(_ func() map[string]int, originalInput string, _ int) DetectionResult

                                          func (*UnicodeRangeLanguageComparator) GetName


                                          Path Synopsis