Documentation ¶
Overview ¶
Package suggest provides fuzzy search and autocomplete functionality
Example ¶
This example demonstrates how to use this package.
package main

import (
	"fmt"
	"log"

	"github.com/teng231/suggest/pkg/dictionary"
	"github.com/teng231/suggest/pkg/metric"
	"github.com/teng231/suggest/pkg/suggest"
)

// main builds an in-memory n-gram index over a small list of car names,
// registers it in a suggest service under the name "cars", runs a fuzzy
// search for the query "niss ma" and prints the values of the matches.
func main() {
	// The example uses InMemoryDictionary; any other dictionary
	// implementation (for example SqlDictionary, CDBDictionary and so on)
	// can be used here instead.
	carNames := dictionary.NewInMemoryDictionary([]string{
		"Nissan March",
		"Nissan Juke",
		"Nissan Maxima",
		"Nissan Murano",
		"Nissan Note",
		"Toyota Mark II",
		"Toyota Corolla",
		"Toyota Corona",
	})

	// Describe the index configuration.
	description := suggest.IndexDescription{
		Name:      "cars",                   // name of the dictionary
		NGramSize: 3,                        // size of the nGram
		Wrap:      [2]string{"$", "$"},      // wrap symbols (front and rear)
		Pad:       "$",                      // pad symbol that replaces forbidden chars
		Alphabet:  []string{"english", "$"}, // allowed chars (other chars will be replaced with the pad symbol)
	}

	// Create a runtime search index builder.
	builder, err := suggest.NewRAMBuilder(carNames, description)
	if err != nil {
		log.Fatalf("Unexpected error: %v", err)
	}

	service := suggest.NewService()

	// Ask the service to add a new search index with the given configuration.
	err = service.AddIndex(description.Name, carNames, builder)
	if err != nil {
		log.Fatalf("Unexpected error: %v", err)
	}

	// Declare the search configuration
	// (query, topK elements, type of metric, min similarity).
	searchConf, err := suggest.NewSearchConfig("niss ma", 5, metric.CosineMetric(), 0.4)
	if err != nil {
		log.Fatalf("Unexpected error: %v", err)
	}

	matches, err := service.Suggest("cars", searchConf)
	if err != nil {
		log.Fatalf("Unexpected error: %v", err)
	}

	// Keep only the string value of every match, preserving the result order.
	values := make([]string, len(matches))
	for i := range matches {
		values[i] = matches[i].Value
	}

	fmt.Println(values)
}
Output: [Nissan Maxima Nissan March]
Index ¶
- func Index(directory store.Directory, dict dictionary.Dictionary, ...) error
- func NewAutocompleteTokenizer(d IndexDescription) analysis.Tokenizer
- func NewSuggestTokenizer(d IndexDescription) analysis.Tokenizer
- type Autocomplete
- type Builder
- type Candidate
- type Collector
- type CollectorManager
- type CollectorManagerFactory
- type Driver
- type FirstKCollectorManager
- type FuzzyCollectorManager
- type IndexDescription
- type NGramIndex
- type ResultItem
- type Scorer
- type SearchConfig
- type Service
- func (s *Service) AddIndex(name string, dict dictionary.Dictionary, builder Builder) error
- func (s *Service) AddIndexByDescription(description IndexDescription) error
- func (s *Service) AddOnDiscIndex(description IndexDescription) error
- func (s *Service) AddRunTimeIndex(description IndexDescription) error
- func (s *Service) Autocomplete(dictName string, query string, limit int) ([]ResultItem, error)
- func (s *Service) GetDictionaries() []string
- func (s *Service) Suggest(dictName string, config SearchConfig) ([]ResultItem, error)
- type Suggester
- type TopKQueue
Examples ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func Index ¶
func Index( directory store.Directory, dict dictionary.Dictionary, config index.WriterConfig, tokenizer analysis.Tokenizer, ) error
Index builds a search index by using the given config and the dictionary and persists it to the directory
func NewAutocompleteTokenizer ¶
func NewAutocompleteTokenizer(d IndexDescription) analysis.Tokenizer
NewAutocompleteTokenizer creates a tokenizer for the autocomplete service
func NewSuggestTokenizer ¶
func NewSuggestTokenizer(d IndexDescription) analysis.Tokenizer
NewSuggestTokenizer creates a tokenizer for the suggester service
Types ¶
type Autocomplete ¶
type Autocomplete interface { // Autocomplete returns candidates where the query string is a substring of each candidate Autocomplete(query string, factory CollectorManagerFactory) ([]Candidate, error) }
Autocomplete provides autocomplete functionality for candidates search
func NewAutocomplete ¶
func NewAutocomplete( indices index.InvertedIndexIndices, searcher index.Searcher, tokenizer analysis.Tokenizer, ) Autocomplete
NewAutocomplete creates a new instance of Autocomplete
type Builder ¶
type Builder interface { // Build configures and returns a new instance of NGramIndex Build() (NGramIndex, error) }
Builder is the entity that is responsible for tuning and creating a NGramIndex
func NewBuilder ¶
func NewBuilder(directory store.Directory, description IndexDescription) (Builder, error)
NewBuilder works with already indexed data
func NewFSBuilder ¶
func NewFSBuilder(description IndexDescription) (Builder, error)
NewFSBuilder works with already indexed data
func NewRAMBuilder ¶
func NewRAMBuilder(dict dictionary.Dictionary, description IndexDescription) (Builder, error)
NewRAMBuilder creates a search index by using the given dictionary and the index description in a RAMDriver directory
type Candidate ¶
type Candidate struct { // Key is a position (docId) in posting list Key index.Position // Score is a float64 number that represents a score of a document Score float64 }
Candidate is an item of Collector
type Collector ¶
type Collector interface { merger.Collector // SetScorer sets a scorer before collection starts SetScorer(scorer Scorer) }
Collector collects the stream of documents that satisfy the search criteria
type CollectorManager ¶
type CollectorManager interface { // Create creates a new collector that will be used for a search segment Create() Collector // Collect returns back the given collectors. Collect(collectors ...Collector) error // GetCandidates returns currently collected candidates. GetCandidates() []Candidate }
CollectorManager is responsible for creating collectors and reducing them into the result set
type CollectorManagerFactory ¶
type CollectorManagerFactory func() CollectorManager
CollectorManagerFactory is a factory method for creating a new instance of CollectorManager.
type FirstKCollectorManager ¶
type FirstKCollectorManager struct {
// contains filtered or unexported fields
}
FirstKCollectorManager represents first k collector manager.
func NewFirstKCollectorManager ¶
func NewFirstKCollectorManager(limit int, queue TopKQueue) *FirstKCollectorManager
NewFirstKCollectorManager creates a new instance of CollectorManager with firstK collectors
func (*FirstKCollectorManager) Collect ¶
func (m *FirstKCollectorManager) Collect(collectors ...Collector) error
Collect returns back the given collectors.
func (*FirstKCollectorManager) Create ¶
func (m *FirstKCollectorManager) Create() Collector
Create creates a new collector that will be used for a search segment
func (*FirstKCollectorManager) GetCandidates ¶
func (m *FirstKCollectorManager) GetCandidates() []Candidate
GetCandidates returns currently collected candidates.
type FuzzyCollectorManager ¶
type FuzzyCollectorManager struct {
// contains filtered or unexported fields
}
FuzzyCollectorManager represents fuzzy collector manager.
func NewFuzzyCollectorManager ¶
func NewFuzzyCollectorManager(queueFactory func() TopKQueue) *FuzzyCollectorManager
NewFuzzyCollectorManager creates a new instance of FuzzyCollectorManager.
func (*FuzzyCollectorManager) Collect ¶
func (m *FuzzyCollectorManager) Collect(collectors ...Collector) error
Collect returns back the given collectors.
func (*FuzzyCollectorManager) Create ¶
func (m *FuzzyCollectorManager) Create() Collector
Create creates a new collector that will be used for a search segment
func (*FuzzyCollectorManager) GetCandidates ¶
func (m *FuzzyCollectorManager) GetCandidates() []Candidate
GetCandidates returns currently collected candidates.
func (*FuzzyCollectorManager) GetLowestScore ¶
func (m *FuzzyCollectorManager) GetLowestScore() float64
GetLowestScore returns the lowest collected score.
type IndexDescription ¶
type IndexDescription struct { Driver Driver `json:"driver"` Name string `json:"name"` NGramSize int `json:"nGramSize"` SourcePath string `json:"source"` OutputPath string `json:"output"` Alphabet []string `json:"alphabet"` Pad string `json:"pad"` Wrap [2]string `json:"wrap"` // contains filtered or unexported fields }
IndexDescription is the configuration for the NGramIndex structure
func ReadConfigs ¶
func ReadConfigs(configPath string) ([]IndexDescription, error)
ReadConfigs reads and returns a list of IndexDescription from the given config path
func (*IndexDescription) GetDictionaryFile ¶
func (d *IndexDescription) GetDictionaryFile() string
GetDictionaryFile returns a path to a dictionary file from the configuration
func (*IndexDescription) GetIndexPath ¶
func (d *IndexDescription) GetIndexPath() string
GetIndexPath returns the output path of the built index
func (*IndexDescription) GetIndexTokenizer ¶
func (d *IndexDescription) GetIndexTokenizer() analysis.Tokenizer
GetIndexTokenizer returns a tokenizer for indexing
func (*IndexDescription) GetSourcePath ¶
func (d *IndexDescription) GetSourcePath() string
GetSourcePath returns a source path of the index description
func (*IndexDescription) GetWriterConfig ¶
func (d *IndexDescription) GetWriterConfig() index.WriterConfig
GetWriterConfig creates and returns IndexWriter config from the given index description
type NGramIndex ¶
type NGramIndex interface { Suggester Autocomplete }
NGramIndex is the interface that provides the access to approximate string search and autocomplete
func NewNGramIndex ¶
func NewNGramIndex(suggester Suggester, autocomplete Autocomplete) NGramIndex
NewNGramIndex creates a new instance of NGramIndex
type ResultItem ¶
type ResultItem struct { // Score is a float64 value of a candidate Score float64 // Value is a string value of candidate Value string }
ResultItem represents an element of the top-k similar strings in the dictionary for the given query
type Scorer ¶
type Scorer interface { // Score returns the score of the given candidate Score(position merger.MergeCandidate) float64 }
Scorer is responsible for scoring an index position
type SearchConfig ¶
type SearchConfig struct {
// contains filtered or unexported fields
}
SearchConfig is a config for NGramIndex Suggest method
func NewSearchConfig ¶
func NewSearchConfig(query string, topK int, metric metric.Metric, similarity float64) (SearchConfig, error)
NewSearchConfig returns new instance of SearchConfig
type Service ¶
Service provides methods for autocomplete and topK approximate string search
func (*Service) AddIndex ¶
func (s *Service) AddIndex(name string, dict dictionary.Dictionary, builder Builder) error
AddIndex adds an index with the given name, dictionary and builder
func (*Service) AddIndexByDescription ¶
func (s *Service) AddIndexByDescription(description IndexDescription) error
AddIndexByDescription adds a new search index with the given description
func (*Service) AddOnDiscIndex ¶
func (s *Service) AddOnDiscIndex(description IndexDescription) error
AddOnDiscIndex adds a new DISC search index with the given description
func (*Service) AddRunTimeIndex ¶
func (s *Service) AddRunTimeIndex(description IndexDescription) error
AddRunTimeIndex adds a new RAM search index with the given description
func (*Service) Autocomplete ¶
Autocomplete returns limit candidates where the query string is a prefix of each candidate
func (*Service) GetDictionaries ¶
GetDictionaries returns the managed list of dictionaries
func (*Service) Suggest ¶
func (s *Service) Suggest(dictName string, config SearchConfig) ([]ResultItem, error)
Suggest returns Top-k approximate strings for the given query in the dict
type Suggester ¶
type Suggester interface { // Suggest returns top-k similar candidates Suggest(query string, similarity float64, metric metric.Metric, factory CollectorManagerFactory) ([]Candidate, error) }
Suggester is the interface that provides the access to approximate string search
func NewSuggester ¶
func NewSuggester( indices index.InvertedIndexIndices, searcher index.Searcher, tokenizer analysis.Tokenizer, ) Suggester
NewSuggester returns a new Suggester instance
type TopKQueue ¶
type TopKQueue interface { // Add adds item with given position and distance to collection if item belongs to `top k items` Add(candidate index.Position, score float64) // GetLowestScore returns the lowest score of the collected candidates. If collection is empty, 0 will be returned GetLowestScore() float64 // CanTakeWithScore returns true if a candidate with the given score can be accepted CanTakeWithScore(score float64) bool // IsFull tells if selector has collected `top k elements` IsFull() bool // GetCandidates returns `top k items` GetCandidates() []Candidate // Merge merges the given queue with the current Merge(other TopKQueue) // Reset resets the given queue with the provided topK Reset(topK int) }
TopKQueue is an accumulator that selects the "top k" elements added to it
func NewTopKQueue ¶
NewTopKQueue returns an instance of TopKQueue