Documentation ¶
Index ¶
- Constants
- Variables
- type Bitmap
- type Document
- type FalseNegativeError
- type Hyperplanes
- type LSH
- func (l *LSH) Delete(uid uint64) error
- func (l *LSH) Filter(v []float64, s *SearchOptions) ([]uint64, []float64, error)
- func (l *LSH) Index(d Document) error
- func (l *LSH) Load(filepath string) error
- func (l *LSH) Save(filepath string, d Document) error
- func (l *LSH) Score(v []float64, docIds []uint64, res *Results)
- func (l *LSH) Search(v []float64, s *SearchOptions) (Scores, int, error)
- func (l *LSH) Stats() *Statistics
- type Options
- type Results
- type Score
- type Scores
- type SearchOptions
- type SignFilter
- type SimpleDocument
- type Statistics
- type Table
Constants ¶
const ( SignFilter_POS = 1 SignFilter_NEG = -1 SignFilter_ANY = 0 )
Variables ¶
var ( ErrExceededMaxNumHyperplanes = fmt.Errorf("number of hyperplanes exceeded max of, %d", maxNumHyperplanes) ErrInvalidNumHyperplanes = errors.New("invalid number of hyperplanes, must be at least 1") ErrInvalidNumTables = errors.New("invalid number of tables, must be at least 1") ErrInvalidVectorLength = errors.New("invalid vector length, must be at least 1") ErrInvalidDocument = errors.New("vector length does not match with the configured options") ErrDuplicateDocument = errors.New("document is already indexed") ErrNoOptions = errors.New("no options set for LSH") ErrNoVectorComplexity = errors.New("vector does not have enough complexity with a standard deviation of 0") ErrVectorLengthMismatch = errors.New("vector length mismatch") ErrNoVector = errors.New("no vector provided") ErrDocumentNotStored = errors.New("document id is not stored") ErrHashNotFound = errors.New("hash not found in table") ErrInvalidNumToReturn = errors.New("invalid NumToReturn, must be at least 1") ErrInvalidThreshold = errors.New("invalid threshold, must be between 0 and 1 inclusive") ErrInvalidSignFilter = errors.New("invalid sign filter, must be any, neg, or pos") )
var ( ErrNoHyperplanes = errors.New("no hyperplanes provided to creation of new tables") ErrTableToHyperplanesMismatch = errors.New("number of hyperplane tables does not match configured tables in options") )
var (
ErrNumHyperplanesExceedHashBits = errors.New("number of hyperplanes exceeds available bits to encode vector")
)
Functions ¶
This section is empty.
Types ¶
type Bitmap ¶
Bitmap is a goroutine-safe wrapper around the roaring bitmap
func (*Bitmap) CheckedAdd ¶
func (*Bitmap) CheckedRemove ¶
type FalseNegativeError ¶
type FalseNegativeError struct { Threshold float64 `json:"threshold"` Probability float64 `json:"probability"` }
FalseNegativeError represents the probability that a document will be missed during a search when it should be found. Such a document should match the query document but, given the number of hyperplanes, the number of tables, and the desired threshold, will be missed with this probability. A value closer to 0 means there is less chance of missing matching documents, and a value closer to 1 means a higher likelihood of missing them in the search.
type Hyperplanes ¶
type Hyperplanes struct {
Planes [][]float64
}
Hyperplanes is composed of a number of randomly generated unit vectors where the vector length is based on the configured vector length it is to represent.
func NewHyperplanes ¶
func NewHyperplanes(numHyperplanes, vecLen int) (*Hyperplanes, error)
type LSH ¶
type LSH struct { Opt *Options HyperplaneTables []*Hyperplanes // N sets of randomly generated hyperplanes Tables []*Table // N tables each using a different randomly generated set of hyperplanes Docs map[uint64]Document }
LSH represents the locality sensitive hash struct that stores the multiple tables containing the configured number of hyperplanes along with the documents currently indexed.
func (*LSH) Filter ¶
Filter returns the set of document ids that match the given vector and search options, along with the normalized input vector
func (*LSH) Index ¶
Index stores the document in the LSH data structure. Returns an error if the document is already present.
func (*LSH) Save ¶
Save takes a filepath and a document interface representing the indexed documents and saves the LSH index to disk. Only one type of document is currently supported, which will be registered with gob so that it can be encoded and saved to disk.
func (*LSH) Score ¶
Score takes a set of document ids and scores them against a provided search query
func (*LSH) Search ¶
Search looks through and merges results from all tables to find the nearest neighbors to the provided vector
func (*LSH) Stats ¶
func (l *LSH) Stats() *Statistics
Stats returns the current statistics about the configured LSH struct.
type Options ¶
Options represents a set of parameters that configure the LSH tables
func NewDefaultOptions ¶
func NewDefaultOptions() *Options
NewDefaultOptions returns a set of default options to create the LSH tables
type Results ¶
type Results struct { TopN int Threshold float64 SignFilter SignFilter NumScored int // contains filtered or unexported fields }
func NewResults ¶
func NewResults(topN int, threshold float64, signFilter SignFilter) *Results
NewResults creates a new instance of results to track similar vectors
type Scores ¶
type Scores []Score
Scores is a slice of individual Score values
func (*Scores) Pop ¶
func (s *Scores) Pop() interface{}
Pop implements the function in the heap interface
type SearchOptions ¶
type SearchOptions struct { NumToReturn int `json:"num_to_return"` Threshold float64 `json:"threshold"` SignFilter SignFilter `json:"sign_filter"` }
SearchOptions represent a set of parameters to be used to customize search results
func NewDefaultSearchOptions ¶
func NewDefaultSearchOptions() *SearchOptions
NewDefaultSearchOptions returns a default set of parameters to be used for search.
func (*SearchOptions) Validate ¶
func (s *SearchOptions) Validate() error
Validate returns an error if any of the input options are invalid
type SignFilter ¶
type SignFilter int
type SimpleDocument ¶
func NewSimpleDocument ¶
func NewSimpleDocument(uid uint64, v []float64) *SimpleDocument
func (SimpleDocument) GetUID ¶
func (d SimpleDocument) GetUID() uint64
func (SimpleDocument) GetVector ¶
func (d SimpleDocument) GetVector() []float64
func (SimpleDocument) Register ¶
func (d SimpleDocument) Register()
type Statistics ¶
type Statistics struct { NumDocs int `json:"num_docs"` FalseNegativeErrors []FalseNegativeError `json:"false_negative_errors"` }
Statistics holds the total number of indexed documents along with a slice of the false negative errors for a variety of query thresholds. This can help determine whether the configured number of hyperplanes and tables can give the desired results for a given threshold.
type Table ¶
type Table struct { Hyperplanes *Hyperplanes Table map[uint16]*Bitmap Doc2Hash map[uint64]uint16 }
Table maps buckets to a bitmap of document ids. Where documents are stored in the table is determined by finding the bucket a document is mapped to.
func NewTable ¶
func NewTable(h *Hyperplanes) (*Table, error)