lsh

package module
v0.0.0-...-c24504c Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 8, 2023 License: MIT Imports: 12 Imported by: 0

README

Build Status codecov Go Report Card GoDoc License

go-lsh

Documentation

Index

Constants

View Source
const (
	SignFilter_POS = 1
	SignFilter_NEG = -1
	SignFilter_ANY = 0
)

Variables

View Source
var (
	ErrExceededMaxNumHyperplanes = fmt.Errorf("number of hyperplanes exceeded max of, %d", maxNumHyperplanes)
	ErrInvalidNumHyperplanes     = errors.New("invalid number of hyperplanes, must be at least 1")
	ErrInvalidNumTables          = errors.New("invalid number of tables, must be at least 1")
	ErrInvalidVectorLength       = errors.New("invalid vector length, must be at least 1")
	ErrInvalidDocument           = errors.New("vector length does not match with the configured options")
	ErrDuplicateDocument         = errors.New("document is already indexed")
	ErrNoOptions                 = errors.New("no options set for LSH")
	ErrNoVectorComplexity        = errors.New("vector does not have enough complexity with a standard deviation of 0")
	ErrVectorLengthMismatch      = errors.New("vector length mismatch")
	ErrNoVector                  = errors.New("no vector provided")
	ErrDocumentNotStored         = errors.New("document id is not stored")
	ErrHashNotFound              = errors.New("hash not found in table")
	ErrInvalidNumToReturn        = errors.New("invalid NumToReturn, must be at least 1")
	ErrInvalidThreshold          = errors.New("invalid threshold, must be between 0 and 1 inclusive")
	ErrInvalidSignFilter         = errors.New("invalid sign filter, must be any, neg, or pos")
)
View Source
var (
	ErrNoHyperplanes              = errors.New("no hyperplanes provided to creation of new tables")
	ErrTableToHyperplanesMismatch = errors.New("number of hyperplane tables does not match configured tables in options")
)
View Source
var (
	ErrNumHyperplanesExceedHashBits = errors.New("number of hyperplanes exceeds available bits to encode vector")
)

Functions

This section is empty.

Types

type Bitmap

type Bitmap struct {
	Rb *roaring64.Bitmap
	// contains filtered or unexported fields
}

Bitmap is a go-routine safe wrapping of the roarding bitmap

func (*Bitmap) CheckedAdd

func (b *Bitmap) CheckedAdd(uid uint64) bool

func (*Bitmap) CheckedRemove

func (b *Bitmap) CheckedRemove(uid uint64) bool

func (*Bitmap) IsEmpty

func (b *Bitmap) IsEmpty() bool

type Document

type Document interface {
	GetUID() uint64
	GetVector() []float64
	Register()
}

type FalseNegativeError

type FalseNegativeError struct {
	Threshold   float64 `json:"threshold"`
	Probability float64 `json:"probability"`
}

FalseNegativeError represents the probability that a document will be missed during a search when it should be found. This document should match with the query document, but due to the number of hyperplanes, number of tables and the desired threshold will not with this probability. Closer to zero means there's less chance for missing document results and closer to 1 means a higher likelihood of missing the documents in the search.

type Hyperplanes

type Hyperplanes struct {
	Planes [][]float64
}

Hyperplanes is composed of a number of randomly generated unit vectors where the vector length is based on the configured vector length it is to represent.

func NewHyperplanes

func NewHyperplanes(numHyperplanes, vecLen int) (*Hyperplanes, error)

func (*Hyperplanes) Hash16

func (h *Hyperplanes) Hash16(f []float64) (uint16, error)

func (*Hyperplanes) Hash32

func (h *Hyperplanes) Hash32(f []float64) (uint32, error)

func (*Hyperplanes) Hash64

func (h *Hyperplanes) Hash64(f []float64) (uint64, error)

func (*Hyperplanes) Hash8

func (h *Hyperplanes) Hash8(f []float64) (uint8, error)

type LSH

type LSH struct {
	Opt              *Options
	HyperplaneTables []*Hyperplanes // N sets of randomly generated hyperplanes
	Tables           []*Table       // N tables each using a different randomly generated set of hyperplanes
	Docs             map[uint64]Document
}

LSH represents the locality sensitive hash struct that stores the multiple tables containing the configured number of hyperplanes along with the documents currently indexed.

func New

func New(opt *Options) (*LSH, error)

New returns a new Locality Sensitive Hash struct ready for indexing and searching

func (*LSH) Delete

func (l *LSH) Delete(uid uint64) error

Delete attempts to remove the uid from the tables and also the document map

func (*LSH) Filter

func (l *LSH) Filter(v []float64, s *SearchOptions) ([]uint64, []float64, error)

Filter returns a set of document ids that match the given vector and search options along with the input vector noramlized

func (*LSH) Index

func (l *LSH) Index(d Document) error

Index stores the document in the LSH data structure. Returns an error if the document is already present.

func (*LSH) Load

func (l *LSH) Load(filepath string) error

func (*LSH) Save

func (l *LSH) Save(filepath string, d Document) error

Save takes a filepath and a document interface representing the indexed documents and saves the lsh index to disk. Only one type of document is currently supported which will be registered with gob to encode and save to disk.

func (*LSH) Score

func (l *LSH) Score(v []float64, docIds []uint64, res *Results)

Score takes a set of document ids and scores them against a provided search query

func (*LSH) Search

func (l *LSH) Search(v []float64, s *SearchOptions) (Scores, int, error)

Search looks through and merges results from all tables to find the nearest neighbors to the provided vector

func (*LSH) Stats

func (l *LSH) Stats() *Statistics

Stats returns the current statistics about the configured LSH struct.

type Options

type Options struct {
	NumHyperplanes int
	NumTables      int
	VectorLength   int
}

Options represents a set of parameters that configure the LSH tables

func NewDefaultOptions

func NewDefaultOptions() *Options

NewDefaultOptions returns a set of default options to create the LSH tables

func (*Options) Validate

func (o *Options) Validate() error

Validate returns an error if any of the LSH options are invalid

type Results

type Results struct {
	TopN       int
	Threshold  float64
	SignFilter SignFilter

	NumScored int
	// contains filtered or unexported fields
}

func NewResults

func NewResults(topN int, threshold float64, signFilter SignFilter) *Results

NewResults creates a new instance of results to track similar vectors

func (*Results) Fetch

func (r *Results) Fetch() Scores

Fetch returns the sorted scores in ascending order

func (*Results) Update

func (r *Results) Update(s Score)

Update records the input score

type Score

type Score struct {
	UID   uint64  `json:"uid"`
	Score float64 `json:"score"`
}

type Scores

type Scores []Score

Scores is a slice of individual Score's

func (Scores) Len

func (s Scores) Len() int

func (Scores) Less

func (s Scores) Less(i, j int) bool

func (*Scores) Pop

func (s *Scores) Pop() interface{}

Pop implements the function in the heap interface

func (*Scores) Push

func (s *Scores) Push(x interface{})

Push implements the function in the heap interface

func (Scores) Scores

func (s Scores) Scores() []float64

func (Scores) Swap

func (s Scores) Swap(i, j int)

func (Scores) UIDs

func (s Scores) UIDs() []uint64

type SearchOptions

type SearchOptions struct {
	NumToReturn int        `json:"num_to_return"`
	Threshold   float64    `json:"threshold"`
	SignFilter  SignFilter `json:"sign_filter"`
}

SearchOptions represent a set of parameters to be used to customize search results

func NewDefaultSearchOptions

func NewDefaultSearchOptions() *SearchOptions

NewDefaultSearchOptions returns a default set of parameters to be used for search.

func (*SearchOptions) Validate

func (s *SearchOptions) Validate() error

Validate returns an error if any of the input options are invalid

type SignFilter

type SignFilter int

type SimpleDocument

type SimpleDocument struct {
	UID    uint64    `json:"uid"`
	Vector []float64 `json:"vector"`
}

func NewSimpleDocument

func NewSimpleDocument(uid uint64, v []float64) *SimpleDocument

func (SimpleDocument) GetUID

func (d SimpleDocument) GetUID() uint64

func (SimpleDocument) GetVector

func (d SimpleDocument) GetVector() []float64

func (SimpleDocument) Register

func (d SimpleDocument) Register()

type Statistics

type Statistics struct {
	NumDocs             int                  `json:"num_docs"`
	FalseNegativeErrors []FalseNegativeError `json:"false_negative_errors"`
}

Statistics returns the total number of indexed documents along with a slice of the false negative errors for a variety of query thresholds. This can help determine if the configured number of hyperplanes and tables can give the desired results for a given threshold.

type Table

type Table struct {
	Hyperplanes *Hyperplanes
	Table       map[uint16]*Bitmap
	Doc2Hash    map[uint64]uint16
}

Table maps buckets to a bitmap of document ids. Where documents are stored in the table is determined by finding the bucket a document is mapped to.

func NewTable

func NewTable(h *Hyperplanes) (*Table, error)

func NewTables

func NewTables(opt *Options, ht []*Hyperplanes) ([]*Table, error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL