lsh

package module
v0.0.0-...-90dd7a0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 2, 2025 License: MIT Imports: 8 Imported by: 0

README

简介

该项目实现了一个局部敏感哈希(LSH)算法,用于高效地进行相似性搜索和向量查询。

功能

  • 添加向量
  • 查询相似向量
  • 批量查询相似向量
  • 迁移哈希表
  • 评估LSH算法的精度、召回率和F1值
  • 保存和加载哈希表
  • 设置不同精度处理器

安装

克隆

git clone https://github.com/lejianwen/lsh
cd lsh

使用

go get -u github.com/lejianwen/lsh

添加向量

var vectors = map[string][]float64{
"item1": {-0.010979787, 0.0208240951, -0.006406612, 0.0070472737, ...},
"item2": {-0.0402136976, 0.0058495451, -0.0306575614, ...},
"item3": {-0.0333253096, -0.0037028126, -0.0085615029, ...},
"item4": {-0.01552303, 0.0351111111, -0.0183980546, ...},
"item5": {0.0097557013, 0.0152862035, 0.0014691215,...},
"item6": {-0.0180807306, 0.0101798173, -0.0130499444,...},
...
}

l := NewLSH(10, 10, 2048)
l.AddVector("item1", vectors["item1"])

l.AddVectors(vectors)

vec := []float64{3,4,...} //非L2归一化的向量
//先归一化
vec = lsh.L2Normalize(vec)
l.AddVector("item1", vec)

查询相似向量

neighbors := l.Query(vectors["item1"], 2)
fmt.Printf("Query result: %v\n", neighbors)

批量查询相似向量

neighbors := l.BatchQuery(vectors, 2)
fmt.Printf("Query result: %+v\n", neighbors)

迁移哈希表

err, _ := l.Migrate(12, 12)
if err != nil {
log.Fatalf("Migrate error: %v", err)
}

评估LSH算法

precision, recall, f1 := l.EvaluateLSH(qvectors, groundTruth, 2)
fmt.Printf("Evaluate result: precision=%v, recall=%v, f1=%v\n", precision, recall, f1)

保存和加载到文件

l.SetFilePath("./")
err := l.SaveToFile()
if err != nil {
log.Fatalf("SaveToFile error: %v", err)
}

err = l.LoadFromFile()
if err != nil {
log.Fatalf("LoadFromFile error: %v", err)
}

设置精度处理器

支持 int8、int16、float32、float64 四种精度。精度越低,内存占用越小,但查询结果的准确性也会随之降低。

l.SetPrecisionHandler(NewPrecisionHandler(PrecisionInt16))

许可证

此项目使用MIT许可证。详见LICENSE文件。

Documentation

Index

Constants

View Source
const (
	PrecisionFloat32 string = "float32"
	PrecisionFloat64 string = "float64"
	PrecisionInt8    string = "int8"
	PrecisionInt16   string = "int16"
)
View Source
const (
	Int8Scale  float64 = 126
	Int16Scale float64 = 32766
)

Variables

This section is empty.

Functions

func DotProduct

func DotProduct[T PrecisionType](vecA, vecB []T) float64

计算点积

func L2Normalize

func L2Normalize(vec []float64) []float64

L2Normalize 归一化

Types

type Float32Handler

type Float32Handler struct{}

func (*Float32Handler) ConvertVector

func (h *Float32Handler) ConvertVector(vec []float64) interface{}

func (*Float32Handler) DotProduct

func (h *Float32Handler) DotProduct(a, b interface{}) float64

func (*Float32Handler) Type

func (h *Float32Handler) Type() string

type Float64Handler

type Float64Handler struct{}

高精度处理器(float64)

func (*Float64Handler) ConvertVector

func (h *Float64Handler) ConvertVector(vec []float64) interface{}

func (*Float64Handler) DotProduct

func (h *Float64Handler) DotProduct(a, b interface{}) float64

func (*Float64Handler) Type

func (h *Float64Handler) Type() string

type Int8Handler

type Int8Handler struct{}

func (*Int8Handler) ConvertVector

func (h *Int8Handler) ConvertVector(vec []float64) interface{}

func (*Int8Handler) DotProduct

func (h *Int8Handler) DotProduct(a, b interface{}) float64

func (*Int8Handler) Type

func (h *Int8Handler) Type() string

type Int16Handler

type Int16Handler struct{}

func (*Int16Handler) ConvertVector

func (h *Int16Handler) ConvertVector(vec []float64) interface{}

func (*Int16Handler) DotProduct

func (h *Int16Handler) DotProduct(a, b interface{}) float64

func (*Int16Handler) Type

func (h *Int16Handler) Type() string

type LSH

type LSH struct {
	// contains filtered or unexported fields
}

LSH 结构体

func NewLSH

func NewLSH(numTables, numHashes, vectorSize int) *LSH

初始化 LSH

func NewNoCacheLSH

func NewNoCacheLSH(numTables, numHashes, vectorSize int) *LSH

func (*LSH) AddVector

func (l *LSH) AddVector(id string, vector []float64)

添加向量

func (*LSH) AddVectors

func (l *LSH) AddVectors(vectors map[string][]float64)

批量添加向量

func (*LSH) BatchQuery

func (l *LSH) BatchQuery(vectors map[string][]float64, k int) map[string][]string

批量查询

func (*LSH) DisableCache

func (l *LSH) DisableCache()

func (*LSH) EnableCache

func (l *LSH) EnableCache()

func (*LSH) EvaluateLSH

func (l *LSH) EvaluateLSH(testQueries map[string][]float64, groundTruth map[string][]string, k int) (float64, float64, float64)

评估 LSH 的精度、召回率和 F1 值

func (*LSH) LoadFromFile

func (l *LSH) LoadFromFile() error

func (*LSH) LoadRandomVecs

func (l *LSH) LoadRandomVecs() error

func (*LSH) Migrate

func (l *LSH) Migrate(numTables, numHashes int) (error, *LSH)

func (*LSH) Query

func (l *LSH) Query(vector []float64, k int) []string

查询近似最近邻

func (*LSH) SaveRandomVecs

func (l *LSH) SaveRandomVecs() error

SaveRandomVecs 保存随机向量

func (*LSH) SaveToFile

func (l *LSH) SaveToFile() error

func (*LSH) SetFilePath

func (l *LSH) SetFilePath(filePath string)

func (*LSH) SetPrecisionHandler

func (l *LSH) SetPrecisionHandler(precisionHandler PrecisionHandler)

type PrecisionHandler

type PrecisionHandler interface {
	Type() string
	ConvertVector([]float64) interface{}
	DotProduct(interface{}, interface{}) float64
}

func NewPrecisionHandler

func NewPrecisionHandler(precision string) PrecisionHandler

type PrecisionType

type PrecisionType interface {
	~float32 | ~float64 | ~int8 | ~int16
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL