vectorstores

package
v0.31.1 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 5, 2026 License: MIT Imports: 11 Imported by: 0

Documentation

Overview

Package vectorstores provides interfaces and implementations for vector databases. Vector stores are used to persist and search document embeddings for RAG applications.

Index

Constants

This section is empty.

Variables

View Source
var ErrCollectionNotFound = errors.New("collection not found")

ErrCollectionNotFound is returned when a collection does not exist.

Functions

func ToRetriever

func ToRetriever(vectorStore VectorStore, numDocs int, options ...Option) schema.Retriever

ToRetriever creates a retriever from a vector store.

Types

type ContextNetwork added in v0.18.0

// ContextNetwork represents the graph neighborhood of a piece of code.
type ContextNetwork struct {
	// Dependencies are the documents that the current code imports or depends on.
	Dependencies []schema.Document
	// Dependents are the documents that import or depend on the current code
	// (used for impact analysis).
	Dependents []schema.Document
}

ContextNetwork represents the graph neighborhood of a piece of code.

type DefinitionRetriever added in v0.22.0

// DefinitionRetriever looks up symbol definitions through its GetDefinition
// method. Values are created from a VectorStore with NewDefinitionRetriever.
type DefinitionRetriever struct {
	// contains filtered or unexported fields
}

func NewDefinitionRetriever added in v0.22.0

func NewDefinitionRetriever(store VectorStore) (*DefinitionRetriever, error)

func (*DefinitionRetriever) GetDefinition added in v0.22.0

func (r *DefinitionRetriever) GetDefinition(ctx context.Context, symbolName string) ([]schema.Document, error)

GetDefinition looks up a symbol definition using hybrid search (dense + sparse). It filters on the identifier and is_definition metadata for precise results.

type DependencyRetriever added in v0.18.0

// DependencyRetriever optimizes RAG by traversing the dependency graph.
// Its GetContextNetwork method takes a package name and its imports and
// returns the corresponding ContextNetwork.
type DependencyRetriever struct {
	// contains filtered or unexported fields
}

DependencyRetriever optimizes RAG by traversing the dependency graph.

func NewDependencyRetriever added in v0.18.0

func NewDependencyRetriever(store VectorStore) (*DependencyRetriever, error)

NewDependencyRetriever creates a new graph-based retriever. It returns an error if store is nil.

func (*DependencyRetriever) GetContextNetwork added in v0.18.0

func (r *DependencyRetriever) GetContextNetwork(ctx context.Context, packageName string, imports []string) (*ContextNetwork, error)

GetContextNetwork retrieves both the upstream dependencies and the downstream dependents (impact analysis) of the given package.

type DocumentWithScore

// DocumentWithScore represents a document with its similarity score.
type DocumentWithScore struct {
	// Document is the retrieved document.
	Document schema.Document
	// Score is the similarity score; a higher value means more similar.
	Score float32
}

DocumentWithScore represents a document with its similarity score.

type HyDEOption added in v0.23.0

// HyDEOption is a functional option that configures a HyDERetriever.
// Options are passed to NewHyDERetriever; WithNumGenerations is one of them.
type HyDEOption func(*HyDERetriever)

func WithNumGenerations added in v0.23.0

func WithNumGenerations(n int) HyDEOption

WithNumGenerations sets how many hypothetical documents to generate.

type HyDERetriever added in v0.23.0

// HyDERetriever implements the Hypothetical Document Embedding pattern: a
// generated hypothetical answer to the query is used as the search query
// against the base retriever.
type HyDERetriever struct {
	// BaseRetriever performs the actual similarity search.
	BaseRetriever schema.Retriever
	// Generator produces a hypothetical document from a query.
	Generator func(ctx context.Context, query string) (string, error)
	// NumGenerations controls how many hypothetical docs to generate (default 1).
	// When > 1, results from all generated docs are deduplicated.
	NumGenerations int
}

HyDERetriever implements the Hypothetical Document Embedding pattern. It asks an LLM to generate a hypothetical answer to the query, then uses that hypothetical answer as the search query against the base retriever. This often finds better matches because the hypothetical answer is closer in embedding space to the actual stored documents.

func NewHyDERetriever added in v0.23.0

func NewHyDERetriever(baseRetriever schema.Retriever, generator func(ctx context.Context, query string) (string, error), opts ...HyDEOption) *HyDERetriever

func (*HyDERetriever) GetRelevantDocuments added in v0.23.0

func (r *HyDERetriever) GetRelevantDocuments(ctx context.Context, query string) ([]schema.Document, error)

type MultiQueryRetriever added in v0.21.0

// MultiQueryRetriever retrieves documents through its GetRelevantDocuments
// method.
// NOTE(review): the field comments suggest it searches Store with several
// generated variations of the query and deduplicates the combined results —
// confirm against the implementation.
type MultiQueryRetriever struct {
	// NOTE(review): Store, LLM, NumDocuments and Count are undocumented in the
	// source. Presumably Store is the vector store that is searched, LLM
	// generates the query variations, NumDocuments is the number of documents
	// requested per query and Count is the number of variations — TODO confirm.
	Store        VectorStore
	LLM          llms.Model
	NumDocuments int
	Count        int
	// MaxResults is the maximum number of results to return after
	// deduplication across all query variations.
	// Defaults to NumDocuments when zero.
	MaxResults int
	// SparseGenFunc is a hook that generates sparse vectors for the newly
	// generated queries.
	SparseGenFunc func(ctx context.Context, queries []string) ([]*schema.SparseVector, error)
}

func (MultiQueryRetriever) GetRelevantDocuments added in v0.21.0

func (r MultiQueryRetriever) GetRelevantDocuments(ctx context.Context, query string) ([]schema.Document, error)

type Option

// Option configures vector store operations. It is a function applied to an
// *Options value; ParseOptions builds an Options from a list of them.
type Option func(*Options)

Option configures vector store operations.

func WithCollectionName added in v0.7.1

func WithCollectionName(name string) Option

WithCollectionName sets the collection name for the operation.

func WithEmbedder

func WithEmbedder(embedder embeddings.Embedder) Option

WithEmbedder sets the embedder for the operation.

func WithFilter

func WithFilter(key string, value any) Option

WithFilter adds a single metadata filter for the search.

func WithFilters

func WithFilters(filters map[string]any) Option

WithFilters sets metadata filters for the search.

func WithNameSpace

func WithNameSpace(namespace string) Option

WithNameSpace sets the namespace for the operation.

func WithScoreThreshold

func WithScoreThreshold(threshold float32) Option

WithScoreThreshold sets the minimum score threshold for results.

func WithSparseQueries added in v0.21.0

func WithSparseQueries(sparse []*schema.SparseVector) Option

WithSparseQueries sets sparse vectors for batch hybrid search.

func WithSparseQuery added in v0.21.0

func WithSparseQuery(sparse *schema.SparseVector) Option

WithSparseQuery sets the sparse vector for hybrid search.

type Options

// Options contains configuration for vector store operations.
type Options struct {
	// Embedder overrides the default embedder for this operation.
	Embedder embeddings.Embedder
	// NameSpace is an optional namespace for the operation.
	NameSpace string
	// CollectionName specifies the collection to use.
	CollectionName string
	// ScoreThreshold is the minimum score; results scoring below it are
	// filtered out.
	ScoreThreshold float32
	// Filters contains metadata filters for the search.
	Filters map[string]any
	// SparseQuery is the sparse vector for hybrid search.
	SparseQuery *schema.SparseVector
	// SparseQueries are sparse vectors for batch hybrid search.
	SparseQueries []*schema.SparseVector
}

Options contains configuration for vector store operations.

func ParseOptions

func ParseOptions(options ...Option) Options

ParseOptions creates Options from functional options.

type RerankingRetriever added in v0.19.0

// RerankingRetriever wraps a standard retriever and uses a reranker to refine
// its results.
type RerankingRetriever struct {
	// Retriever is the wrapped retriever; Reranker refines its results.
	Retriever schema.Retriever
	Reranker  schema.Reranker
	TopK      int // Final number of documents to return after reranking

	// CandidateFilter pre-filters candidates before they are sent to the
	// reranker. Useful for cheap filtering (e.g. BM25 scoring) to reduce the
	// number of documents the expensive LLM-based reranker has to process.
	CandidateFilter func(query string, docs []schema.Document) []schema.Document

	// MinScore filters out documents that have a reranked score below this threshold.
	// If zero, no threshold is applied.
	MinScore float32
}

RerankingRetriever wraps a standard retriever and uses a reranker to refine results.

func (RerankingRetriever) GetRelevantDocuments added in v0.19.0

func (r RerankingRetriever) GetRelevantDocuments(ctx context.Context, query string) ([]schema.Document, error)

func (RerankingRetriever) GetRelevantScoredDocuments added in v0.19.0

func (r RerankingRetriever) GetRelevantScoredDocuments(ctx context.Context, query string) ([]schema.ScoredDocument, error)

type Retriever

// Retriever is the interface for fetching relevant documents for a query.
type Retriever interface {
	// GetRelevantDocuments returns the documents relevant to query.
	GetRelevantDocuments(ctx context.Context, query string) ([]schema.Document, error)
}

Retriever is the interface for fetching relevant documents for a query.

type ScoredRetriever added in v0.19.0

// ScoredRetriever is a retriever that returns documents with relevance scores.
type ScoredRetriever interface {
	// GetRelevantScoredDocuments returns the documents relevant to query
	// together with their relevance scores.
	GetRelevantScoredDocuments(ctx context.Context, query string) ([]schema.ScoredDocument, error)
}

ScoredRetriever is a retriever that returns documents with relevance scores.

type VectorStore

// VectorStore is the interface for vector database operations.
// Implementations support document storage, similarity search, and
// collection management.
type VectorStore interface {
	// AddDocuments adds documents to the vector store and returns their IDs.
	AddDocuments(ctx context.Context, docs []schema.Document, options ...Option) ([]string, error)
	// SimilaritySearch returns documents similar to the query.
	SimilaritySearch(ctx context.Context, query string, numDocuments int, options ...Option) ([]schema.Document, error)
	// SimilaritySearchBatch returns documents similar to multiple queries.
	SimilaritySearchBatch(ctx context.Context, queries []string, numDocuments int, options ...Option) ([][]schema.Document, error)
	// SimilaritySearchWithScores returns documents with similarity scores.
	SimilaritySearchWithScores(ctx context.Context, query string, numDocuments int, options ...Option) ([]DocumentWithScore, error)
	// ListCollections returns all collection names.
	ListCollections(ctx context.Context) ([]string, error)
	// DeleteCollection deletes a collection by name.
	DeleteCollection(ctx context.Context, collectionName string) error
	// DeleteDocumentsByFilter deletes documents matching the filter.
	DeleteDocumentsByFilter(ctx context.Context, filters map[string]any, options ...Option) error
}

VectorStore is the interface for vector database operations. Implementations support document storage, similarity search, and collection management.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL