Documentation
¶
Overview ¶
Package vectorstores provides interfaces and implementations for vector databases. Vector stores are used to persist and search document embeddings for retrieval-augmented generation (RAG) applications.
Index ¶
- Variables
- func ToRetriever(vectorStore VectorStore, numDocs int, options ...Option) schema.Retriever
- type ContextNetwork
- type DefinitionRetriever
- type DependencyRetriever
- type DocumentWithScore
- type HyDEOption
- type HyDERetriever
- type MultiQueryRetriever
- type Option
- func WithCollectionName(name string) Option
- func WithEmbedder(embedder embeddings.Embedder) Option
- func WithFilter(key string, value any) Option
- func WithFilters(filters map[string]any) Option
- func WithNameSpace(namespace string) Option
- func WithScoreThreshold(threshold float32) Option
- func WithSparseQueries(sparse []*schema.SparseVector) Option
- func WithSparseQuery(sparse *schema.SparseVector) Option
- type Options
- type RerankingRetriever
- type Retriever
- type ScoredRetriever
- type VectorStore
Constants ¶
This section is empty.
Variables ¶
var ErrCollectionNotFound = errors.New("collection not found")
ErrCollectionNotFound is returned when a collection does not exist.
Functions ¶
func ToRetriever ¶
func ToRetriever(vectorStore VectorStore, numDocs int, options ...Option) schema.Retriever
ToRetriever creates a retriever from a vector store.
Types ¶
type ContextNetwork ¶ added in v0.18.0
type ContextNetwork struct {
// Dependencies are the documents that the current code imports or depends on.
Dependencies []schema.Document
// Dependents are the documents that import or depend on the current code (used for impact analysis).
Dependents []schema.Document
}
ContextNetwork represents the graph neighborhood of a piece of code.
type DefinitionRetriever ¶ added in v0.22.0
type DefinitionRetriever struct {
// contains filtered or unexported fields
}
func NewDefinitionRetriever ¶ added in v0.22.0
func NewDefinitionRetriever(store VectorStore) (*DefinitionRetriever, error)
func (*DefinitionRetriever) GetDefinition ¶ added in v0.22.0
func (r *DefinitionRetriever) GetDefinition(ctx context.Context, symbolName string) ([]schema.Document, error)
GetDefinition looks up a symbol definition using hybrid search (dense + sparse). It filters on the identifier and is_definition metadata for precise results.
type DependencyRetriever ¶ added in v0.18.0
type DependencyRetriever struct {
// contains filtered or unexported fields
}
DependencyRetriever optimizes RAG by traversing the dependency graph.
func NewDependencyRetriever ¶ added in v0.18.0
func NewDependencyRetriever(store VectorStore) (*DependencyRetriever, error)
NewDependencyRetriever creates a new graph-based retriever. It returns an error if store is nil.
func (*DependencyRetriever) GetContextNetwork ¶ added in v0.18.0
func (r *DependencyRetriever) GetContextNetwork(ctx context.Context, packageName string, imports []string) (*ContextNetwork, error)
GetContextNetwork retrieves both upstream dependencies and downstream impact (dependents).
type DocumentWithScore ¶
type DocumentWithScore struct {
// Document is the retrieved document.
Document schema.Document
// Score is the similarity score (higher is more similar).
Score float32
}
DocumentWithScore represents a document with its similarity score.
type HyDEOption ¶ added in v0.23.0
type HyDEOption func(*HyDERetriever)
func WithNumGenerations ¶ added in v0.23.0
func WithNumGenerations(n int) HyDEOption
WithNumGenerations sets how many hypothetical documents to generate.
type HyDERetriever ¶ added in v0.23.0
type HyDERetriever struct {
// BaseRetriever performs the actual similarity search.
BaseRetriever schema.Retriever
// Generator produces a hypothetical document from a query.
Generator func(ctx context.Context, query string) (string, error)
// NumGenerations controls how many hypothetical docs to generate (default 1).
// When greater than 1, the results from all generated docs are deduplicated.
NumGenerations int
}
HyDERetriever implements the Hypothetical Document Embedding pattern. It asks an LLM to generate a hypothetical answer to the query, then uses that hypothetical answer as the search query against the base retriever. This often finds better matches because the hypothetical answer is closer in embedding space to the actual stored documents.
func NewHyDERetriever ¶ added in v0.23.0
func NewHyDERetriever(baseRetriever schema.Retriever, generator func(ctx context.Context, query string) (string, error), opts ...HyDEOption) *HyDERetriever
func (*HyDERetriever) GetRelevantDocuments ¶ added in v0.23.0
type MultiQueryRetriever ¶ added in v0.21.0
type MultiQueryRetriever struct {
Store VectorStore
LLM llms.Model
NumDocuments int
Count int
// MaxResults is the maximum number of results to return after deduplication
// across all query variations. Defaults to NumDocuments when zero.
MaxResults int
// SparseGenFunc is a hook that generates sparse vectors for the newly generated queries.
SparseGenFunc func(ctx context.Context, queries []string) ([]*schema.SparseVector, error)
}
func (MultiQueryRetriever) GetRelevantDocuments ¶ added in v0.21.0
type Option ¶
type Option func(*Options)
Option configures vector store operations.
func WithCollectionName ¶ added in v0.7.1
WithCollectionName sets the collection name for the operation.
func WithEmbedder ¶
func WithEmbedder(embedder embeddings.Embedder) Option
WithEmbedder sets the embedder for the operation.
func WithFilter ¶
WithFilter adds a single metadata filter for the search.
func WithFilters ¶
WithFilters sets metadata filters for the search.
func WithNameSpace ¶
WithNameSpace sets the namespace for the operation.
func WithScoreThreshold ¶
WithScoreThreshold sets the minimum score threshold for results.
func WithSparseQueries ¶ added in v0.21.0
func WithSparseQueries(sparse []*schema.SparseVector) Option
WithSparseQueries sets sparse vectors for batch hybrid search.
func WithSparseQuery ¶ added in v0.21.0
func WithSparseQuery(sparse *schema.SparseVector) Option
WithSparseQuery sets the sparse vector for hybrid search.
type Options ¶
type Options struct {
// Embedder overrides the default embedder for this operation.
Embedder embeddings.Embedder
// NameSpace is an optional namespace for the operation.
NameSpace string
// CollectionName specifies the collection to use.
CollectionName string
// ScoreThreshold filters out results whose score is below this threshold.
ScoreThreshold float32
// Filters contains metadata filters for the search.
Filters map[string]any
// SparseQuery is the sparse vector for hybrid search.
SparseQuery *schema.SparseVector
// SparseQueries are sparse vectors for batch hybrid search.
SparseQueries []*schema.SparseVector
}
Options contains configuration for vector store operations.
func ParseOptions ¶
ParseOptions creates Options from functional options.
type RerankingRetriever ¶ added in v0.19.0
type RerankingRetriever struct {
Retriever schema.Retriever
Reranker schema.Reranker
TopK int // Final number of documents to return after reranking
// CandidateFilter pre-filters candidates before they are sent to the reranker.
// Useful for cheap filtering (e.g. BM25 scoring) to reduce the number of
// documents the expensive LLM-based reranker has to process.
CandidateFilter func(query string, docs []schema.Document) []schema.Document
// MinScore filters out documents that have a reranked score below this threshold.
// If zero, no threshold is applied.
MinScore float32
}
RerankingRetriever wraps a standard retriever and uses a reranker to refine results.
func (RerankingRetriever) GetRelevantDocuments ¶ added in v0.19.0
func (RerankingRetriever) GetRelevantScoredDocuments ¶ added in v0.19.0
func (r RerankingRetriever) GetRelevantScoredDocuments(ctx context.Context, query string) ([]schema.ScoredDocument, error)
type Retriever ¶
type Retriever interface {
GetRelevantDocuments(ctx context.Context, query string) ([]schema.Document, error)
}
Retriever is the interface for fetching relevant documents for a query.
type ScoredRetriever ¶ added in v0.19.0
type ScoredRetriever interface {
GetRelevantScoredDocuments(ctx context.Context, query string) ([]schema.ScoredDocument, error)
}
ScoredRetriever is a retriever that returns documents with relevance scores.
type VectorStore ¶
type VectorStore interface {
// AddDocuments adds documents to the vector store and returns their IDs.
AddDocuments(ctx context.Context, docs []schema.Document, options ...Option) ([]string, error)
// SimilaritySearch returns documents similar to the query.
SimilaritySearch(ctx context.Context, query string, numDocuments int, options ...Option) ([]schema.Document, error)
// SimilaritySearchBatch returns documents similar to multiple queries.
SimilaritySearchBatch(ctx context.Context, queries []string, numDocuments int, options ...Option) ([][]schema.Document, error)
// SimilaritySearchWithScores returns documents with similarity scores.
SimilaritySearchWithScores(ctx context.Context, query string, numDocuments int, options ...Option) ([]DocumentWithScore, error)
// ListCollections returns all collection names.
ListCollections(ctx context.Context) ([]string, error)
// DeleteCollection deletes a collection by name.
DeleteCollection(ctx context.Context, collectionName string) error
// DeleteDocumentsByFilter deletes documents matching the filter.
DeleteDocumentsByFilter(ctx context.Context, filters map[string]any, options ...Option) error
}
VectorStore is the interface for vector database operations. Implementations support document storage, similarity search, and collection management.