embeddings

package
v1.4.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 4, 2026 License: Apache-2.0 Imports: 10 Imported by: 0

Documentation

Overview

Package embeddings provides vector embedding generation with multiple provider support.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type AdaptiveCache added in v1.3.0

type AdaptiveCache struct {
	// contains filtered or unexported fields
}

AdaptiveCache implements cache with adaptive TTL based on access patterns.

func NewAdaptiveCache added in v1.3.0

func NewAdaptiveCache(maxSize int, minTTL, maxTTL, baseTTL time.Duration) *AdaptiveCache

NewAdaptiveCache creates a new adaptive cache.

func (*AdaptiveCache) Clear added in v1.3.0

func (c *AdaptiveCache) Clear()

Clear removes all cached entries.

func (*AdaptiveCache) Get added in v1.3.0

func (c *AdaptiveCache) Get(text string) ([]float32, bool)

Get retrieves an embedding and updates access tracking.

func (*AdaptiveCache) GetHitRate added in v1.3.0

func (c *AdaptiveCache) GetHitRate() float64

GetHitRate returns the cache hit rate (0.0-1.0).

func (*AdaptiveCache) GetStats added in v1.3.0

func (c *AdaptiveCache) GetStats() AdaptiveCacheStats

GetStats returns current cache statistics.

func (*AdaptiveCache) Put added in v1.3.0

func (c *AdaptiveCache) Put(text string, embedding []float32)

Put stores an embedding with initial TTL.

func (*AdaptiveCache) RemoveExpired added in v1.3.0

func (c *AdaptiveCache) RemoveExpired() int

RemoveExpired removes all expired entries.

func (*AdaptiveCache) Size added in v1.3.0

func (c *AdaptiveCache) Size() int

Size returns the current cache size.

type AdaptiveCacheEntry added in v1.3.0

type AdaptiveCacheEntry struct {
	// contains filtered or unexported fields
}

AdaptiveCacheEntry extends cache entry with access tracking.

type AdaptiveCacheStats added in v1.3.0

type AdaptiveCacheStats struct {
	Hits           int64         `json:"hits"`
	Misses         int64         `json:"misses"`
	Evictions      int64         `json:"evictions"`
	TotalCached    int           `json:"total_cached"`
	HotEntries     int64         `json:"hot_entries"`  // High frequency
	ColdEntries    int64         `json:"cold_entries"` // Low frequency
	AvgTTL         time.Duration `json:"avg_ttl"`
	TTLAdjustments int64         `json:"ttl_adjustments"`
	LastCleared    time.Time     `json:"last_cleared"`
}

AdaptiveCacheStats tracks cache behavior with adaptive metrics.

type AdaptiveCachedProvider added in v1.3.0

type AdaptiveCachedProvider struct {
	// contains filtered or unexported fields
}

AdaptiveCachedProvider wraps a provider with adaptive caching.

func NewAdaptiveCachedProvider added in v1.3.0

func NewAdaptiveCachedProvider(provider Provider, config Config, minTTL, maxTTL time.Duration) *AdaptiveCachedProvider

NewAdaptiveCachedProvider wraps a provider with adaptive caching.

func (*AdaptiveCachedProvider) ClearCache added in v1.3.0

func (c *AdaptiveCachedProvider) ClearCache()

func (*AdaptiveCachedProvider) Dimensions added in v1.3.0

func (c *AdaptiveCachedProvider) Dimensions() int

func (*AdaptiveCachedProvider) Embed added in v1.3.0

func (c *AdaptiveCachedProvider) Embed(ctx context.Context, text string) ([]float32, error)

func (*AdaptiveCachedProvider) EmbedBatch added in v1.3.0

func (c *AdaptiveCachedProvider) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)

func (*AdaptiveCachedProvider) GetAdaptiveStats added in v1.3.0

func (c *AdaptiveCachedProvider) GetAdaptiveStats() AdaptiveCacheStats

func (*AdaptiveCachedProvider) GetStats added in v1.3.0

func (c *AdaptiveCachedProvider) GetStats() CacheStats

func (*AdaptiveCachedProvider) Name added in v1.3.0

func (c *AdaptiveCachedProvider) Name() string

type CacheStats

type CacheStats struct {
	Hits        int64     `json:"hits"`
	Misses      int64     `json:"misses"`
	Evictions   int64     `json:"evictions"`
	TotalCached int       `json:"total_cached"`
	LastCleared time.Time `json:"last_cleared"`
}

CacheStats tracks cache performance metrics.

type CachedProvider

type CachedProvider struct {
	// contains filtered or unexported fields
}

CachedProvider wraps a provider with LRU caching.

func NewCachedProvider

func NewCachedProvider(provider Provider, config Config) *CachedProvider

NewCachedProvider wraps a provider with caching.

func (*CachedProvider) CleanExpired

func (c *CachedProvider) CleanExpired() int

CleanExpired removes expired cache entries.

func (*CachedProvider) ClearCache

func (c *CachedProvider) ClearCache()

ClearCache removes all cached embeddings.

func (*CachedProvider) Cost

func (c *CachedProvider) Cost() float64

func (*CachedProvider) Dimensions

func (c *CachedProvider) Dimensions() int

func (*CachedProvider) Embed

func (c *CachedProvider) Embed(ctx context.Context, text string) ([]float32, error)

func (*CachedProvider) EmbedBatch

func (c *CachedProvider) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)

func (*CachedProvider) GetStats

func (c *CachedProvider) GetStats() CacheStats

GetStats returns cache statistics.

func (*CachedProvider) HitRate

func (c *CachedProvider) HitRate() float64

HitRate returns the cache hit rate (0.0 - 1.0).

func (*CachedProvider) IsAvailable

func (c *CachedProvider) IsAvailable(ctx context.Context) bool

func (*CachedProvider) Name

func (c *CachedProvider) Name() string

type Config

type Config struct {
	// Provider selects which embedding provider to use
	// Options: "openai", "transformers", "sentence", "onnx", "auto"
	Provider string `json:"provider"`

	// OpenAI specific
	OpenAIKey     string `json:"openai_key,omitempty"`
	OpenAIModel   string `json:"openai_model,omitempty"` // default: text-embedding-3-small
	OpenAITimeout int    `json:"openai_timeout,omitempty"`

	// Local Transformers specific
	TransformersModel string `json:"transformers_model,omitempty"` // default: all-MiniLM-L6-v2
	TransformersCache string `json:"transformers_cache,omitempty"`

	// Sentence Transformers specific
	SentenceModel string `json:"sentence_model,omitempty"` // default: paraphrase-multilingual
	SentenceCache string `json:"sentence_cache,omitempty"`

	// ONNX specific
	ONNXModel string `json:"onnx_model,omitempty"` // default: ms-marco-MiniLM
	ONNXCache string `json:"onnx_cache,omitempty"`
	UseGPU    bool   `json:"use_gpu,omitempty"`

	// Cache configuration
	EnableCache  bool          `json:"enable_cache"`
	CacheTTL     time.Duration `json:"cache_ttl"`
	CacheMaxSize int           `json:"cache_max_size"` // Max cached embeddings

	// Fallback configuration
	EnableFallback   bool     `json:"enable_fallback"`
	FallbackPriority []string `json:"fallback_priority"` // e.g., ["openai", "transformers", "onnx"]
}

Config holds configuration for embedding providers.

func DefaultConfig

func DefaultConfig() Config

DefaultConfig returns sensible defaults for embedding configuration.

type Factory

type Factory struct {
	// contains filtered or unexported fields
}

Factory creates embedding providers based on configuration.

func NewFactory

func NewFactory(config Config) *Factory

NewFactory creates a new provider factory.

func NewFactoryFromEnv

func NewFactoryFromEnv() *Factory

NewFactoryFromEnv creates a factory from environment variables.

func (*Factory) Create

func (f *Factory) Create(ctx context.Context) (Provider, error)

Create creates a provider based on configuration.

func (*Factory) CreateWithFallback

func (f *Factory) CreateWithFallback(ctx context.Context) (Provider, error)

CreateWithFallback creates a provider with automatic fallback support.

type FallbackProvider

type FallbackProvider struct {
	// contains filtered or unexported fields
}

FallbackProvider wraps a provider with automatic fallback.

func (*FallbackProvider) Cost

func (f *FallbackProvider) Cost() float64

func (*FallbackProvider) Dimensions

func (f *FallbackProvider) Dimensions() int

func (*FallbackProvider) Embed

func (f *FallbackProvider) Embed(ctx context.Context, text string) ([]float32, error)

func (*FallbackProvider) EmbedBatch

func (f *FallbackProvider) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)

func (*FallbackProvider) IsAvailable

func (f *FallbackProvider) IsAvailable(ctx context.Context) bool

func (*FallbackProvider) Name

func (f *FallbackProvider) Name() string

type LRUCache

type LRUCache struct {
	// contains filtered or unexported fields
}

LRUCache implements an LRU cache for embeddings.

func NewLRUCache

func NewLRUCache(maxSize int, ttl time.Duration) *LRUCache

NewLRUCache creates a new LRU cache.

func (*LRUCache) Clear

func (c *LRUCache) Clear()

Clear removes all cached entries.

func (*LRUCache) Get

func (c *LRUCache) Get(text string) ([]float32, bool)

Get retrieves an embedding from cache.

func (*LRUCache) Put

func (c *LRUCache) Put(text string, embedding []float32)

Put stores an embedding in cache.

func (*LRUCache) RemoveExpired

func (c *LRUCache) RemoveExpired() int

RemoveExpired removes all expired entries.

func (*LRUCache) Size

func (c *LRUCache) Size() int

Size returns the current cache size.

type MockProvider

type MockProvider struct {
	// contains filtered or unexported fields
}

MockProvider is a mock embedding provider for testing.

func NewMockProvider

func NewMockProvider(name string, dims int) *MockProvider

NewMockProvider creates a new mock provider.

func (*MockProvider) Cost

func (m *MockProvider) Cost() float64

func (*MockProvider) Dimensions

func (m *MockProvider) Dimensions() int

func (*MockProvider) Embed

func (m *MockProvider) Embed(ctx context.Context, text string) ([]float32, error)

func (*MockProvider) EmbedBatch

func (m *MockProvider) EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)

func (*MockProvider) IsAvailable

func (m *MockProvider) IsAvailable(ctx context.Context) bool

func (*MockProvider) Name

func (m *MockProvider) Name() string

type Provider

type Provider interface {
	// Name returns the provider identifier (e.g., "openai", "transformers")
	Name() string

	// Dimensions returns the vector dimensionality for this provider
	Dimensions() int

	// Embed generates embeddings for a single text
	Embed(ctx context.Context, text string) ([]float32, error)

	// EmbedBatch generates embeddings for multiple texts efficiently
	EmbedBatch(ctx context.Context, texts []string) ([][]float32, error)

	// IsAvailable checks if the provider is ready to use
	IsAvailable(ctx context.Context) bool

	// Cost returns the estimated cost per 1000 tokens (0.0 for local providers)
	Cost() float64
}

Provider defines the interface for embedding generation services. Implementations include OpenAI, Local Transformers, Sentence Transformers, and ONNX.

type Result

type Result struct {
	ID         string                 `json:"id"`
	Score      float64                `json:"score"`
	Text       string                 `json:"text"`
	Metadata   map[string]interface{} `json:"metadata,omitempty"`
	Embedding  []float32              `json:"-"` // Not serialized
	Distance   float64                `json:"distance,omitempty"`
	Similarity float64                `json:"similarity"`
}

Result represents a semantic search result.

type SimilarityMetric

type SimilarityMetric string

SimilarityMetric defines how to calculate vector similarity.

const (
	// CosineSimilarity measures cosine similarity (default, normalized).
	CosineSimilarity SimilarityMetric = "cosine"

	// EuclideanDistance measures L2 distance.
	EuclideanDistance SimilarityMetric = "euclidean"

	// DotProduct measures dot product similarity.
	DotProduct SimilarityMetric = "dotproduct"
)

type Stats

type Stats struct {
	Provider        string        `json:"provider"`
	TotalEmbeddings int64         `json:"total_embeddings"`
	TotalTokens     int64         `json:"total_tokens"`
	TotalCost       float64       `json:"total_cost"`
	AvgLatency      time.Duration `json:"avg_latency"`
	CacheHits       int64         `json:"cache_hits"`
	CacheMisses     int64         `json:"cache_misses"`
	Errors          int64         `json:"errors"`
	LastUsed        time.Time     `json:"last_used"`
}

Stats tracks provider usage statistics.

Directories

Path Synopsis
Package providers implements concrete embedding providers.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL