rag

package

Version: v0.2.12 (latest) | Published: Dec 31, 2025 | License: MIT | Imports: 28 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/takuphilchan/offgrid-llm

Links

Documentation ¶

Overview ¶

Package rag provides distributed RAG index functionality

Index ¶

func AutoTuneChunkingOptions(content string) (ChunkingOptions, DocumentAnalysis)
func GenerateChunkID(documentID string, index int) string
func GenerateDocumentID(content []byte) string
func IsSupportedExtension(ext string) bool
func SupportedExtensions() []string
type Chunk
type ChunkResult
type Chunker
- func NewChunker(opts ChunkingOptions) *Chunker
- func (c *Chunker) ChunkText(documentID, text string) []*Chunk
type ChunkingOptions
- func DefaultChunkingOptions() ChunkingOptions
- func LargeDocumentChunkingOptions() ChunkingOptions
type DistributedNode
type DistributedRAG
- func NewDistributedRAG(config DistributedRAGConfig) *DistributedRAG
- func (dr *DistributedRAG) AddNode(node DistributedNode)
- func (dr *DistributedRAG) GetStats() map[string]interface{}
- func (dr *DistributedRAG) ListNodes() []DistributedNode
- func (dr *DistributedRAG) RebalanceShards(ctx context.Context) error
- func (dr *DistributedRAG) RemoveNode(nodeID string) error
- func (dr *DistributedRAG) Search(ctx context.Context, query string, topK int, ...) (*DistributedSearchResult, error)
- func (dr *DistributedRAG) Stop()
- func (dr *DistributedRAG) SyncIndex(ctx context.Context) error
type DistributedRAGConfig
type DistributedSearchResult
type Document
type DocumentAnalysis
type DocumentParser
- func NewDocumentParser() *DocumentParser
- func (p *DocumentParser) Parse(content []byte, filename string, ext string) (*ParseResult, error)
- func (p *DocumentParser) ParseCSV(content []byte) (*ParseResult, error)
- func (p *DocumentParser) ParseCode(content []byte, ext string) (*ParseResult, error)
- func (p *DocumentParser) ParseDOCX(content []byte) (*ParseResult, error)
- func (p *DocumentParser) ParseHTML(content []byte) (*ParseResult, error)
- func (p *DocumentParser) ParseJSON(content []byte) (*ParseResult, error)
- func (p *DocumentParser) ParsePDF(content []byte) (*ParseResult, error)
- func (p *DocumentParser) ParsePPTX(content []byte) (*ParseResult, error)
- func (p *DocumentParser) ParsePlainText(content []byte) (*ParseResult, error)
- func (p *DocumentParser) ParseRTF(content []byte) (*ParseResult, error)
- func (p *DocumentParser) ParseXLSX(content []byte) (*ParseResult, error)
- func (p *DocumentParser) ParseXML(content []byte) (*ParseResult, error)
type Engine
- func NewEngine(embeddingEngine *inference.EmbeddingEngine, dataDir string) *Engine
- func (e *Engine) AnalyzeDocument(content string) DocumentAnalysis
- func (e *Engine) AutoEnableWithModel(ctx context.Context, availableModels []string) error
- func (e *Engine) AutoRestore(ctx context.Context) error
- func (e *Engine) DeleteDocument(id string) bool
- func (e *Engine) Disable()
- func (e *Engine) Enable(ctx context.Context, embeddingModel string) error
- func (e *Engine) EnhancePrompt(ctx context.Context, userMessage string) (string, *RAGContext, error)
- func (e *Engine) GetDocument(id string) *Document
- func (e *Engine) GetPersistedModel() string
- func (e *Engine) IngestFile(ctx context.Context, filePath string, metadata map[string]string) (*Document, error)
- func (e *Engine) IngestReader(ctx context.Context, name string, reader io.Reader, metadata map[string]string) (*Document, error)
- func (e *Engine) IngestText(ctx context.Context, name, content string, metadata map[string]string) (*Document, error)
- func (e *Engine) IsAutoTuningEnabled() bool
- func (e *Engine) IsEnabled() bool
- func (e *Engine) ListDocuments() []*Document
- func (e *Engine) Search(ctx context.Context, query string, opts SearchOptions) (*RAGContext, error)
- func (e *Engine) SetAutoTuning(enabled bool)
- func (e *Engine) Stats() map[string]interface{}
type NodeSearchResult
type ParseResult
type RAGContext
- func (rc *RAGContext) FormatContext() string
- func (rc *RAGContext) TruncateContext(maxLen int)
- func (rc *RAGContext) UniqueDocumentCount() int
type SQLiteStore
- func NewSQLiteStore(dataDir string) (*SQLiteStore, error)
- func (s *SQLiteStore) AddChunk(chunk *Chunk, embedding []float32) error
- func (s *SQLiteStore) AddDocument(doc *Document) error
- func (s *SQLiteStore) Close() error
- func (s *SQLiteStore) DeleteDocument(id string) error
- func (s *SQLiteStore) GetDocument(id string) (*Document, error)
- func (s *SQLiteStore) HybridSearch(queryEmbedding []float32, query string, limit int, minScore float32, ...) ([]SearchResult, error)
- func (s *SQLiteStore) ListDocuments() ([]*Document, error)
- func (s *SQLiteStore) Search(queryEmbedding []float32, limit int, minScore float32) ([]SearchResult, error)
- func (s *SQLiteStore) Stats() map[string]interface{}
type SearchOptions
- func DefaultSearchOptions() SearchOptions
type SearchResult
type Store
type VectorStore
- func NewVectorStore() *VectorStore
- func (vs *VectorStore) AddChunk(chunk *Chunk, embedding []float32) error
- func (vs *VectorStore) AddDocument(doc *Document) error
- func (vs *VectorStore) Close() error
- func (vs *VectorStore) DeleteDocument(docID string) error
- func (vs *VectorStore) GetAllEmbeddings() map[string][]float32
- func (vs *VectorStore) GetChunk(id string) (*Chunk, error)
- func (vs *VectorStore) GetDocument(id string) (*Document, error)
- func (vs *VectorStore) HybridSearch(queryEmbedding []float32, query string, limit int, minScore float32, ...) ([]SearchResult, error)
- func (vs *VectorStore) ListChunks() []*Chunk
- func (vs *VectorStore) ListDocuments() ([]*Document, error)
- func (vs *VectorStore) Search(queryEmbedding []float32, limit int, minScore float32) ([]SearchResult, error)
- func (vs *VectorStore) Stats() map[string]interface{}

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func AutoTuneChunkingOptions ¶ added in v0.2.11

func AutoTuneChunkingOptions(content string) (ChunkingOptions, DocumentAnalysis)

AutoTuneChunkingOptions analyzes a document and returns optimized chunking options.

func GenerateChunkID ¶

func GenerateChunkID(documentID string, index int) string

GenerateChunkID creates a unique ID for a chunk

func GenerateDocumentID ¶

func GenerateDocumentID(content []byte) string

GenerateDocumentID creates a unique ID for a document based on content hash

func IsSupportedExtension ¶ added in v0.2.3

func IsSupportedExtension(ext string) bool

IsSupportedExtension checks if a file extension is supported

func SupportedExtensions ¶ added in v0.2.3

func SupportedExtensions() []string

SupportedExtensions returns all supported file extensions. This includes formats that can be parsed with built-in or optional external tools.

Types ¶

type Chunk ¶

type Chunk struct {
	ID         string    `json:"id"`
	DocumentID string    `json:"document_id"`
	Content    string    `json:"content"`
	Index      int       `json:"index"`      // Position in document
	StartChar  int       `json:"start_char"` // Character offset in original document
	EndChar    int       `json:"end_char"`
	Embedding  []float32 `json:"-"` // Stored separately for efficiency
	CreatedAt  time.Time `json:"created_at"`
}

Chunk represents a chunk of text from a document

type ChunkResult ¶ added in v0.2.11

type ChunkResult struct {
	DocumentID string            `json:"document_id"`
	ChunkID    string            `json:"chunk_id"`
	Content    string            `json:"content"`
	Score      float64           `json:"score"`
	Metadata   map[string]string `json:"metadata,omitempty"`
	SourceNode string            `json:"source_node,omitempty"`
}

ChunkResult represents a single chunk search result

type Chunker ¶

type Chunker struct {
	// contains filtered or unexported fields
}

Chunker handles splitting documents into chunks

func NewChunker ¶

func NewChunker(opts ChunkingOptions) *Chunker

NewChunker creates a new chunker with the given options

func (*Chunker) ChunkText ¶

func (c *Chunker) ChunkText(documentID, text string) []*Chunk

ChunkText splits text into overlapping chunks

type ChunkingOptions ¶

type ChunkingOptions struct {
	ChunkSize    int    `json:"chunk_size"`    // Target size in characters
	ChunkOverlap int    `json:"chunk_overlap"` // Overlap between chunks
	Separator    string `json:"separator"`     // Primary separator (default: paragraph)
}

ChunkingOptions configures how documents are chunked

func DefaultChunkingOptions ¶

func DefaultChunkingOptions() ChunkingOptions

DefaultChunkingOptions returns sensible defaults for general documents

func LargeDocumentChunkingOptions ¶

func LargeDocumentChunkingOptions() ChunkingOptions

LargeDocumentChunkingOptions returns options for longer documents

type DistributedNode ¶ added in v0.2.11

type DistributedNode struct {
	ID          string    `json:"id"`
	URL         string    `json:"url"`
	Name        string    `json:"name"`
	Healthy     bool      `json:"healthy"`
	LastCheck   time.Time `json:"last_check"`
	DocCount    int       `json:"doc_count"`
	ChunkCount  int       `json:"chunk_count"`
	IndexSizeMB float64   `json:"index_size_mb"`
}

DistributedNode represents a node in the distributed RAG cluster

type DistributedRAG ¶ added in v0.2.11

type DistributedRAG struct {
	// contains filtered or unexported fields
}

DistributedRAG manages distributed RAG search across multiple nodes

func NewDistributedRAG ¶ added in v0.2.11

func NewDistributedRAG(config DistributedRAGConfig) *DistributedRAG

NewDistributedRAG creates a new distributed RAG manager

func (*DistributedRAG) AddNode ¶ added in v0.2.11

func (dr *DistributedRAG) AddNode(node DistributedNode)

AddNode adds a node to the distributed cluster

func (*DistributedRAG) GetStats ¶ added in v0.2.11

func (dr *DistributedRAG) GetStats() map[string]interface{}

GetStats returns distributed RAG statistics

func (*DistributedRAG) ListNodes ¶ added in v0.2.11

func (dr *DistributedRAG) ListNodes() []DistributedNode

ListNodes returns all configured nodes

func (*DistributedRAG) RebalanceShards ¶ added in v0.2.11

func (dr *DistributedRAG) RebalanceShards(ctx context.Context) error

RebalanceShards redistributes documents across nodes. This is a placeholder for a future implementation of sharding.

func (*DistributedRAG) RemoveNode ¶ added in v0.2.11

func (dr *DistributedRAG) RemoveNode(nodeID string) error

RemoveNode removes a node from the cluster

func (*DistributedRAG) Search ¶ added in v0.2.11

func (dr *DistributedRAG) Search(ctx context.Context, query string, topK int, localSearch func(string, int) ([]ChunkResult, error)) (*DistributedSearchResult, error)

Search performs a distributed search across all healthy nodes

func (*DistributedRAG) Stop ¶ added in v0.2.11

func (dr *DistributedRAG) Stop()

Stop stops the distributed RAG manager

func (*DistributedRAG) SyncIndex ¶ added in v0.2.11

func (dr *DistributedRAG) SyncIndex(ctx context.Context) error

SyncIndex triggers index synchronization across nodes. This is a placeholder for a future implementation of index replication.

type DistributedRAGConfig ¶ added in v0.2.11

type DistributedRAGConfig struct {
	LocalNodeID        string `json:"local_node_id"`
	SearchPath         string `json:"search_path"`
	HealthPath         string `json:"health_path"`
	TimeoutSeconds     int    `json:"timeout_seconds"`
	HealthCheckSeconds int    `json:"health_check_seconds"`
}

DistributedRAGConfig contains configuration for distributed RAG

type DistributedSearchResult ¶ added in v0.2.11

type DistributedSearchResult struct {
	Chunks       []ChunkResult      `json:"chunks"`
	TotalChunks  int                `json:"total_chunks"`
	SearchTimeMS int64              `json:"search_time_ms"`
	NodesQueried int                `json:"nodes_queried"`
	NodeResults  []NodeSearchResult `json:"node_results"`
}

DistributedSearchResult represents a search result from a distributed search

type DocumentAnalysis ¶ added in v0.2.11

type DocumentAnalysis struct {
	TotalChars      int             `json:"total_chars"`
	TotalWords      int             `json:"total_words"`
	TotalParagraphs int             `json:"total_paragraphs"`
	TotalSentences  int             `json:"total_sentences"`
	AvgWordsPerPara float64         `json:"avg_words_per_para"`
	AvgWordsPerSent float64         `json:"avg_words_per_sent"`
	DocumentType    string          `json:"document_type"` // prose, technical, code, list, mixed
	RecommendedOpts ChunkingOptions `json:"recommended_options"`
	Reasoning       string          `json:"reasoning"`
}

DocumentAnalysis contains analysis results for automatic chunking tuning

type DocumentParser ¶ added in v0.2.3

type DocumentParser struct {
	// contains filtered or unexported fields
}

DocumentParser handles parsing of various document formats

func NewDocumentParser ¶ added in v0.2.3

func NewDocumentParser() *DocumentParser

NewDocumentParser creates a new document parser

func (*DocumentParser) Parse ¶ added in v0.2.3

func (p *DocumentParser) Parse(content []byte, filename string, ext string) (*ParseResult, error)

Parse attempts to parse a document based on its extension

func (*DocumentParser) ParseCSV ¶ added in v0.2.3

func (p *DocumentParser) ParseCSV(content []byte) (*ParseResult, error)

ParseCSV handles CSV files

func (*DocumentParser) ParseCode ¶ added in v0.2.3

func (p *DocumentParser) ParseCode(content []byte, ext string) (*ParseResult, error)

ParseCode handles source code files with syntax awareness

func (*DocumentParser) ParseDOCX ¶ added in v0.2.3

func (p *DocumentParser) ParseDOCX(content []byte) (*ParseResult, error)

ParseDOCX extracts text from a DOCX file

func (*DocumentParser) ParseHTML ¶ added in v0.2.3

func (p *DocumentParser) ParseHTML(content []byte) (*ParseResult, error)

ParseHTML extracts text from HTML

func (*DocumentParser) ParseJSON ¶ added in v0.2.3

func (p *DocumentParser) ParseJSON(content []byte) (*ParseResult, error)

ParseJSON formats JSON for readability

func (*DocumentParser) ParsePDF ¶ added in v0.2.3

func (p *DocumentParser) ParsePDF(content []byte) (*ParseResult, error)

ParsePDF extracts text from a PDF file with layout awareness. It first tries pdftotext (poppler-utils) for best results and falls back to basic extraction. It also detects images and provides metadata about document structure.

func (*DocumentParser) ParsePPTX ¶ added in v0.2.3

func (p *DocumentParser) ParsePPTX(content []byte) (*ParseResult, error)

ParsePPTX extracts text from a PPTX file

func (*DocumentParser) ParsePlainText ¶ added in v0.2.3

func (p *DocumentParser) ParsePlainText(content []byte) (*ParseResult, error)

ParsePlainText handles plain text files

func (*DocumentParser) ParseRTF ¶ added in v0.2.3

func (p *DocumentParser) ParseRTF(content []byte) (*ParseResult, error)

ParseRTF extracts text from RTF

func (*DocumentParser) ParseXLSX ¶ added in v0.2.3

func (p *DocumentParser) ParseXLSX(content []byte) (*ParseResult, error)

ParseXLSX extracts text from an XLSX file

func (*DocumentParser) ParseXML ¶ added in v0.2.3

func (p *DocumentParser) ParseXML(content []byte) (*ParseResult, error)

ParseXML extracts text from XML

type Engine ¶

type Engine struct {
	// contains filtered or unexported fields
}

Engine is the main RAG engine that coordinates document ingestion and search

func NewEngine ¶

func NewEngine(embeddingEngine *inference.EmbeddingEngine, dataDir string) *Engine

NewEngine creates a new RAG engine

func (*Engine) AnalyzeDocument ¶ added in v0.2.11

func (e *Engine) AnalyzeDocument(content string) DocumentAnalysis

AnalyzeDocument analyzes a document and returns chunking recommendations. This is useful for previewing what settings would be used before ingestion.

func (*Engine) AutoEnableWithModel ¶ added in v0.2.9

func (e *Engine) AutoEnableWithModel(ctx context.Context, availableModels []string) error

AutoEnableWithModel auto-enables RAG if an embedding model is available. It looks for models with names containing "embed", "bge", "minilm", or "nomic".

func (*Engine) AutoRestore ¶

func (e *Engine) AutoRestore(ctx context.Context) error

AutoRestore attempts to restore RAG state from disk if data exists

func (*Engine) DeleteDocument ¶

func (e *Engine) DeleteDocument(id string) bool

DeleteDocument removes a document and its chunks

func (*Engine) Disable ¶

func (e *Engine) Disable()

Disable disables RAG

func (*Engine) Enable ¶

func (e *Engine) Enable(ctx context.Context, embeddingModel string) error

Enable enables RAG with the specified embedding model

func (*Engine) EnhancePrompt ¶

func (e *Engine) EnhancePrompt(ctx context.Context, userMessage string) (string, *RAGContext, error)

EnhancePrompt enhances a user prompt with relevant context from documents

func (*Engine) GetDocument ¶

func (e *Engine) GetDocument(id string) *Document

GetDocument returns a document by ID

func (*Engine) GetPersistedModel ¶

func (e *Engine) GetPersistedModel() string

GetPersistedModel returns the embedding model from persisted data (if any). This is used to auto-restore RAG on server startup.

func (*Engine) IngestFile ¶

func (e *Engine) IngestFile(ctx context.Context, filePath string, metadata map[string]string) (*Document, error)

IngestFile ingests a file from the filesystem. It now supports PDF, DOCX, XLSX, PPTX, and many more formats.

func (*Engine) IngestReader ¶

func (e *Engine) IngestReader(ctx context.Context, name string, reader io.Reader, metadata map[string]string) (*Document, error)

IngestReader ingests content from an io.Reader

func (*Engine) IngestText ¶

func (e *Engine) IngestText(ctx context.Context, name, content string, metadata map[string]string) (*Document, error)

IngestText ingests plain text content

func (*Engine) IsAutoTuningEnabled ¶ added in v0.2.11

func (e *Engine) IsAutoTuningEnabled() bool

IsAutoTuningEnabled returns whether auto-tuning is enabled

func (*Engine) IsEnabled ¶

func (e *Engine) IsEnabled() bool

IsEnabled returns whether RAG is enabled

func (*Engine) ListDocuments ¶

func (e *Engine) ListDocuments() []*Document

ListDocuments returns all documents

func (*Engine) Search ¶

func (e *Engine) Search(ctx context.Context, query string, opts SearchOptions) (*RAGContext, error)

Search searches for relevant chunks using hybrid search (semantic + keyword)

func (*Engine) SetAutoTuning ¶ added in v0.2.11

func (e *Engine) SetAutoTuning(enabled bool)

SetAutoTuning enables or disables automatic chunking tuning

func (*Engine) Stats ¶

func (e *Engine) Stats() map[string]interface{}

Stats returns statistics about the RAG engine

type NodeSearchResult ¶ added in v0.2.11

type NodeSearchResult struct {
	NodeID       string        `json:"node_id"`
	Chunks       []ChunkResult `json:"chunks"`
	SearchTimeMS int64         `json:"search_time_ms"`
	Error        string        `json:"error,omitempty"`
}

NodeSearchResult represents search results from a single node

type ParseResult ¶ added in v0.2.3

type ParseResult struct {
	Content     string
	ContentType string
	Metadata    map[string]string
	PageCount   int
	WordCount   int
}

ParseResult contains the extracted content and metadata

type RAGContext ¶

type RAGContext struct {
	Query   string         `json:"query"`
	Results []SearchResult `json:"results"`
	Context string         `json:"context"` // Formatted context string for injection
}

RAGContext represents context to inject into LLM prompts

func (*RAGContext) FormatContext ¶

func (rc *RAGContext) FormatContext() string

FormatContext formats search results into a context string for LLM injection. It groups chunks by their source document to avoid confusion.

func (*RAGContext) TruncateContext ¶

func (rc *RAGContext) TruncateContext(maxLen int)

TruncateContext truncates the context to fit within maxLen characters while keeping complete chunks

func (*RAGContext) UniqueDocumentCount ¶ added in v0.2.9

func (rc *RAGContext) UniqueDocumentCount() int

UniqueDocumentCount returns the number of unique documents in the results

type SQLiteStore ¶ added in v0.2.6

type SQLiteStore struct {
	// contains filtered or unexported fields
}

SQLiteStore implements a persistent vector store using SQLite

func NewSQLiteStore ¶ added in v0.2.6

func NewSQLiteStore(dataDir string) (*SQLiteStore, error)

NewSQLiteStore creates a new SQLite-based vector store

func (*SQLiteStore) AddChunk ¶ added in v0.2.6

func (s *SQLiteStore) AddChunk(chunk *Chunk, embedding []float32) error

AddChunk adds a chunk with its embedding to the store

func (*SQLiteStore) AddDocument ¶ added in v0.2.6

func (s *SQLiteStore) AddDocument(doc *Document) error

AddDocument adds a document to the store

func (*SQLiteStore) Close ¶ added in v0.2.6

func (s *SQLiteStore) Close() error

Close closes the database connection

func (*SQLiteStore) DeleteDocument ¶ added in v0.2.6

func (s *SQLiteStore) DeleteDocument(id string) error

DeleteDocument deletes a document and its chunks

func (*SQLiteStore) GetDocument ¶ added in v0.2.6

func (s *SQLiteStore) GetDocument(id string) (*Document, error)

GetDocument retrieves a document by ID

func (*SQLiteStore) HybridSearch ¶ added in v0.2.11

func (s *SQLiteStore) HybridSearch(queryEmbedding []float32, query string, limit int, minScore float32, alpha float32) ([]SearchResult, error)

HybridSearch performs a hybrid search combining semantic similarity with FTS5 keyword matching

func (*SQLiteStore) ListDocuments ¶ added in v0.2.6

func (s *SQLiteStore) ListDocuments() ([]*Document, error)

ListDocuments returns all documents

func (*SQLiteStore) Search ¶ added in v0.2.6

func (s *SQLiteStore) Search(queryEmbedding []float32, limit int, minScore float32) ([]SearchResult, error)

Search performs a semantic search using cosine similarity. It uses a min-heap for efficient top-k selection (O(n log k) vs. O(n log n) for a full sort).

func (*SQLiteStore) Stats ¶ added in v0.2.6

func (s *SQLiteStore) Stats() map[string]interface{}

Stats returns statistics about the store

type SearchOptions ¶

type SearchOptions struct {
	TopK           int      `json:"top_k"`           // Number of results to return
	MinScore       float32  `json:"min_score"`       // Minimum similarity score (0-1)
	DocumentFilter []string `json:"document_filter"` // Only search these document IDs
	IncludeContent bool     `json:"include_content"` // Include chunk content in results
}

SearchOptions configures search behavior

func DefaultSearchOptions ¶

func DefaultSearchOptions() SearchOptions

DefaultSearchOptions returns sensible defaults

type SearchResult ¶

type SearchResult struct {
	Chunk      *Chunk            `json:"chunk"`
	Score      float32           `json:"score"` // Cosine similarity score (0-1)
	DocumentID string            `json:"document_id"`
	DocName    string            `json:"document_name"`
	Metadata   map[string]string `json:"metadata,omitempty"` // Source URL, author, etc.
}

SearchResult represents a search result with relevance score

type Store ¶ added in v0.2.6

type Store interface {
	AddDocument(doc *Document) error
	AddChunk(chunk *Chunk, embedding []float32) error
	GetDocument(id string) (*Document, error)
	ListDocuments() ([]*Document, error)
	DeleteDocument(id string) error
	Search(queryEmbedding []float32, limit int, minScore float32) ([]SearchResult, error)
	HybridSearch(queryEmbedding []float32, query string, limit int, minScore float32, alpha float32) ([]SearchResult, error)
	Stats() map[string]interface{}
	Close() error
}

Store defines the interface for vector storage

type VectorStore ¶

type VectorStore struct {
	// contains filtered or unexported fields
}

VectorStore is an in-memory vector database for semantic search

func NewVectorStore ¶

func NewVectorStore() *VectorStore

NewVectorStore creates a new in-memory vector store

func (*VectorStore) AddChunk ¶

func (vs *VectorStore) AddChunk(chunk *Chunk, embedding []float32) error

AddChunk adds a chunk with its embedding to the store

func (*VectorStore) AddDocument ¶

func (vs *VectorStore) AddDocument(doc *Document) error

AddDocument adds a document to the store

func (*VectorStore) Close ¶ added in v0.2.6

func (vs *VectorStore) Close() error

Close closes the store (no-op for in-memory)

func (*VectorStore) DeleteDocument ¶

func (vs *VectorStore) DeleteDocument(docID string) error

DeleteDocument removes a document and all its chunks

func (*VectorStore) GetAllEmbeddings ¶

func (vs *VectorStore) GetAllEmbeddings() map[string][]float32

GetAllEmbeddings returns all embeddings

func (*VectorStore) GetChunk ¶

func (vs *VectorStore) GetChunk(id string) (*Chunk, error)

GetChunk retrieves a chunk by ID

func (*VectorStore) GetDocument ¶

func (vs *VectorStore) GetDocument(id string) (*Document, error)

GetDocument retrieves a document by ID

func (*VectorStore) HybridSearch ¶

func (vs *VectorStore) HybridSearch(queryEmbedding []float32, query string, limit int, minScore float32, alpha float32) ([]SearchResult, error)

HybridSearch implements the Store interface for hybrid search

func (*VectorStore) ListChunks ¶

func (vs *VectorStore) ListChunks() []*Chunk

ListChunks returns all chunks

func (*VectorStore) ListDocuments ¶

func (vs *VectorStore) ListDocuments() ([]*Document, error)

ListDocuments returns all documents

func (*VectorStore) Search ¶

func (vs *VectorStore) Search(queryEmbedding []float32, limit int, minScore float32) ([]SearchResult, error)

Search finds the top-k most similar chunks to the query embedding

func (*VectorStore) Stats ¶

func (vs *VectorStore) Stats() map[string]interface{}

Stats returns statistics about the store

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Documentation ¶

Overview ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func AutoTuneChunkingOptions ¶ added in v0.2.11

func GenerateChunkID ¶

func GenerateDocumentID ¶

func IsSupportedExtension ¶ added in v0.2.3

func SupportedExtensions ¶ added in v0.2.3

Types ¶

type Chunk ¶

type ChunkResult ¶ added in v0.2.11

type Chunker ¶

func NewChunker ¶

func (*Chunker) ChunkText ¶

type ChunkingOptions ¶

func DefaultChunkingOptions ¶

func LargeDocumentChunkingOptions ¶

type DistributedNode ¶ added in v0.2.11

type DistributedRAG ¶ added in v0.2.11

func NewDistributedRAG ¶ added in v0.2.11

func (*DistributedRAG) AddNode ¶ added in v0.2.11

func (*DistributedRAG) GetStats ¶ added in v0.2.11

func (*DistributedRAG) ListNodes ¶ added in v0.2.11

func (*DistributedRAG) RebalanceShards ¶ added in v0.2.11

func (*DistributedRAG) RemoveNode ¶ added in v0.2.11

func (*DistributedRAG) Search ¶ added in v0.2.11

func (*DistributedRAG) Stop ¶ added in v0.2.11

func (*DistributedRAG) SyncIndex ¶ added in v0.2.11

type DistributedRAGConfig ¶ added in v0.2.11

type DistributedSearchResult ¶ added in v0.2.11

type Document ¶

type DocumentAnalysis ¶ added in v0.2.11

type DocumentParser ¶ added in v0.2.3

func NewDocumentParser ¶ added in v0.2.3

func (*DocumentParser) Parse ¶ added in v0.2.3

func (*DocumentParser) ParseCSV ¶ added in v0.2.3

func (*DocumentParser) ParseCode ¶ added in v0.2.3

func (*DocumentParser) ParseDOCX ¶ added in v0.2.3

func (*DocumentParser) ParseHTML ¶ added in v0.2.3

func (*DocumentParser) ParseJSON ¶ added in v0.2.3

func (*DocumentParser) ParsePDF ¶ added in v0.2.3

func (*DocumentParser) ParsePPTX ¶ added in v0.2.3

func (*DocumentParser) ParsePlainText ¶ added in v0.2.3

func (*DocumentParser) ParseRTF ¶ added in v0.2.3

func (*DocumentParser) ParseXLSX ¶ added in v0.2.3

func (*DocumentParser) ParseXML ¶ added in v0.2.3

type Engine ¶

func NewEngine ¶

func (*Engine) AnalyzeDocument ¶ added in v0.2.11

func (*Engine) AutoEnableWithModel ¶ added in v0.2.9

func (*Engine) AutoRestore ¶

func (*Engine) DeleteDocument ¶

func (*Engine) Disable ¶

func (*Engine) Enable ¶

func (*Engine) EnhancePrompt ¶

func (*Engine) GetDocument ¶

func (*Engine) GetPersistedModel ¶

func (*Engine) IngestFile ¶

func (*Engine) IngestReader ¶

func (*Engine) IngestText ¶

func (*Engine) IsAutoTuningEnabled ¶ added in v0.2.11

func (*Engine) IsEnabled ¶

func (*Engine) ListDocuments ¶

func (*Engine) Search ¶

func (*Engine) SetAutoTuning ¶ added in v0.2.11

func (*Engine) Stats ¶

type NodeSearchResult ¶ added in v0.2.11

type ParseResult ¶ added in v0.2.3

type RAGContext ¶

func (*RAGContext) FormatContext ¶

func (*RAGContext) TruncateContext ¶

func (*RAGContext) UniqueDocumentCount ¶ added in v0.2.9

type SQLiteStore ¶ added in v0.2.6

func NewSQLiteStore ¶ added in v0.2.6

func (*SQLiteStore) AddChunk ¶ added in v0.2.6

func (*SQLiteStore) AddDocument ¶ added in v0.2.6

func (*SQLiteStore) Close ¶ added in v0.2.6