Documentation
¶
Overview ¶
Package fts provides full-text search functionality for XxSQL.
Index ¶
- func DefaultStopWords() map[string]bool
- type BM25Ranker
- type FTSIndex
- func (idx *FTSIndex) Columns() []string
- func (idx *FTSIndex) GetDocumentIDs() []uint64
- func (idx *FTSIndex) IndexDocument(docID uint64, values map[string]interface{}) error
- func (idx *FTSIndex) Load() error
- func (idx *FTSIndex) Name() string
- func (idx *FTSIndex) RemoveDocument(docID uint64)
- func (idx *FTSIndex) Save() error
- func (idx *FTSIndex) Search(query string) ([]SearchResult, error)
- func (idx *FTSIndex) SearchWithTerms(terms []string, useOr bool) ([]SearchResult, error)
- func (idx *FTSIndex) SetRanker(ranker Ranker)
- func (idx *FTSIndex) Stats() IndexStats
- func (idx *FTSIndex) TableName() string
- func (idx *FTSIndex) UpdateDocument(docID uint64, values map[string]interface{}) error
- type FTSIndexConfig
- type FTSManager
- func (m *FTSManager) Close() error
- func (m *FTSManager) CreateIndex(name, tableName string, columns []string, tokenizer string) (*FTSIndex, error)
- func (m *FTSManager) DropIndex(name string) error
- func (m *FTSManager) DropIndexForTable(tableName string) error
- func (m *FTSManager) GetIndex(name string) (*FTSIndex, error)
- func (m *FTSManager) GetIndexesForTable(tableName string) []*FTSIndex
- func (m *FTSManager) GetStats() []IndexStats
- func (m *FTSManager) IndexDocument(tableName string, docID uint64, values map[string]interface{}) error
- func (m *FTSManager) ListIndexes() []string
- func (m *FTSManager) LoadAll() error
- func (m *FTSManager) RemoveDocument(tableName string, docID uint64)
- func (m *FTSManager) SaveAll() error
- func (m *FTSManager) Search(indexName, query string) ([]SearchResult, error)
- func (m *FTSManager) UpdateDocument(tableName string, docID uint64, values map[string]interface{}) error
- type IndexStats
- type InvertedIndex
- func (idx *InvertedIndex) AddDocument(docID uint64, text string)
- func (idx *InvertedIndex) AverageDocumentLength() float64
- func (idx *InvertedIndex) DocumentFrequency(term string) int
- func (idx *InvertedIndex) DocumentLength(docID uint64) int
- func (idx *InvertedIndex) GetPostings(term string) PostingsList
- func (idx *InvertedIndex) RemoveDocument(docID uint64)
- func (idx *InvertedIndex) Search(terms []string) PostingsList
- func (idx *InvertedIndex) SearchAny(terms []string) PostingsList
- func (idx *InvertedIndex) Terms() []string
- func (idx *InvertedIndex) TotalDocuments() uint64
- func (idx *InvertedIndex) UpdateDocument(docID uint64, text string)
- type PorterStemmerTokenizer
- type Posting
- type PostingsList
- type RankedResult
- type RankedResults
- type Ranker
- type SearchResult
- type SimpleRanker
- type SimpleTokenizer
- type TFIDFRanker
- type Token
- type Tokenizer
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func DefaultStopWords ¶
DefaultStopWords returns a set of common English stop words.
Types ¶
type BM25Ranker ¶
type BM25Ranker struct {
// K1 controls term saturation (typically 1.2-2.0)
K1 float64
// B controls length normalization (typically 0.75)
B float64
}
BM25Ranker implements the BM25 ranking algorithm. BM25 is the default ranking algorithm used by many search engines.
func NewBM25Ranker ¶
func NewBM25Ranker() *BM25Ranker
NewBM25Ranker creates a new BM25 ranker with default parameters.
func (*BM25Ranker) Score ¶
func (r *BM25Ranker) Score(posting Posting, index *InvertedIndex, queryTerms []string) float64
Score calculates the BM25 score for a document.
type FTSIndex ¶
type FTSIndex struct {
// contains filtered or unexported fields
}
FTSIndex represents a full-text search index on a table.
func NewFTSIndex ¶
func NewFTSIndex(config FTSIndexConfig) *FTSIndex
NewFTSIndex creates a new full-text search index.
func (*FTSIndex) GetDocumentIDs ¶
GetDocumentIDs returns all document IDs in the index.
func (*FTSIndex) IndexDocument ¶
IndexDocument indexes a document (row) with the given ID. The values map should contain the values for each indexed column.
func (*FTSIndex) RemoveDocument ¶
RemoveDocument removes a document from the index.
func (*FTSIndex) Search ¶
func (idx *FTSIndex) Search(query string) ([]SearchResult, error)
Search performs a full-text search with the given query. The query supports:
- Simple terms: "hello world"
- AND: "hello AND world"
- OR: "hello OR world"
- NOT: "hello NOT world"
func (*FTSIndex) SearchWithTerms ¶
func (idx *FTSIndex) SearchWithTerms(terms []string, useOr bool) ([]SearchResult, error)
SearchWithTerms performs a search with pre-tokenized terms.
func (*FTSIndex) Stats ¶
func (idx *FTSIndex) Stats() IndexStats
Stats returns statistics about the index.
type FTSIndexConfig ¶
type FTSIndexConfig struct {
Name string
TableName string
Columns []string
Tokenizer string // "simple", "porter"
Persistent bool
DataDir string
}
FTSIndexConfig holds configuration for creating an FTS index.
type FTSManager ¶
type FTSManager struct {
// contains filtered or unexported fields
}
FTSManager manages all FTS indexes for a database.
func NewFTSManager ¶
func NewFTSManager(dataDir string) *FTSManager
NewFTSManager creates a new FTS manager.
func (*FTSManager) CreateIndex ¶
func (m *FTSManager) CreateIndex(name, tableName string, columns []string, tokenizer string) (*FTSIndex, error)
CreateIndex creates a new FTS index.
func (*FTSManager) DropIndex ¶
func (m *FTSManager) DropIndex(name string) error
DropIndex removes an FTS index.
func (*FTSManager) DropIndexForTable ¶
func (m *FTSManager) DropIndexForTable(tableName string) error
DropIndexForTable removes all FTS indexes for a dropped table.
func (*FTSManager) GetIndex ¶
func (m *FTSManager) GetIndex(name string) (*FTSIndex, error)
GetIndex returns an FTS index by name.
func (*FTSManager) GetIndexesForTable ¶
func (m *FTSManager) GetIndexesForTable(tableName string) []*FTSIndex
GetIndexesForTable returns all FTS indexes for a table.
func (*FTSManager) GetStats ¶
func (m *FTSManager) GetStats() []IndexStats
GetStats returns statistics for all indexes.
func (*FTSManager) IndexDocument ¶
func (m *FTSManager) IndexDocument(tableName string, docID uint64, values map[string]interface{}) error
IndexDocument indexes a document in all relevant indexes for a table.
func (*FTSManager) ListIndexes ¶
func (m *FTSManager) ListIndexes() []string
ListIndexes returns all FTS index names.
func (*FTSManager) LoadAll ¶
func (m *FTSManager) LoadAll() error
LoadAll loads all indexes from disk.
func (*FTSManager) RemoveDocument ¶
func (m *FTSManager) RemoveDocument(tableName string, docID uint64)
RemoveDocument removes a document from all relevant indexes.
func (*FTSManager) SaveAll ¶
func (m *FTSManager) SaveAll() error
SaveAll saves all indexes to disk.
func (*FTSManager) Search ¶
func (m *FTSManager) Search(indexName, query string) ([]SearchResult, error)
Search performs a search on a specific index.
func (*FTSManager) UpdateDocument ¶
func (m *FTSManager) UpdateDocument(tableName string, docID uint64, values map[string]interface{}) error
UpdateDocument updates a document in all relevant indexes.
type IndexStats ¶
type IndexStats struct {
Name string
TableName string
Columns []string
DocumentCount uint64
TermCount uint64
AvgDocLength float64
}
IndexStats holds statistics about an FTS index.
type InvertedIndex ¶
type InvertedIndex struct {
// contains filtered or unexported fields
}
InvertedIndex is the core data structure for full-text search.
func NewInvertedIndex ¶
func NewInvertedIndex(tokenizer Tokenizer) *InvertedIndex
NewInvertedIndex creates a new inverted index.
func (*InvertedIndex) AddDocument ¶
func (idx *InvertedIndex) AddDocument(docID uint64, text string)
AddDocument adds a document to the index.
func (*InvertedIndex) AverageDocumentLength ¶
func (idx *InvertedIndex) AverageDocumentLength() float64
AverageDocumentLength returns the average document length.
func (*InvertedIndex) DocumentFrequency ¶
func (idx *InvertedIndex) DocumentFrequency(term string) int
DocumentFrequency returns the number of documents containing the term.
func (*InvertedIndex) DocumentLength ¶
func (idx *InvertedIndex) DocumentLength(docID uint64) int
DocumentLength returns the length of a document (in tokens).
func (*InvertedIndex) GetPostings ¶
func (idx *InvertedIndex) GetPostings(term string) PostingsList
GetPostings returns the postings list for a single term.
func (*InvertedIndex) RemoveDocument ¶
func (idx *InvertedIndex) RemoveDocument(docID uint64)
RemoveDocument removes a document from the index.
func (*InvertedIndex) Search ¶
func (idx *InvertedIndex) Search(terms []string) PostingsList
Search finds the documents that contain all of the given terms (AND search) and returns their postings.
func (*InvertedIndex) SearchAny ¶
func (idx *InvertedIndex) SearchAny(terms []string) PostingsList
SearchAny searches for documents containing any of the given terms (OR search).
func (*InvertedIndex) Terms ¶
func (idx *InvertedIndex) Terms() []string
Terms returns all terms in the index.
func (*InvertedIndex) TotalDocuments ¶
func (idx *InvertedIndex) TotalDocuments() uint64
TotalDocuments returns the total number of indexed documents.
func (*InvertedIndex) UpdateDocument ¶
func (idx *InvertedIndex) UpdateDocument(docID uint64, text string)
UpdateDocument updates a document in the index.
type PorterStemmerTokenizer ¶
type PorterStemmerTokenizer struct {
// contains filtered or unexported fields
}
PorterStemmerTokenizer wraps another tokenizer and applies Porter stemming. This is a simplified implementation; for production use, substitute a complete Porter stemmer.
func NewPorterStemmerTokenizer ¶
func NewPorterStemmerTokenizer(base Tokenizer) *PorterStemmerTokenizer
NewPorterStemmerTokenizer creates a tokenizer that applies Porter stemming.
func (*PorterStemmerTokenizer) Tokenize ¶
func (t *PorterStemmerTokenizer) Tokenize(text string) []Token
Tokenize tokenizes and stems the text.
type Posting ¶
type Posting struct {
DocID uint64 // Document ID (usually row ID)
Positions []int // Positions where the term appears
Frequency int // Number of times the term appears in this document
}
Posting represents a single document's entry in a term's postings list within the inverted index.
type PostingsList ¶
type PostingsList []Posting
PostingsList is a list of postings for a term, sorted by DocID.
func (PostingsList) Less ¶
func (p PostingsList) Less(i, j int) bool
Less implements sort.Interface.
type RankedResult ¶
RankedResult represents a search result with its score.
type RankedResults ¶
type RankedResults []RankedResult
RankedResults is a list of ranked results, sorted by score descending.
func (RankedResults) Less ¶
func (r RankedResults) Less(i, j int) bool
Less implements sort.Interface (descending order by score).
type Ranker ¶
type Ranker interface {
// Score calculates a relevance score for a posting.
Score(posting Posting, index *InvertedIndex, queryTerms []string) float64
}
Ranker calculates relevance scores for search results.
type SearchResult ¶
SearchResult represents a search result.
type SimpleRanker ¶
type SimpleRanker struct{}
SimpleRanker scores documents using term frequency alone.
func NewSimpleRanker ¶
func NewSimpleRanker() *SimpleRanker
NewSimpleRanker creates a simple ranker based on term frequency.
func (*SimpleRanker) Score ¶
func (r *SimpleRanker) Score(posting Posting, index *InvertedIndex, queryTerms []string) float64
Score returns the sum of term frequencies.
type SimpleTokenizer ¶
type SimpleTokenizer struct {
// MinTokenLength is the minimum length for a token to be indexed.
MinTokenLength int
// StopWords is a set of words to skip during tokenization.
StopWords map[string]bool
}
SimpleTokenizer is a basic tokenizer that splits on whitespace and punctuation.
func NewSimpleTokenizer ¶
func NewSimpleTokenizer() *SimpleTokenizer
NewSimpleTokenizer creates a new SimpleTokenizer with default settings.
func (*SimpleTokenizer) Tokenize ¶
func (t *SimpleTokenizer) Tokenize(text string) []Token
Tokenize breaks text into tokens, filtering by length and stop words.
type TFIDFRanker ¶
type TFIDFRanker struct{}
TFIDFRanker implements TF-IDF ranking.
func NewTFIDFRanker ¶
func NewTFIDFRanker() *TFIDFRanker
NewTFIDFRanker creates a new TF-IDF ranker.
func (*TFIDFRanker) Score ¶
func (r *TFIDFRanker) Score(posting Posting, index *InvertedIndex, queryTerms []string) float64
Score calculates the TF-IDF score for a document.