fts

package
v0.0.8 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 3, 2026 License: MIT Imports: 9 Imported by: 0

Documentation

Overview

Package fts provides full-text search functionality for XxSQL.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func DefaultStopWords

func DefaultStopWords() map[string]bool

DefaultStopWords returns a set of common English stop words.

Types

type BM25Ranker

type BM25Ranker struct {
	// K1 controls term saturation (typically 1.2-2.0)
	K1 float64
	// B controls length normalization (typically 0.75)
	B float64
}

BM25Ranker implements the BM25 ranking algorithm. BM25 is the default ranking algorithm used by many search engines.

func NewBM25Ranker

func NewBM25Ranker() *BM25Ranker

NewBM25Ranker creates a new BM25 ranker with default parameters.

func (*BM25Ranker) Score

func (r *BM25Ranker) Score(posting Posting, index *InvertedIndex, queryTerms []string) float64

Score calculates the BM25 score for a document.

type FTSIndex

type FTSIndex struct {
	// contains filtered or unexported fields
}

FTSIndex represents a full-text search index on a table.

func NewFTSIndex

func NewFTSIndex(config FTSIndexConfig) *FTSIndex

NewFTSIndex creates a new full-text search index.

func (*FTSIndex) Columns

func (idx *FTSIndex) Columns() []string

Columns returns the indexed columns.

func (*FTSIndex) GetDocumentIDs

func (idx *FTSIndex) GetDocumentIDs() []uint64

GetDocumentIDs returns all document IDs in the index.

func (*FTSIndex) IndexDocument

func (idx *FTSIndex) IndexDocument(docID uint64, values map[string]interface{}) error

IndexDocument indexes a document (row) with the given ID. The values map should contain the values for each indexed column.

func (*FTSIndex) Load

func (idx *FTSIndex) Load() error

Load loads the index from disk.

func (*FTSIndex) Name

func (idx *FTSIndex) Name() string

Name returns the index name.

func (*FTSIndex) RemoveDocument

func (idx *FTSIndex) RemoveDocument(docID uint64)

RemoveDocument removes a document from the index.

func (*FTSIndex) Save

func (idx *FTSIndex) Save() error

Save persists the index to disk.

func (*FTSIndex) Search

func (idx *FTSIndex) Search(query string) ([]SearchResult, error)

Search performs a full-text search with the given query. The query supports:

  - Simple terms: "hello world"
  - AND: "hello AND world"
  - OR: "hello OR world"
  - NOT: "hello NOT world"

func (*FTSIndex) SearchWithTerms

func (idx *FTSIndex) SearchWithTerms(terms []string, useOr bool) ([]SearchResult, error)

SearchWithTerms performs a search with pre-tokenized terms.

func (*FTSIndex) SetRanker

func (idx *FTSIndex) SetRanker(ranker Ranker)

SetRanker sets a custom ranker for the index.

func (*FTSIndex) Stats

func (idx *FTSIndex) Stats() IndexStats

Stats returns statistics about the index.

func (*FTSIndex) TableName

func (idx *FTSIndex) TableName() string

TableName returns the table name.

func (*FTSIndex) UpdateDocument

func (idx *FTSIndex) UpdateDocument(docID uint64, values map[string]interface{}) error

UpdateDocument updates a document in the index.

type FTSIndexConfig

type FTSIndexConfig struct {
	Name       string
	TableName  string
	Columns    []string
	Tokenizer  string // "simple", "porter"
	Persistent bool
	DataDir    string
}

FTSIndexConfig holds configuration for creating an FTS index.

type FTSManager

type FTSManager struct {
	// contains filtered or unexported fields
}

FTSManager manages all FTS indexes for a database.

func NewFTSManager

func NewFTSManager(dataDir string) *FTSManager

NewFTSManager creates a new FTS manager.

func (*FTSManager) Close

func (m *FTSManager) Close() error

Close closes all indexes.

func (*FTSManager) CreateIndex

func (m *FTSManager) CreateIndex(name, tableName string, columns []string, tokenizer string) (*FTSIndex, error)

CreateIndex creates a new FTS index.

func (*FTSManager) DropIndex

func (m *FTSManager) DropIndex(name string) error

DropIndex removes an FTS index.

func (*FTSManager) DropIndexForTable

func (m *FTSManager) DropIndexForTable(tableName string) error

DropIndexForTable removes all FTS indexes for a dropped table.

func (*FTSManager) GetIndex

func (m *FTSManager) GetIndex(name string) (*FTSIndex, error)

GetIndex returns an FTS index by name.

func (*FTSManager) GetIndexesForTable

func (m *FTSManager) GetIndexesForTable(tableName string) []*FTSIndex

GetIndexesForTable returns all FTS indexes for a table.

func (*FTSManager) GetStats

func (m *FTSManager) GetStats() []IndexStats

GetStats returns statistics for all indexes.

func (*FTSManager) IndexDocument

func (m *FTSManager) IndexDocument(tableName string, docID uint64, values map[string]interface{}) error

IndexDocument indexes a document in all relevant indexes for a table.

func (*FTSManager) ListIndexes

func (m *FTSManager) ListIndexes() []string

ListIndexes returns all FTS index names.

func (*FTSManager) LoadAll

func (m *FTSManager) LoadAll() error

LoadAll loads all indexes from disk.

func (*FTSManager) RemoveDocument

func (m *FTSManager) RemoveDocument(tableName string, docID uint64)

RemoveDocument removes a document from all relevant indexes.

func (*FTSManager) SaveAll

func (m *FTSManager) SaveAll() error

SaveAll saves all indexes to disk.

func (*FTSManager) Search

func (m *FTSManager) Search(indexName, query string) ([]SearchResult, error)

Search performs a search on a specific index.

func (*FTSManager) UpdateDocument

func (m *FTSManager) UpdateDocument(tableName string, docID uint64, values map[string]interface{}) error

UpdateDocument updates a document in all relevant indexes.

type IndexStats

type IndexStats struct {
	Name          string
	TableName     string
	Columns       []string
	DocumentCount uint64
	TermCount     uint64
	AvgDocLength  float64
}

IndexStats holds statistics about an FTS index.

type InvertedIndex

type InvertedIndex struct {
	// contains filtered or unexported fields
}

InvertedIndex is the core data structure for full-text search.

func NewInvertedIndex

func NewInvertedIndex(tokenizer Tokenizer) *InvertedIndex

NewInvertedIndex creates a new inverted index.

func (*InvertedIndex) AddDocument

func (idx *InvertedIndex) AddDocument(docID uint64, text string)

AddDocument adds a document to the index.

func (*InvertedIndex) AverageDocumentLength

func (idx *InvertedIndex) AverageDocumentLength() float64

AverageDocumentLength returns the average document length.

func (*InvertedIndex) DocumentFrequency

func (idx *InvertedIndex) DocumentFrequency(term string) int

DocumentFrequency returns the number of documents containing the term.

func (*InvertedIndex) DocumentLength

func (idx *InvertedIndex) DocumentLength(docID uint64) int

DocumentLength returns the length of a document (in tokens).

func (*InvertedIndex) GetPostings

func (idx *InvertedIndex) GetPostings(term string) PostingsList

GetPostings returns the postings list for a single term.

func (*InvertedIndex) RemoveDocument

func (idx *InvertedIndex) RemoveDocument(docID uint64)

RemoveDocument removes a document from the index.

func (*InvertedIndex) Search

func (idx *InvertedIndex) Search(terms []string) PostingsList

Search searches for documents containing the given terms. It returns the postings of the documents that contain all of the terms (AND search).

func (*InvertedIndex) SearchAny

func (idx *InvertedIndex) SearchAny(terms []string) PostingsList

SearchAny searches for documents containing any of the given terms (OR search).

func (*InvertedIndex) Terms

func (idx *InvertedIndex) Terms() []string

Terms returns all terms in the index.

func (*InvertedIndex) TotalDocuments

func (idx *InvertedIndex) TotalDocuments() uint64

TotalDocuments returns the total number of indexed documents.

func (*InvertedIndex) UpdateDocument

func (idx *InvertedIndex) UpdateDocument(docID uint64, text string)

UpdateDocument updates a document in the index.

type PorterStemmerTokenizer

type PorterStemmerTokenizer struct {
	// contains filtered or unexported fields
}

PorterStemmerTokenizer wraps another tokenizer and applies Porter stemming. This is a simplified implementation; for production use, prefer a proper Porter stemmer.

func NewPorterStemmerTokenizer

func NewPorterStemmerTokenizer(base Tokenizer) *PorterStemmerTokenizer

NewPorterStemmerTokenizer creates a tokenizer that applies Porter stemming.

func (*PorterStemmerTokenizer) Tokenize

func (t *PorterStemmerTokenizer) Tokenize(text string) []Token

Tokenize tokenizes and stems the text.

type Posting

type Posting struct {
	DocID     uint64 // Document ID (usually row ID)
	Positions []int  // Positions where the term appears
	Frequency int    // Number of times the term appears in this document
}

Posting represents a document in the inverted index.

type PostingsList

type PostingsList []Posting

PostingsList is a list of postings for a term, sorted by DocID.

func (PostingsList) Len

func (p PostingsList) Len() int

Len implements sort.Interface.

func (PostingsList) Less

func (p PostingsList) Less(i, j int) bool

Less implements sort.Interface.

func (PostingsList) Swap

func (p PostingsList) Swap(i, j int)

Swap implements sort.Interface.

type RankedResult

type RankedResult struct {
	DocID uint64
	Score float64
}

RankedResult represents a search result with its score.

type RankedResults

type RankedResults []RankedResult

RankedResults is a list of ranked results, sorted by score descending.

func (RankedResults) Len

func (r RankedResults) Len() int

Len implements sort.Interface.

func (RankedResults) Less

func (r RankedResults) Less(i, j int) bool

Less implements sort.Interface (descending order by score).

func (RankedResults) Swap

func (r RankedResults) Swap(i, j int)

Swap implements sort.Interface.

type Ranker

type Ranker interface {
	// Score calculates a relevance score for a posting.
	Score(posting Posting, index *InvertedIndex, queryTerms []string) float64
}

Ranker calculates relevance scores for search results.

type SearchResult

type SearchResult struct {
	DocID uint64
	Score float64
}

SearchResult represents a search result.

type SimpleRanker

type SimpleRanker struct{}

SimpleRanker ranks results using term frequency alone.

func NewSimpleRanker

func NewSimpleRanker() *SimpleRanker

NewSimpleRanker creates a simple ranker based on term frequency.

func (*SimpleRanker) Score

func (r *SimpleRanker) Score(posting Posting, index *InvertedIndex, queryTerms []string) float64

Score returns the sum of term frequencies.

type SimpleTokenizer

type SimpleTokenizer struct {
	// MinTokenLength is the minimum length for a token to be indexed.
	MinTokenLength int
	// StopWords is a set of words to skip during tokenization.
	StopWords map[string]bool
}

SimpleTokenizer is a basic tokenizer that splits on whitespace and punctuation.

func NewSimpleTokenizer

func NewSimpleTokenizer() *SimpleTokenizer

NewSimpleTokenizer creates a new SimpleTokenizer with default settings.

func (*SimpleTokenizer) Tokenize

func (t *SimpleTokenizer) Tokenize(text string) []Token

Tokenize breaks text into tokens, filtering by length and stop words.

type TFIDFRanker

type TFIDFRanker struct{}

TFIDFRanker implements TF-IDF ranking.

func NewTFIDFRanker

func NewTFIDFRanker() *TFIDFRanker

NewTFIDFRanker creates a new TF-IDF ranker.

func (*TFIDFRanker) Score

func (r *TFIDFRanker) Score(posting Posting, index *InvertedIndex, queryTerms []string) float64

Score calculates the TF-IDF score for a document.

type Token

type Token struct {
	Term     string // The token text (lowercased)
	Position int    // Position in the document (0-indexed)
}

Token represents a token in a document.

type Tokenizer

type Tokenizer interface {
	// Tokenize breaks text into tokens.
	Tokenize(text string) []Token
}

Tokenizer defines how text is tokenized for full-text search (FTS).

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL