Documentation
¶
Index ¶
- Constants
- Variables
- func BoolNormalize(v any) (any, bool)
- func CIDRToRange(cidr string) (start, end float64, err error)
- func CompressionStats(terms []string) (compressed, original int, ratio float64)
- func DateToEpochMs(v any) (any, bool)
- func DurationToMs(v any) (any, bool)
- func EmailDomain(v any) (any, bool)
- func Encode(idx *GINIndex) ([]byte, error)
- func EncodeToMetadata(idx *GINIndex, cfg ParquetConfig) (key string, value string, err error)
- func EncodeWithLevel(idx *GINIndex, level CompressionLevel) ([]byte, error)
- func ExtractLiterals(pattern string) ([]string, error)
- func ExtractTrigrams(s string) []string
- func GenerateBigrams(text string) []string
- func GenerateNGrams(text string, n int, opts ...NGramOption) ([]string, error)
- func GenerateTrigrams(text string) []string
- func HasGINIndex(parquetFile string, cfg ParquetConfig) (bool, error)
- func HasGINIndexReader(parquetFile string, cfg ParquetConfig, reader io.ReaderAt, size int64) (bool, error)
- func HasSidecar(parquetFile string) bool
- func IPv4ToInt(v any) (any, bool)
- func ISODateToEpochMs(v any) (any, bool)
- func IsDirectory(path string) bool
- func IsS3Path(path string) bool
- func IsValidJSONPath(path string) bool
- func ListGINFiles(dir string) ([]string, error)
- func ListParquetFiles(dir string) ([]string, error)
- func MustValidateJSONPath(path string) string
- func NormalizePath(path string) string
- func ParseJSONPath(path string) (jp.Expr, error)
- func ParseS3Path(path string) (bucket, key string, err error)
- func RebuildWithIndex(parquetFile string, idx *GINIndex, cfg ParquetConfig) error
- func SemVerToInt(v any) (any, bool)
- func SidecarPath(parquetFile string) string
- func ToLower(v any) (any, bool)
- func URLHost(v any) (any, bool)
- func ValidateJSONPath(path string) error
- func WriteCompressedTerms(w io.Writer, blocks []CompressedTermBlock) error
- func WriteSidecar(parquetFile string, idx *GINIndex) error
- type BloomFilter
- func (bf *BloomFilter) Add(data []byte)
- func (bf *BloomFilter) AddString(s string)
- func (bf *BloomFilter) Bits() []uint64
- func (bf *BloomFilter) MayContain(data []byte) bool
- func (bf *BloomFilter) MayContainString(s string) bool
- func (bf *BloomFilter) NumBits() uint32
- func (bf *BloomFilter) NumHashes() uint8
- type BloomFilterOption
- type BuilderOption
- type CompressedTermBlock
- type CompressionLevel
- type ConfigOption
- func WithBoolNormalizeTransformer(path string) ConfigOption
- func WithCustomDateTransformer(path, layout string) ConfigOption
- func WithDateTransformer(path string) ConfigOption
- func WithDurationTransformer(path string) ConfigOption
- func WithEmailDomainTransformer(path string) ConfigOption
- func WithFTSPaths(paths ...string) ConfigOption
- func WithFieldTransformer(path string, fn FieldTransformer) ConfigOption
- func WithIPv4Transformer(path string) ConfigOption
- func WithISODateTransformer(path string) ConfigOption
- func WithNumericBucketTransformer(path string, size float64) ConfigOption
- func WithRegexExtractIntTransformer(path, pattern string, group int) ConfigOption
- func WithRegexExtractTransformer(path, pattern string, group int) ConfigOption
- func WithRegisteredTransformer(path string, id TransformerID, params []byte) ConfigOption
- func WithSemVerTransformer(path string) ConfigOption
- func WithToLowerTransformer(path string) ConfigOption
- func WithURLHostTransformer(path string) ConfigOption
- type CustomDateParams
- type DocID
- type DocIDCodec
- type FieldTransformer
- func CustomDateToEpochMs(layout string) FieldTransformer
- func NumericBucket(size float64) FieldTransformer
- func ReconstructTransformer(id TransformerID, params json.RawMessage) (FieldTransformer, error)
- func RegexExtract(pattern string, group int) FieldTransformer
- func RegexExtractInt(pattern string, group int) FieldTransformer
- type GINBuilder
- type GINConfig
- type GINIndex
- func BuildFromParquet(parquetFile string, jsonColumn string, config GINConfig) (*GINIndex, error)
- func BuildFromParquetReader(parquetFile string, jsonColumn string, config GINConfig, reader io.ReaderAt, ...) (*GINIndex, error)
- func Decode(data []byte) (*GINIndex, error)
- func DecodeFromMetadata(value string) (*GINIndex, error)
- func LoadIndex(parquetFile string, cfg ParquetConfig) (*GINIndex, error)
- func LoadIndexReader(parquetFile string, cfg ParquetConfig, reader io.ReaderAt, size int64) (*GINIndex, error)
- func NewGINIndex() *GINIndex
- func ReadFromParquetMetadata(parquetFile string, cfg ParquetConfig) (*GINIndex, error)
- func ReadFromParquetMetadataReader(parquetFile string, cfg ParquetConfig, reader io.ReaderAt, size int64) (*GINIndex, error)
- func ReadSidecar(parquetFile string) (*GINIndex, error)
- type Header
- type HyperLogLog
- func (hll *HyperLogLog) Add(data []byte)
- func (hll *HyperLogLog) AddString(s string)
- func (hll *HyperLogLog) Clear()
- func (hll *HyperLogLog) Clone() *HyperLogLog
- func (hll *HyperLogLog) Estimate() uint64
- func (hll *HyperLogLog) Merge(other *HyperLogLog)
- func (hll *HyperLogLog) Precision() uint8
- func (hll *HyperLogLog) Registers() []uint8
- type HyperLogLogOption
- type IdentityCodec
- type JSONPathError
- type NGramConfig
- type NGramOption
- type NullIndex
- type NumericBucketParams
- type NumericIndex
- type Operator
- type ParquetConfig
- type ParquetIndexWriter
- type PathEntry
- type Predicate
- func Contains(path string, pattern string) Predicate
- func EQ(path string, value any) Predicate
- func GT(path string, value any) Predicate
- func GTE(path string, value any) Predicate
- func IN(path string, values ...any) Predicate
- func InSubnet(path, cidr string) []Predicate
- func IsNotNull(path string) Predicate
- func IsNull(path string) Predicate
- func LT(path string, value any) Predicate
- func LTE(path string, value any) Predicate
- func NE(path string, value any) Predicate
- func NIN(path string, values ...any) Predicate
- func Regex(path string, pattern string) Predicate
- type PrefixCompressor
- type PrefixCompressorOption
- type PrefixEntry
- type RGNumericStat
- type RGSet
- func (rs *RGSet) All() *RGSet
- func (rs *RGSet) Clear(rgID int)
- func (rs *RGSet) Clone() *RGSet
- func (rs *RGSet) Count() int
- func (rs *RGSet) Intersect(other *RGSet) *RGSet
- func (rs *RGSet) Invert() *RGSet
- func (rs *RGSet) IsEmpty() bool
- func (rs *RGSet) IsSet(rgID int) bool
- func (rs *RGSet) Roaring() *roaring.Bitmap
- func (rs *RGSet) Set(rgID int)
- func (rs *RGSet) ToSlice() []int
- func (rs *RGSet) Union(other *RGSet) *RGSet
- type RGSetOption
- type RGStringLengthStat
- type RegexLiteralInfo
- type RegexParams
- type RowGroupCodec
- type S3Client
- func (c *S3Client) BuildFromParquet(bucket, key, jsonColumn string, ginCfg GINConfig) (*GINIndex, error)
- func (c *S3Client) Exists(bucket, key string) (bool, error)
- func (c *S3Client) GetObjectSize(bucket, key string) (int64, error)
- func (c *S3Client) HasGINIndex(bucket, key string, cfg ParquetConfig) (bool, error)
- func (c *S3Client) HasSidecar(bucket, parquetKey string) (bool, error)
- func (c *S3Client) ListGINFiles(bucket, prefix string) ([]string, error)
- func (c *S3Client) ListParquetFiles(bucket, prefix string) ([]string, error)
- func (c *S3Client) LoadIndex(bucket, parquetKey string, cfg ParquetConfig) (*GINIndex, error)
- func (c *S3Client) OpenParquet(bucket, key string) (*parquet.File, io.ReaderAt, int64, error)
- func (c *S3Client) ReadFile(bucket, key string) ([]byte, error)
- func (c *S3Client) ReadFromParquetMetadata(bucket, key string, cfg ParquetConfig) (*GINIndex, error)
- func (c *S3Client) ReadSidecar(bucket, parquetKey string) (*GINIndex, error)
- func (c *S3Client) WriteFile(bucket, key string, data []byte) error
- func (c *S3Client) WriteSidecar(bucket, parquetKey string, idx *GINIndex) error
- type S3Config
- type SerializedConfig
- type StringIndex
- type StringLengthIndex
- type TransformerID
- type TransformerSpec
- type TrigramIndex
Constants ¶
const (
MagicBytes = "GIN\x01"
Version = uint16(3)
)
const (
TypeString uint8 = 1 << iota
TypeInt
TypeFloat
TypeBool
TypeNull
)
const (
FlagBloomOnly uint8 = 1 << iota
FlagTrigramIndex // path has trigram index for CONTAINS queries
)
const DefaultMetadataKey = "gin.index"
const (
FlagHasDocIDMap uint16 = 1 << iota
)
Variables ¶
var (
// ErrVersionMismatch is returned by Decode when the binary format version
// does not match the expected version (Version constant).
ErrVersionMismatch = errors.New("version mismatch")
// ErrInvalidFormat is returned by Decode when the binary data is structurally
// invalid: unrecognized magic bytes, oversized allocations, or corrupt fields.
ErrInvalidFormat = errors.New("invalid format")
)
Functions ¶
func BoolNormalize ¶
BoolNormalize normalizes various boolean-like values to actual booleans. Handles: bool, "true"/"false"/"yes"/"no"/"1"/"0"/"on"/"off", float64 (0 = false).
func CIDRToRange ¶
CIDRToRange parses a CIDR notation string and returns the start and end IP addresses as float64 values suitable for use with GTE/LTE predicates on IPv4ToInt-transformed fields. Example: CIDRToRange("192.168.1.0/24") returns (3232235776, 3232236031, nil)
func CompressionStats ¶
CompressionStats returns the compression ratio for a set of terms. Returns (compressed size, original size, ratio).
func DateToEpochMs ¶
DateToEpochMs parses "2006-01-02" format to Unix milliseconds (midnight UTC).
func DurationToMs ¶
DurationToMs parses Go duration strings (e.g., "1h30m", "500ms") to milliseconds.
func EmailDomain ¶
EmailDomain extracts and lowercases the domain from an email address.
func EncodeToMetadata ¶
func EncodeToMetadata(idx *GINIndex, cfg ParquetConfig) (key string, value string, err error)
func EncodeWithLevel ¶
func EncodeWithLevel(idx *GINIndex, level CompressionLevel) ([]byte, error)
EncodeWithLevel serializes the index with the specified compression level. Use CompressionNone (0) for no compression, or 1-19 for zstd compression levels.
func ExtractLiterals ¶
ExtractLiterals extracts literal strings from a regex pattern that can be used for trigram-based candidate selection. Returns a slice of literal alternatives. For patterns like "foo|bar", returns ["foo", "bar"]. For patterns like "(error|warn)_msg", returns ["error_msg", "warn_msg"] (combined).
func ExtractTrigrams ¶
func GenerateBigrams ¶
func GenerateNGrams ¶
func GenerateNGrams(text string, n int, opts ...NGramOption) ([]string, error)
func GenerateTrigrams ¶
func HasGINIndex ¶
func HasGINIndex(parquetFile string, cfg ParquetConfig) (bool, error)
func HasGINIndexReader ¶
func HasSidecar ¶
func ISODateToEpochMs ¶
ISODateToEpochMs parses RFC3339/ISO8601 strings to Unix milliseconds.
func IsDirectory ¶
func IsValidJSONPath ¶
func ListGINFiles ¶
func ListParquetFiles ¶
func MustValidateJSONPath ¶
func NormalizePath ¶
NormalizePath converts a JSONPath to a canonical dot-notation form.
func ParseJSONPath ¶
ParseJSONPath parses and validates a JSONPath, returning the parsed expression.
func ParseS3Path ¶
func RebuildWithIndex ¶
func RebuildWithIndex(parquetFile string, idx *GINIndex, cfg ParquetConfig) error
func SemVerToInt ¶
SemVerToInt encodes semantic versions as integers: major*1000000 + minor*1000 + patch. Supports formats: "1.2.3", "v1.2.3", "1.2", "v1.2", "1.2.3-beta" (pre-release suffix ignored).
func SidecarPath ¶
func ValidateJSONPath ¶
ValidateJSONPath validates a JSONPath expression and ensures it only uses features supported by the GIN index (dot notation, wildcards). Unsupported: array indices ([0]), filters ([?()]), recursive descent (..), and scripts.
func WriteCompressedTerms ¶
func WriteCompressedTerms(w io.Writer, blocks []CompressedTermBlock) error
func WriteSidecar ¶
Types ¶
type BloomFilter ¶
type BloomFilter struct {
// contains filtered or unexported fields
}
func BloomFilterFromBits ¶
func BloomFilterFromBits(bits []uint64, numBits uint32, numHashes uint8) *BloomFilter
func MustNewBloomFilter ¶
func MustNewBloomFilter(numBits uint32, numHashes uint8, opts ...BloomFilterOption) *BloomFilter
func NewBloomFilter ¶
func NewBloomFilter(numBits uint32, numHashes uint8, opts ...BloomFilterOption) (*BloomFilter, error)
func (*BloomFilter) Add ¶
func (bf *BloomFilter) Add(data []byte)
func (*BloomFilter) AddString ¶
func (bf *BloomFilter) AddString(s string)
func (*BloomFilter) Bits ¶
func (bf *BloomFilter) Bits() []uint64
func (*BloomFilter) MayContain ¶
func (bf *BloomFilter) MayContain(data []byte) bool
func (*BloomFilter) MayContainString ¶
func (bf *BloomFilter) MayContainString(s string) bool
func (*BloomFilter) NumBits ¶
func (bf *BloomFilter) NumBits() uint32
func (*BloomFilter) NumHashes ¶
func (bf *BloomFilter) NumHashes() uint8
type BloomFilterOption ¶
type BloomFilterOption func(*BloomFilter) error
type BuilderOption ¶
type BuilderOption func(*GINBuilder) error
func WithCodec ¶
func WithCodec(codec DocIDCodec) BuilderOption
type CompressedTermBlock ¶
type CompressedTermBlock struct {
FirstTerm string
Entries []PrefixEntry
}
func ReadCompressedTerms ¶
func ReadCompressedTerms(r io.Reader) ([]CompressedTermBlock, error)
type CompressionLevel ¶
type CompressionLevel int
CompressionLevel specifies the compression level for index serialization.
const (
CompressionNone CompressionLevel = 0 // No compression
CompressionFastest CompressionLevel = 1 // zstd level 1
CompressionBalanced CompressionLevel = 3 // zstd level 3
CompressionBetter CompressionLevel = 9 // zstd level 9
CompressionBest CompressionLevel = 15 // zstd level 15 (recommended)
CompressionMax CompressionLevel = 19 // zstd level 19 (slow)
)
type ConfigOption ¶
func WithBoolNormalizeTransformer ¶
func WithBoolNormalizeTransformer(path string) ConfigOption
func WithCustomDateTransformer ¶
func WithCustomDateTransformer(path, layout string) ConfigOption
func WithDateTransformer ¶
func WithDateTransformer(path string) ConfigOption
func WithDurationTransformer ¶
func WithDurationTransformer(path string) ConfigOption
func WithEmailDomainTransformer ¶
func WithEmailDomainTransformer(path string) ConfigOption
func WithFTSPaths ¶
func WithFTSPaths(paths ...string) ConfigOption
func WithFieldTransformer ¶
func WithFieldTransformer(path string, fn FieldTransformer) ConfigOption
func WithIPv4Transformer ¶
func WithIPv4Transformer(path string) ConfigOption
func WithISODateTransformer ¶
func WithISODateTransformer(path string) ConfigOption
func WithNumericBucketTransformer ¶
func WithNumericBucketTransformer(path string, size float64) ConfigOption
func WithRegexExtractIntTransformer ¶
func WithRegexExtractIntTransformer(path, pattern string, group int) ConfigOption
func WithRegexExtractTransformer ¶
func WithRegexExtractTransformer(path, pattern string, group int) ConfigOption
func WithRegisteredTransformer ¶
func WithRegisteredTransformer(path string, id TransformerID, params []byte) ConfigOption
func WithSemVerTransformer ¶
func WithSemVerTransformer(path string) ConfigOption
func WithToLowerTransformer ¶
func WithToLowerTransformer(path string) ConfigOption
func WithURLHostTransformer ¶
func WithURLHostTransformer(path string) ConfigOption
type CustomDateParams ¶
type CustomDateParams struct {
Layout string `json:"layout"`
}
type DocIDCodec ¶
DocIDCodec encodes/decodes composite information into a single DocID.
type FieldTransformer ¶
FieldTransformer transforms a value before indexing. Returns (transformedValue, ok). If ok=false, original value is indexed.
func CustomDateToEpochMs ¶
func CustomDateToEpochMs(layout string) FieldTransformer
CustomDateToEpochMs returns a transformer for custom date formats.
func NumericBucket ¶
func NumericBucket(size float64) FieldTransformer
NumericBucket returns a transformer that buckets numeric values by size. Example: NumericBucket(100) transforms 150 -> 100, 250 -> 200.
func ReconstructTransformer ¶
func ReconstructTransformer(id TransformerID, params json.RawMessage) (FieldTransformer, error)
func RegexExtract ¶
func RegexExtract(pattern string, group int) FieldTransformer
RegexExtract returns a transformer that extracts a substring via regex capture group. Pattern is compiled once at config time. Group 0 = full match, group 1+ = capture groups.
func RegexExtractInt ¶
func RegexExtractInt(pattern string, group int) FieldTransformer
RegexExtractInt extracts a substring via regex and converts it to float64.
type GINBuilder ¶
type GINBuilder struct {
// contains filtered or unexported fields
}
func NewBuilder ¶
func NewBuilder(config GINConfig, numRGs int, opts ...BuilderOption) (*GINBuilder, error)
func (*GINBuilder) AddDocument ¶
func (b *GINBuilder) AddDocument(docID DocID, jsonDoc []byte) error
func (*GINBuilder) Finalize ¶
func (b *GINBuilder) Finalize() *GINIndex
type GINConfig ¶
type GINConfig struct {
CardinalityThreshold uint32
BloomFilterSize uint32
BloomFilterHashes uint8
EnableTrigrams bool
TrigramMinLength int
HLLPrecision uint8
PrefixBlockSize int
// contains filtered or unexported fields
}
func DefaultConfig ¶
func DefaultConfig() GINConfig
func NewConfig ¶
func NewConfig(opts ...ConfigOption) (GINConfig, error)
type GINIndex ¶
type GINIndex struct {
Header Header
PathDirectory []PathEntry
GlobalBloom *BloomFilter
StringIndexes map[uint16]*StringIndex
NumericIndexes map[uint16]*NumericIndex
NullIndexes map[uint16]*NullIndex
TrigramIndexes map[uint16]*TrigramIndex
StringLengthIndexes map[uint16]*StringLengthIndex
PathCardinality map[uint16]*HyperLogLog
DocIDMapping []DocID
Config *GINConfig
}
func BuildFromParquet ¶
func BuildFromParquetReader ¶
func DecodeFromMetadata ¶
func LoadIndexReader ¶
func NewGINIndex ¶
func NewGINIndex() *GINIndex
func ReadFromParquetMetadata ¶
func ReadFromParquetMetadata(parquetFile string, cfg ParquetConfig) (*GINIndex, error)
func ReadSidecar ¶
func (*GINIndex) MatchingDocIDs ¶
type HyperLogLog ¶
type HyperLogLog struct {
// contains filtered or unexported fields
}
HyperLogLog implements the HyperLogLog algorithm for cardinality estimation. It uses 2^precision registers to estimate the number of distinct elements.
func HyperLogLogFromRegisters ¶
func HyperLogLogFromRegisters(registers []uint8, precision uint8) *HyperLogLog
func MustNewHyperLogLog ¶
func MustNewHyperLogLog(precision uint8, opts ...HyperLogLogOption) *HyperLogLog
func NewHyperLogLog ¶
func NewHyperLogLog(precision uint8, opts ...HyperLogLogOption) (*HyperLogLog, error)
NewHyperLogLog creates a new HyperLogLog with the given precision. Precision must be between 4 and 16. Higher precision = more accuracy but more memory. Memory usage: 2^precision bytes. Standard error: 1.04 / sqrt(m) where m = 2^precision
func (*HyperLogLog) Add ¶
func (hll *HyperLogLog) Add(data []byte)
func (*HyperLogLog) AddString ¶
func (hll *HyperLogLog) AddString(s string)
func (*HyperLogLog) Clear ¶
func (hll *HyperLogLog) Clear()
func (*HyperLogLog) Clone ¶
func (hll *HyperLogLog) Clone() *HyperLogLog
func (*HyperLogLog) Estimate ¶
func (hll *HyperLogLog) Estimate() uint64
func (*HyperLogLog) Merge ¶
func (hll *HyperLogLog) Merge(other *HyperLogLog)
func (*HyperLogLog) Precision ¶
func (hll *HyperLogLog) Precision() uint8
func (*HyperLogLog) Registers ¶
func (hll *HyperLogLog) Registers() []uint8
type HyperLogLogOption ¶
type HyperLogLogOption func(*HyperLogLog) error
type IdentityCodec ¶
type IdentityCodec struct{}
IdentityCodec treats the position as the DocID (1:1 mapping).
func NewIdentityCodec ¶
func NewIdentityCodec() *IdentityCodec
func (*IdentityCodec) Decode ¶
func (c *IdentityCodec) Decode(docID DocID) []int
func (*IdentityCodec) Encode ¶
func (c *IdentityCodec) Encode(indices ...int) DocID
func (*IdentityCodec) Name ¶
func (c *IdentityCodec) Name() string
type JSONPathError ¶
func (*JSONPathError) Error ¶
func (e *JSONPathError) Error() string
type NGramConfig ¶
type NGramOption ¶
type NGramOption func(*NGramConfig) error
func WithN ¶
func WithN(n int) NGramOption
func WithPadding ¶
func WithPadding(pad string) NGramOption
type NumericBucketParams ¶
type NumericBucketParams struct {
Size float64 `json:"size"`
}
type NumericIndex ¶
type NumericIndex struct {
ValueType uint8
GlobalMin float64
GlobalMax float64
RGStats []RGNumericStat
}
type ParquetConfig ¶
type ParquetConfig struct {
MetadataKey string
}
func DefaultParquetConfig ¶
func DefaultParquetConfig() ParquetConfig
type ParquetIndexWriter ¶
type ParquetIndexWriter struct {
// contains filtered or unexported fields
}
func NewParquetIndexWriter ¶
func NewParquetIndexWriter(w io.Writer, schema *parquet.Schema, jsonColumn string, numRowGroups int, ginConfig GINConfig, pqConfig ParquetConfig) (*ParquetIndexWriter, error)
type Predicate ¶
type PrefixCompressor ¶
type PrefixCompressor struct {
// contains filtered or unexported fields
}
PrefixCompressor implements front-coding compression for sorted string lists. Each string is stored as: shared prefix length + suffix. This works well for sorted terms that share common prefixes.
func MustNewPrefixCompressor ¶
func MustNewPrefixCompressor(blockSize int, opts ...PrefixCompressorOption) *PrefixCompressor
func NewPrefixCompressor ¶
func NewPrefixCompressor(blockSize int, opts ...PrefixCompressorOption) (*PrefixCompressor, error)
func (*PrefixCompressor) BlockSize ¶
func (pc *PrefixCompressor) BlockSize() int
func (*PrefixCompressor) Compress ¶
func (pc *PrefixCompressor) Compress(terms []string) []CompressedTermBlock
func (*PrefixCompressor) Decompress ¶
func (pc *PrefixCompressor) Decompress(blocks []CompressedTermBlock) []string
type PrefixCompressorOption ¶
type PrefixCompressorOption func(*PrefixCompressor) error
type PrefixEntry ¶
type RGNumericStat ¶
type RGSet ¶
type RGSet struct {
NumRGs int
// contains filtered or unexported fields
}
func MustNewRGSet ¶
func MustNewRGSet(numRGs int, opts ...RGSetOption) *RGSet
type RGSetOption ¶
type RGStringLengthStat ¶
type RegexLiteralInfo ¶
type RegexLiteralInfo struct {
Literals []string // Extracted literal strings
HasWildcard bool // Pattern contains unbounded wildcards
MinLength int // Minimum length of any literal
}
RegexLiteralInfo contains extracted information from a regex pattern.
func AnalyzeRegex ¶
func AnalyzeRegex(pattern string) (*RegexLiteralInfo, error)
AnalyzeRegex extracts literals and metadata from a regex pattern.
type RegexParams ¶
type RowGroupCodec ¶
type RowGroupCodec struct {
// contains filtered or unexported fields
}
RowGroupCodec encodes file index and row group index into a DocID. Layout: DocID = fileIndex * rowGroupsPerFile + rgIndex
func NewRowGroupCodec ¶
func NewRowGroupCodec(rowGroupsPerFile int) *RowGroupCodec
func (*RowGroupCodec) Decode ¶
func (c *RowGroupCodec) Decode(docID DocID) []int
func (*RowGroupCodec) Encode ¶
func (c *RowGroupCodec) Encode(indices ...int) DocID
func (*RowGroupCodec) Name ¶
func (c *RowGroupCodec) Name() string
func (*RowGroupCodec) RowGroupsPerFile ¶
func (c *RowGroupCodec) RowGroupsPerFile() int
type S3Client ¶
type S3Client struct {
// contains filtered or unexported fields
}
func NewS3Client ¶
func NewS3ClientFromEnv ¶
func (*S3Client) BuildFromParquet ¶
func (*S3Client) GetObjectSize ¶
func (*S3Client) HasGINIndex ¶
func (c *S3Client) HasGINIndex(bucket, key string, cfg ParquetConfig) (bool, error)
func (*S3Client) HasSidecar ¶
func (*S3Client) ListGINFiles ¶
func (*S3Client) ListParquetFiles ¶
func (*S3Client) LoadIndex ¶
func (c *S3Client) LoadIndex(bucket, parquetKey string, cfg ParquetConfig) (*GINIndex, error)
func (*S3Client) OpenParquet ¶
func (*S3Client) ReadFromParquetMetadata ¶
func (c *S3Client) ReadFromParquetMetadata(bucket, key string, cfg ParquetConfig) (*GINIndex, error)
func (*S3Client) ReadSidecar ¶
type S3Config ¶
type S3Config struct {
Endpoint string
Region string
AccessKey string
SecretKey string
PathStyle bool
}
func S3ConfigFromEnv ¶
func S3ConfigFromEnv() S3Config
type SerializedConfig ¶
type SerializedConfig struct {
BloomFilterSize uint32 `json:"bloom_filter_size"`
BloomFilterHashes uint8 `json:"bloom_filter_hashes"`
EnableTrigrams bool `json:"enable_trigrams"`
TrigramMinLength int `json:"trigram_min_length"`
HLLPrecision uint8 `json:"hll_precision"`
PrefixBlockSize int `json:"prefix_block_size"`
FTSPaths []string `json:"fts_paths,omitempty"`
Transformers []TransformerSpec `json:"transformers,omitempty"`
}
type StringIndex ¶
type StringLengthIndex ¶
type StringLengthIndex struct {
GlobalMin uint32
GlobalMax uint32
RGStats []RGStringLengthStat
}
type TransformerID ¶
type TransformerID uint8
const (
TransformerUnknown TransformerID = iota
TransformerISODateToEpochMs
TransformerDateToEpochMs
TransformerCustomDateToEpochMs
TransformerToLower
TransformerIPv4ToInt
TransformerSemVerToInt
TransformerRegexExtract
TransformerRegexExtractInt
TransformerDurationToMs
TransformerEmailDomain
TransformerURLHost
TransformerNumericBucket
TransformerBoolNormalize
)
type TransformerSpec ¶
type TransformerSpec struct {
Path string `json:"path"`
ID TransformerID `json:"id"`
Name string `json:"name"`
Params json.RawMessage `json:"params,omitempty"`
}
func NewTransformerSpec ¶
func NewTransformerSpec(path string, id TransformerID, params json.RawMessage) TransformerSpec
type TrigramIndex ¶
type TrigramIndex struct {
Trigrams map[string]*RGSet
NumRGs int
N int
Padding string
MinLength int
}
func NewTrigramIndex ¶
func NewTrigramIndex(numRGs int, opts ...NGramOption) (*TrigramIndex, error)
func (*TrigramIndex) Add ¶
func (ti *TrigramIndex) Add(value string, rgID int)
func (*TrigramIndex) Search ¶
func (ti *TrigramIndex) Search(pattern string) *RGSet
func (*TrigramIndex) TrigramCount ¶
func (ti *TrigramIndex) TrigramCount() int
Source Files
¶
Directories
¶
| Path | Synopsis |
|---|---|
| cmd | |
| gin-index (command) | |
| examples | |
| basic (command) | Example: Basic GIN index usage with equality queries |
| full (command) | Example: Comprehensive GIN index usage demonstrating all index types and query operators |
| fulltext (command) | Example: Full-text search with trigram index (CONTAINS queries) |
| nested (command) | Example: Nested JSON objects and arrays |
| null (command) | Example: NULL handling queries |
| parquet (command) | |
| range (command) | Example: Numeric range queries with GIN index |
| regex (command) | Example: Regex pattern matching with trigram-based candidate selection |
| serialize (command) | Example: Serializing and deserializing GIN index |
| transformers (command) | Example: Field transformers for date indexing |
| transformers-advanced (command) | Example: Advanced field transformers for IP ranges, semantic versions, emails, and regex extraction |