indexer

package module
v0.1.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Sep 30, 2025 License: MIT Imports: 11 Imported by: 0

Documentation

Index

Constants

View Source
const (
	HolderNameDefault   = "default"
	HolderNameACMatcher = "ac_matcher"
)
View Source
const (
	NULLENTRY EntryID = 0xFFFFFFFFFFFFFFFF

	MaxBEFieldID uint64 = 0xFF             // 8bit
	MaxBEValueID uint64 = 0xFFFFFFFFFFFFFF // 56bit

)
View Source
const (
	DebugLevel = iota
	InfoLevel
	ErrorLevel
)
View Source
const (
	LinearSearchLengthThreshold = 8
)
View Source
const (
	WildcardFieldName = BEField("_Z_")
)

Variables

View Source
var (
	LogLevel int           = InfoLevel // control defaultLogger log level
	Logger   BEIndexLogger = &DefaultLogger{}
)

Functions

func HasHolderBuilder

func HasHolderBuilder(name string) bool

func LogDebugIf

func LogDebugIf(condition bool, format string, v ...interface{})

func LogErrIf

func LogErrIf(condition bool, format string, v ...interface{})

func LogIfErr

func LogIfErr(err error, format string, v ...interface{})

func LogInfoIf

func LogInfoIf(condition bool, format string, v ...interface{})

func NewInt32Values

func NewInt32Values(o ...int32) (res []interface{})

func NewInt32Values2

func NewInt32Values2(v int32, o ...int32) (res []interface{})

func NewInt64Values

func NewInt64Values(o ...int64) (res []interface{})

func NewInt64Values2

func NewInt64Values2(v int64, o ...int64) (res []interface{})

func NewIntValues

func NewIntValues(o ...int) (res []interface{})

func NewIntValues2

func NewIntValues2(v int, o ...int) (res []interface{})

func NewStrValues

func NewStrValues(ss ...string) (res []interface{})

func NewStrValues2

func NewStrValues2(v string, ss ...string) (res []interface{})

func PutCollector

func PutCollector(c *DocIDCollector)

func RegisterEntriesHolder

func RegisterEntriesHolder(name string, builder HolderBuilder) error

Types

type ACEntriesHolder

type ACEntriesHolder struct {
	ACHolderOption

	KeepBuilder bool
	// contains filtered or unexported fields
}

func NewACEntriesHolder

func NewACEntriesHolder(option ACHolderOption) *ACEntriesHolder

NewACEntriesHolder creates an ACEntriesHolder. By default it drops the builder after the AC machine is compiled; to keep the builder details, register a customized ACEntriesHolder builder:

RegisterEntriesHolder(HolderNameACMatcher, func() EntriesHolder {
    holder := NewACEntriesHolder(ACHolderOption{})
    holder.KeepBuilder = true
    return holder
})

NOTE: this is intended for debugging only; it consumes much more memory.

func (*ACEntriesHolder) AddFieldEID

func (h *ACEntriesHolder) AddFieldEID(field *fieldDesc, values Values, eid EntryID) error

func (*ACEntriesHolder) CompileEntries

func (h *ACEntriesHolder) CompileEntries()

func (*ACEntriesHolder) DumpEntries

func (h *ACEntriesHolder) DumpEntries(buffer *strings.Builder)

func (*ACEntriesHolder) EnableDebug

func (h *ACEntriesHolder) EnableDebug(debug bool)

func (*ACEntriesHolder) GetEntries

func (h *ACEntriesHolder) GetEntries(field *fieldDesc, assigns Values) (CursorGroup, error)

type ACHolderOption

type ACHolderOption struct {
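	// QuerySep is the separator used at query time, when there are multiple values, to join
	// the query values into a single query string. It defaults to whitespace, because
	// semantically a "blank" better preserves the correctness of the logical expression.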
	QuerySep string
}

type Assignments

type Assignments map[BEField]Values

func (Assignments) Size

func (ass Assignments) Size() (size int)

type BEField

type BEField string

type BEIndex

type BEIndex interface {

	// Retrieve scan index data and retrieve satisfied document
	Retrieve(queries Assignments, opt ...IndexOpt) (DocIDList, error)

	// RetrieveWithCollector scan index data and retrieve satisfied document
	RetrieveWithCollector(Assignments, ResultCollector, ...IndexOpt) error

	// DumpEntries debug api
	DumpEntries() string

	DumpEntriesSummary() string
	// contains filtered or unexported methods
}

func NewSizeGroupedBEIndex

func NewSizeGroupedBEIndex() BEIndex

type BEIndexLogger

type BEIndexLogger interface {
	Debugf(format string, v ...interface{})
	Infof(format string, v ...interface{})
	Errorf(format string, v ...interface{})
}

type BoolValues

type BoolValues struct {
	Incl  bool   `json:"inc"`   // include: true exclude: false
	Value Values `json:"value"` // values can be parser parse to id
}

BoolValues expresses boolean logic such as: (in) [15,16,17], (not in) [shanghai,yz]

func (*BoolValues) JSONString

func (v *BoolValues) JSONString() string

func (*BoolValues) String

func (v *BoolValues) String() string

type BooleanExpr

type BooleanExpr struct {
	BoolValues
	Field BEField `json:"field"`
}

BooleanExpr expresses boolean logic on a field, such as: age (in) [15,16,17], city (not in) [shanghai,yz]

func NewBoolExpr

func NewBoolExpr(field BEField, inc bool, v Values) *BooleanExpr

func NewBoolExpr2

func NewBoolExpr2(field BEField, expr BoolValues) *BooleanExpr

type CompactedBEIndex

type CompactedBEIndex struct {
	// contains filtered or unexported fields
}

func NewCompactedBEIndex

func NewCompactedBEIndex() *CompactedBEIndex

func (*CompactedBEIndex) DumpEntries

func (bi *CompactedBEIndex) DumpEntries() string

func (*CompactedBEIndex) DumpEntriesSummary

func (bi *CompactedBEIndex) DumpEntriesSummary() string

func (*CompactedBEIndex) Retrieve

func (bi *CompactedBEIndex) Retrieve(
	queries Assignments, opts ...IndexOpt) (result DocIDList, err error)

func (*CompactedBEIndex) RetrieveWithCollector

func (bi *CompactedBEIndex) RetrieveWithCollector(
	queries Assignments, collector ResultCollector, opts ...IndexOpt) (err error)

type ConjID

type ConjID uint64

ConjID max support 56bit len |--[(reserved(16)) | size(8bit) | index(8bit) | docID(32bit)]

func NewConjID

func NewConjID(docID DocID, index, size int) ConjID

NewConjID (reserved(16))| size(8bit) | index(8bit) | docID(32bit)

func (ConjID) DocID

func (id ConjID) DocID() DocID

func (ConjID) Index

func (id ConjID) Index() int

func (ConjID) Size

func (id ConjID) Size() int

type Conjunction

type Conjunction struct {
	Expressions map[BEField]*BoolValues `json:"exprs"` // duplicate fields are not allowed within one Conj
}

func NewConjunction

func NewConjunction() *Conjunction

func (*Conjunction) AddBoolExpr

func (conj *Conjunction) AddBoolExpr(expr *BooleanExpr) *Conjunction

func (*Conjunction) AddBoolExprs

func (conj *Conjunction) AddBoolExprs(exprs ...*BooleanExpr)

AddBoolExprs appends boolean expressions; the same field must not be added twice in one conjunction

func (*Conjunction) AddExpression

func (conj *Conjunction) AddExpression(expr *BooleanExpr) *Conjunction

func (*Conjunction) AddExpression3

func (conj *Conjunction) AddExpression3(field string, include bool, values Values) *Conjunction

func (*Conjunction) CalcConjSize

func (conj *Conjunction) CalcConjSize() (size int)

func (*Conjunction) Exclude

func (conj *Conjunction) Exclude(field BEField, values Values) *Conjunction

func (*Conjunction) In

func (conj *Conjunction) In(field BEField, values Values) *Conjunction

In adds an expression that is **true** when any value in values matches

func (*Conjunction) Include

func (conj *Conjunction) Include(field BEField, values Values) *Conjunction

func (*Conjunction) JSONString

func (conj *Conjunction) JSONString() string

func (*Conjunction) NotIn

func (conj *Conjunction) NotIn(field BEField, values Values) *Conjunction

NotIn adds an expression that is **false** when any value in values matches

func (*Conjunction) String

func (conj *Conjunction) String() string

type CursorGroup

type CursorGroup []*EntriesCursor

func (CursorGroup) DumpEntries

func (cg CursorGroup) DumpEntries(sb *strings.Builder)

type DefaultEntriesHolder

type DefaultEntriesHolder struct {
	// contains filtered or unexported fields
}

DefaultEntriesHolder is an EntriesHolder implementation based on a hash map: map<key, Entries>

func (*DefaultEntriesHolder) AddFieldEID

func (h *DefaultEntriesHolder) AddFieldEID(field *fieldDesc, values Values, eid EntryID) (err error)

func (*DefaultEntriesHolder) AppendEntryID

func (h *DefaultEntriesHolder) AppendEntryID(key Key, id EntryID)

func (*DefaultEntriesHolder) CompileEntries

func (h *DefaultEntriesHolder) CompileEntries()

func (*DefaultEntriesHolder) DumpEntries

func (h *DefaultEntriesHolder) DumpEntries(buffer *strings.Builder)

func (*DefaultEntriesHolder) EnableDebug

func (h *DefaultEntriesHolder) EnableDebug(debug bool)

func (*DefaultEntriesHolder) GetEntries

func (h *DefaultEntriesHolder) GetEntries(field *fieldDesc, assigns Values) (r CursorGroup, e error)

type DefaultLogger

type DefaultLogger struct {
}

DefaultLogger is a console logger that uses the fmt library

func (*DefaultLogger) Debugf

func (l *DefaultLogger) Debugf(format string, v ...interface{})

func (*DefaultLogger) Errorf

func (l *DefaultLogger) Errorf(format string, v ...interface{})

func (*DefaultLogger) Infof

func (l *DefaultLogger) Infof(format string, v ...interface{})

type DocID

type DocID int64

type DocIDCollector

type DocIDCollector struct {
	// contains filtered or unexported fields
}

DocIDCollector is the default collector; it removes duplicated docs

func NewDocIDCollector

func NewDocIDCollector() *DocIDCollector

func PickCollector

func PickCollector() *DocIDCollector

func (*DocIDCollector) Add

func (c *DocIDCollector) Add(docID DocID, _ ConjID)

func (*DocIDCollector) DocCount

func (c *DocIDCollector) DocCount() int

func (*DocIDCollector) GetDocIDs

func (c *DocIDCollector) GetDocIDs() (ids DocIDList)

func (*DocIDCollector) GetDocIDsInto

func (c *DocIDCollector) GetDocIDsInto(ids *DocIDList)

func (*DocIDCollector) Reset

func (c *DocIDCollector) Reset()

type DocIDList

type DocIDList []DocID

func (DocIDList) Contain

func (s DocIDList) Contain(id DocID) bool

func (DocIDList) Len

func (s DocIDList) Len() int

Len sort API

func (DocIDList) Less

func (s DocIDList) Less(i, j int) bool

func (DocIDList) Sub

func (s DocIDList) Sub(other DocIDList) (r DocIDList)

func (DocIDList) Swap

func (s DocIDList) Swap(i, j int)

type Document

type Document struct {
	ID   DocID          `json:"id"`   // supports at most max-int32 documents
	Cons []*Conjunction `json:"cons"` // conjunctions are OR-ed together; see the paper for the detailed description
}

func NewDocument

func NewDocument(id DocID) *Document

func (*Document) AddConjunction

func (doc *Document) AddConjunction(cons ...*Conjunction)

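AddConjunction adds conjunctions; each conjunction is a complete set of expressions and must be one full condition combination of the DNF boolean expression that describes the document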

func (*Document) AddConjunctions

func (doc *Document) AddConjunctions(conj *Conjunction, others ...*Conjunction)

func (*Document) JSONString

func (doc *Document) JSONString() string

func (*Document) String

func (doc *Document) String() string

String returns a more compact string representation

type Entries

type Entries []EntryID

Entries is a type defined to support sorting

func (Entries) DocString

func (s Entries) DocString() []string

func (Entries) Len

func (s Entries) Len() int

Len Entries sort API

func (Entries) Less

func (s Entries) Less(i, j int) bool

func (Entries) Swap

func (s Entries) Swap(i, j int)

type EntriesContainer

type EntriesContainer struct {
	// contains filtered or unexported fields
}

EntriesContainer is used by the default EntriesHolder and can hold entries for different fields; for ACMatcher or other holders, it may hold entries for only one field

func (*EntriesContainer) DumpString

func (c *EntriesContainer) DumpString(buf *strings.Builder)

type EntriesCursor

type EntriesCursor struct {
	// contains filtered or unexported fields
}

EntriesCursor represents a posting list for one Assign (age, 15): [1, 2, 5, 19, 22] cursor: ^

func NewEntriesCursor

func NewEntriesCursor(key QKey, entries Entries) *EntriesCursor

func (*EntriesCursor) DumpEntries

func (cur *EntriesCursor) DumpEntries(sb *strings.Builder)

func (*EntriesCursor) GetCurEntryID

func (sc *EntriesCursor) GetCurEntryID() EntryID

func (*EntriesCursor) LinearSkip

func (sc *EntriesCursor) LinearSkip(id EntryID) EntryID

func (*EntriesCursor) LinearSkipTo

func (sc *EntriesCursor) LinearSkipTo(id EntryID) EntryID

func (*EntriesCursor) Skip

func (sc *EntriesCursor) Skip(id EntryID) EntryID

func (*EntriesCursor) SkipTo

func (sc *EntriesCursor) SkipTo(id EntryID) EntryID

type EntriesHolder

type EntriesHolder interface {
	EnableDebug(debug bool)

	DumpEntries(buffer *strings.Builder)

	// CompileEntries finalize entries status for query, build or make sorted
	// according to the paper, entries must be sorted
	CompileEntries()

	GetEntries(field *fieldDesc, assigns Values) (CursorGroup, error)

	// AddFieldEID tokenize values and add it to holder container
	AddFieldEID(field *fieldDesc, values Values, eid EntryID) error
}

func NewDefaultEntriesHolder

func NewDefaultEntriesHolder() EntriesHolder

func NewEntriesHolder

func NewEntriesHolder(name string) EntriesHolder

type EntryID

type EntryID uint64

EntryID [-- ConjID(48bit) --|-- empty(15bit) -- | --incl/excl(1bit) --]

|--[(reserved(16)) | size(8bit) | index(8bit)  | docID(32bit)]

func NewEntryID

func NewEntryID(id ConjID, incl bool) EntryID

NewEntryID |-- ConjID(48bit) --|-- empty(15bit) -- | --incl/excl(1bit) --|

func (EntryID) DocString

func (entry EntryID) DocString() string

func (EntryID) GetConjID

func (entry EntryID) GetConjID() ConjID

func (EntryID) IsExclude

func (entry EntryID) IsExclude() bool

func (EntryID) IsInclude

func (entry EntryID) IsInclude() bool

func (EntryID) IsNULLEntry

func (entry EntryID) IsNULLEntry() bool

type FieldCursor

type FieldCursor struct {
	// contains filtered or unexported fields
}

FieldCursor for a boolean expression: {"tag", "in", [1, 2, 3]} tag_2: [ID5] tag_1: [ID1, ID2, ID7]

func NewFieldCursor

func NewFieldCursor(cursors ...*EntriesCursor) *FieldCursor

func (*FieldCursor) AddPostingList

func (sg *FieldCursor) AddPostingList(cursor *EntriesCursor)

func (*FieldCursor) DumpEntries

func (sg *FieldCursor) DumpEntries() string

func (*FieldCursor) GetCurConjID

func (sg *FieldCursor) GetCurConjID() ConjID

func (*FieldCursor) GetCurEntryID

func (sg *FieldCursor) GetCurEntryID() EntryID

func (*FieldCursor) ReachEnd

func (sg *FieldCursor) ReachEnd() bool

func (*FieldCursor) Skip

func (sg *FieldCursor) Skip(id EntryID) (newMin EntryID)

func (*FieldCursor) SkipTo

func (sg *FieldCursor) SkipTo(id EntryID) (newMin EntryID)

type FieldCursors

type FieldCursors []*FieldCursor

func (FieldCursors) Dump

func (s FieldCursors) Dump() string

func (FieldCursors) DumpCurrent

func (s FieldCursors) DumpCurrent() string

func (FieldCursors) Len

func (s FieldCursors) Len() int

Len FieldCursors sort API

func (FieldCursors) Less

func (s FieldCursors) Less(i, j int) bool

func (FieldCursors) Sort

func (s FieldCursors) Sort()

Sort uses a simple insertion sort instead of Go's built-in sort.Sort, which has obvious performance overhead (runtime convTSlice). Because there are not many elements, insertion sort is sufficient; a quicksort may replace it later

func (FieldCursors) Swap

func (s FieldCursors) Swap(i, j int)

type FieldOption

type FieldOption struct {
	Parser parser.FieldValueParser // will create a default parser if parser is nil

	Container string // specify Entries holder for all tokenized value Entries
}

type HolderBuilder

type HolderBuilder func() EntriesHolder

type IndexOpt

type IndexOpt func(ctx *retrieveContext)

func WithCollector

func WithCollector(fn ResultCollector) IndexOpt

WithCollector specify a user defined collector

func WithDumpEntries

func WithDumpEntries() IndexOpt

func WithStepDetail

func WithStepDetail() IndexOpt

type IndexerBuilder

type IndexerBuilder struct {
	// contains filtered or unexported fields
}

func NewCompactIndexerBuilder

func NewCompactIndexerBuilder() *IndexerBuilder

func NewIndexerBuilder

func NewIndexerBuilder() *IndexerBuilder

func (*IndexerBuilder) AddDocument

func (b *IndexerBuilder) AddDocument(doc *Document) error

func (*IndexerBuilder) BuildIndex

func (b *IndexerBuilder) BuildIndex() BEIndex

func (*IndexerBuilder) ConfigField

func (b *IndexerBuilder) ConfigField(field BEField, settings FieldOption)

type IndexerSettings

type IndexerSettings struct {
	FieldConfig map[BEField]FieldOption
}

type Key

type Key uint64

Key is the term representing a field and its value, e.g. <age,15>, laid out as <field-8bit> | <value-56bit>

func NewKey

func NewKey(fieldID uint64, valueID uint64) Key

NewKey API

func (Key) GetFieldID

func (key Key) GetFieldID() uint64

func (Key) GetValueID

func (key Key) GetValueID() uint64

func (Key) String

func (key Key) String() string

type QKey

type QKey struct {
	// contains filtered or unexported fields
}

func (*QKey) String

func (key *QKey) String() string

type ResultCollector

type ResultCollector interface {
	Add(id DocID, conj ConjID)

	GetDocIDs() (ids DocIDList)

	GetDocIDsInto(ids *DocIDList)
}

type SizeGroupedBEIndex

type SizeGroupedBEIndex struct {
	// contains filtered or unexported fields
}

func (*SizeGroupedBEIndex) DumpEntries

func (bi *SizeGroupedBEIndex) DumpEntries() string

func (*SizeGroupedBEIndex) DumpEntriesSummary

func (bi *SizeGroupedBEIndex) DumpEntriesSummary() string

func (*SizeGroupedBEIndex) Retrieve

func (bi *SizeGroupedBEIndex) Retrieve(
	queries Assignments, opts ...IndexOpt) (result DocIDList, err error)

func (*SizeGroupedBEIndex) RetrieveWithCollector

func (bi *SizeGroupedBEIndex) RetrieveWithCollector(
	queries Assignments, collector ResultCollector, opts ...IndexOpt) (err error)

type Values

type Values []interface{}

func NewValues

func NewValues(o ...interface{}) (res Values)

NewValues panics if a value is invalid

func NewValues2

func NewValues2(v interface{}, o ...interface{}) (res Values)

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL