fst

package
v0.0.0-...-9649366 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 21, 2019 License: Apache-2.0 Imports: 27 Imported by: 0

README

FS Segment

  • Version 1.1: Adds support for a metadata proto object per Field. The metadata stores an additional postings offset per Field, pointing to a PostingsList that is the union of every PostingsList across all known Terms of that Field.
┌───────────────────────────────┐            ┌──────────────────────────────────────┐
│ FST Fields File               │            │ FST Terms File                       │
│-------------------------------│            │--------------------------------------│
│- Vellum V1 Format             │            │`n` records, each:                    │
│- []byte -> FST Terms Offset   ├─────┐      │  - metadata proto (`md-size` bytes)  │
└───────────────────────────────┘     │      │  - md-size (int64)                   │
                                      │      │  - fst payload (`fst size` bytes)    │
                                      │      │  - fst size (int64)                  │
                                      └─────▶│  - magic number (int64)              │
                                             │                                      │
                                             │Payload:                              │
                                             │(1) Vellum V1 FST                     ├─┐
                                             │[]byte -> Postings Offset             │ │
                                             │                                      │ │
                                             │(2) Metadata Proto Bytes              │ │
                                             │Field Postings Offset                 │ │
                                             └──────────────────────────────────────┘ │
                                                   ┌───────────────────────────────┐  │
                                                   │ Postings Data File            │  │
                                                   │-------------------------------│  │
                                                   │`n` records, each:             │  │
                                                   │  - payload (`size` bytes)     │  │
                                                   │  - size (int64)               │  │
                                                   │  - magic number (int64)       │◀─┘
                                                   │                               │
                                                   │Payload:                       │
                                                   │- Pilosa Bitset                ├──┐
            ┌───────────────────────────┐          │- List of doc.ID               │  │
            │ Documents Data File       │          └───────────────────────────────┘  │
            │-------------------------  │                                             │
            │'n' records, each:         │                ┌─────────────────────────┐  │
            │  - Magic Number (int64)   │                │ Documents Index File    │  │
            │  - Valid (1 byte)         │                │-------------------------│  │
            │  - Size (int64)           │                │- Magic Number (int64)   │  │
            │  - Payload (`size` bytes) │                │- Num docs (int64)       │  │
            └───────────────────────────┘        ┌───────│- Base Doc.ID `b` (int64)│◀─┘
                          ▲                      │       │- Doc `b` offset (int64) │
                          │                      │       │- Doc `b+1` offset       │
                          └──────────────────────┘       │...                      │
                                                         │- Doc `b+n-1` offset     │
                                                         └─────────────────────────┘

  • Version 1.0: Initial Release.

┌───────────────────────────────┐           ┌───────────────────────────────┐
│ FST Fields File               │           │ FST Terms File                │
│-------------------------------│           │-------------------------------│
│- Vellum V1 FST                │           │`n` records, each:             │
│- []byte -> FST Terms Offset   │─────┐     │  - payload (`size` bytes)     │
└───────────────────────────────┘     │     │  - size (int64)               │
                                      └────▶│  - magic number (int64)       │
                                            │                               │
                                            │Payload:                       │
                                            │- Vellum V1 FST                │
                                            │- []byte -> Postings Offset    │
                                            └───────────────────────────────┘
        ┌───────────────────────────────┐                   │
        │ Postings Data File            │                   │
        │-------------------------------│                   │
        │`n` records, each:             │                   │
        │  - payload (`size` bytes)     │                   │
        │  - size (int64)               │                   │
        │  - magic number (int64)       │◀──────────────────┘
        │                               │
        │Payload:                       │
        │- Pilosa Bitset                │
        │- List of doc.ID               │
        └──────────┬────────────────────┘
                   │
                   │
                   │
                   │       ┌──────────────────────────┐           ┌───────────────────────────┐
                   │       │ Documents Index File     │           │ Documents Data File       │
                   │       │--------------------------│           │-------------------------  │
                   │       │- Base Doc.ID `b` (uint64)│           │'n' records, each:         │
                   │       │- Doc `b` offset (uint64) │    ┌─────▶│  - ID (bytes)             │
                   │       │- Doc `b+1` offset        │    │      │  - Fields (bytes)         │
                   └──────▶│...                       ├────┘      └───────────────────────────┘
                           │- Doc `b+n-1` offset      │
                           └──────────────────────────┘

Documentation

Overview

Package fst is a generated GoMock package.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ToTestSegment

func ToTestSegment(t *testing.T, s sgmt.MutableSegment, opts Options) sgmt.Segment

ToTestSegment returns an FST segment equivalent to the provided mutable segment.

Types

type MockSegment

type MockSegment struct {
	// contains filtered or unexported fields
}

MockSegment is a mock of Segment interface

func NewMockSegment

func NewMockSegment(ctrl *gomock.Controller) *MockSegment

NewMockSegment creates a new mock instance

func (*MockSegment) AllDocs

func (m *MockSegment) AllDocs() (index.IDDocIterator, error)

AllDocs mocks base method

func (*MockSegment) Close

func (m *MockSegment) Close() error

Close mocks base method

func (*MockSegment) ContainsField

func (m *MockSegment) ContainsField(arg0 []byte) (bool, error)

ContainsField mocks base method

func (*MockSegment) ContainsID

func (m *MockSegment) ContainsID(arg0 []byte) (bool, error)

ContainsID mocks base method

func (*MockSegment) Doc

func (m *MockSegment) Doc(arg0 postings.ID) (doc.Document, error)

Doc mocks base method

func (*MockSegment) Docs

func (m *MockSegment) Docs(arg0 postings.List) (doc.Iterator, error)

Docs mocks base method

func (*MockSegment) EXPECT

func (m *MockSegment) EXPECT() *MockSegmentMockRecorder

EXPECT returns an object that allows the caller to indicate expected use

func (*MockSegment) FieldsIterable

func (m *MockSegment) FieldsIterable() segment.FieldsIterable

FieldsIterable mocks base method

func (*MockSegment) MatchAll

func (m *MockSegment) MatchAll() (postings.MutableList, error)

MatchAll mocks base method

func (*MockSegment) MatchField

func (m *MockSegment) MatchField(arg0 []byte) (postings.List, error)

MatchField mocks base method

func (*MockSegment) MatchRegexp

func (m *MockSegment) MatchRegexp(arg0 []byte, arg1 index.CompiledRegex) (postings.List, error)

MatchRegexp mocks base method

func (*MockSegment) MatchTerm

func (m *MockSegment) MatchTerm(arg0, arg1 []byte) (postings.List, error)

MatchTerm mocks base method

func (*MockSegment) Reader

func (m *MockSegment) Reader() (index.Reader, error)

Reader mocks base method

func (*MockSegment) Size

func (m *MockSegment) Size() int64

Size mocks base method

func (*MockSegment) TermsIterable

func (m *MockSegment) TermsIterable() segment.TermsIterable

TermsIterable mocks base method

type MockSegmentMockRecorder

type MockSegmentMockRecorder struct {
	// contains filtered or unexported fields
}

MockSegmentMockRecorder is the mock recorder for MockSegment

func (*MockSegmentMockRecorder) AllDocs

func (mr *MockSegmentMockRecorder) AllDocs() *gomock.Call

AllDocs indicates an expected call of AllDocs

func (*MockSegmentMockRecorder) Close

func (mr *MockSegmentMockRecorder) Close() *gomock.Call

Close indicates an expected call of Close

func (*MockSegmentMockRecorder) ContainsField

func (mr *MockSegmentMockRecorder) ContainsField(arg0 interface{}) *gomock.Call

ContainsField indicates an expected call of ContainsField

func (*MockSegmentMockRecorder) ContainsID

func (mr *MockSegmentMockRecorder) ContainsID(arg0 interface{}) *gomock.Call

ContainsID indicates an expected call of ContainsID

func (*MockSegmentMockRecorder) Doc

func (mr *MockSegmentMockRecorder) Doc(arg0 interface{}) *gomock.Call

Doc indicates an expected call of Doc

func (*MockSegmentMockRecorder) Docs

func (mr *MockSegmentMockRecorder) Docs(arg0 interface{}) *gomock.Call

Docs indicates an expected call of Docs

func (*MockSegmentMockRecorder) FieldsIterable

func (mr *MockSegmentMockRecorder) FieldsIterable() *gomock.Call

FieldsIterable indicates an expected call of FieldsIterable

func (*MockSegmentMockRecorder) MatchAll

func (mr *MockSegmentMockRecorder) MatchAll() *gomock.Call

MatchAll indicates an expected call of MatchAll

func (*MockSegmentMockRecorder) MatchField

func (mr *MockSegmentMockRecorder) MatchField(arg0 interface{}) *gomock.Call

MatchField indicates an expected call of MatchField

func (*MockSegmentMockRecorder) MatchRegexp

func (mr *MockSegmentMockRecorder) MatchRegexp(arg0, arg1 interface{}) *gomock.Call

MatchRegexp indicates an expected call of MatchRegexp

func (*MockSegmentMockRecorder) MatchTerm

func (mr *MockSegmentMockRecorder) MatchTerm(arg0, arg1 interface{}) *gomock.Call

MatchTerm indicates an expected call of MatchTerm

func (*MockSegmentMockRecorder) Reader

func (mr *MockSegmentMockRecorder) Reader() *gomock.Call

Reader indicates an expected call of Reader

func (*MockSegmentMockRecorder) Size

func (mr *MockSegmentMockRecorder) Size() *gomock.Call

Size indicates an expected call of Size

func (*MockSegmentMockRecorder) TermsIterable

func (mr *MockSegmentMockRecorder) TermsIterable() *gomock.Call

TermsIterable indicates an expected call of TermsIterable

type MockWriter

type MockWriter struct {
	// contains filtered or unexported fields
}

MockWriter is a mock of Writer interface

func NewMockWriter

func NewMockWriter(ctrl *gomock.Controller) *MockWriter

NewMockWriter creates a new mock instance

func (*MockWriter) EXPECT

func (m *MockWriter) EXPECT() *MockWriterMockRecorder

EXPECT returns an object that allows the caller to indicate expected use

func (*MockWriter) MajorVersion

func (m *MockWriter) MajorVersion() int

MajorVersion mocks base method

func (*MockWriter) Metadata

func (m *MockWriter) Metadata() []byte

Metadata mocks base method

func (*MockWriter) MinorVersion

func (m *MockWriter) MinorVersion() int

MinorVersion mocks base method

func (*MockWriter) Reset

func (m *MockWriter) Reset(arg0 segment.Builder) error

Reset mocks base method

func (*MockWriter) WriteDocumentsData

func (m *MockWriter) WriteDocumentsData(arg0 io.Writer) error

WriteDocumentsData mocks base method

func (*MockWriter) WriteDocumentsIndex

func (m *MockWriter) WriteDocumentsIndex(arg0 io.Writer) error

WriteDocumentsIndex mocks base method

func (*MockWriter) WriteFSTFields

func (m *MockWriter) WriteFSTFields(arg0 io.Writer) error

WriteFSTFields mocks base method

func (*MockWriter) WriteFSTTerms

func (m *MockWriter) WriteFSTTerms(arg0 io.Writer) error

WriteFSTTerms mocks base method

func (*MockWriter) WritePostingsOffsets

func (m *MockWriter) WritePostingsOffsets(arg0 io.Writer) error

WritePostingsOffsets mocks base method

type MockWriterMockRecorder

type MockWriterMockRecorder struct {
	// contains filtered or unexported fields
}

MockWriterMockRecorder is the mock recorder for MockWriter

func (*MockWriterMockRecorder) MajorVersion

func (mr *MockWriterMockRecorder) MajorVersion() *gomock.Call

MajorVersion indicates an expected call of MajorVersion

func (*MockWriterMockRecorder) Metadata

func (mr *MockWriterMockRecorder) Metadata() *gomock.Call

Metadata indicates an expected call of Metadata

func (*MockWriterMockRecorder) MinorVersion

func (mr *MockWriterMockRecorder) MinorVersion() *gomock.Call

MinorVersion indicates an expected call of MinorVersion

func (*MockWriterMockRecorder) Reset

func (mr *MockWriterMockRecorder) Reset(arg0 interface{}) *gomock.Call

Reset indicates an expected call of Reset

func (*MockWriterMockRecorder) WriteDocumentsData

func (mr *MockWriterMockRecorder) WriteDocumentsData(arg0 interface{}) *gomock.Call

WriteDocumentsData indicates an expected call of WriteDocumentsData

func (*MockWriterMockRecorder) WriteDocumentsIndex

func (mr *MockWriterMockRecorder) WriteDocumentsIndex(arg0 interface{}) *gomock.Call

WriteDocumentsIndex indicates an expected call of WriteDocumentsIndex

func (*MockWriterMockRecorder) WriteFSTFields

func (mr *MockWriterMockRecorder) WriteFSTFields(arg0 interface{}) *gomock.Call

WriteFSTFields indicates an expected call of WriteFSTFields

func (*MockWriterMockRecorder) WriteFSTTerms

func (mr *MockWriterMockRecorder) WriteFSTTerms(arg0 interface{}) *gomock.Call

WriteFSTTerms indicates an expected call of WriteFSTTerms

func (*MockWriterMockRecorder) WritePostingsOffsets

func (mr *MockWriterMockRecorder) WritePostingsOffsets(arg0 interface{}) *gomock.Call

WritePostingsOffsets indicates an expected call of WritePostingsOffsets

type Options

type Options interface {
	// SetInstrumentOptions sets the instrument options and returns the
	// resulting Options.
	SetInstrumentOptions(value instrument.Options) Options

	// InstrumentOptions returns the instrument options.
	InstrumentOptions() instrument.Options

	// SetBytesPool sets the bytes pool and returns the resulting Options.
	SetBytesPool(value pool.BytesPool) Options

	// BytesPool returns the bytes pool.
	BytesPool() pool.BytesPool

	// SetPostingsListPool sets the postings list pool and returns the
	// resulting Options.
	SetPostingsListPool(value postings.Pool) Options

	// PostingsListPool returns the postings list pool.
	PostingsListPool() postings.Pool

	// SetContextPool sets the context pool and returns the resulting Options.
	SetContextPool(value context.Pool) Options

	// ContextPool returns the context pool.
	ContextPool() context.Pool
}

Options is a collection of knobs for a fs segment.

func NewOptions

func NewOptions() Options

NewOptions returns new options.

type Segment

type Segment interface {
	// Embeds the generic segment interface from the sgmt package.
	sgmt.Segment
	// Embeds index.Readable in addition to the segment methods.
	index.Readable
}

Segment represents a FST segment.

func NewSegment

func NewSegment(data SegmentData, opts Options) (Segment, error)

NewSegment returns a new Segment constructed from the provided data and options. NB(prateek): this method only assumes ownership of the data if it returns a nil error; otherwise, the caller is expected to handle the lifecycle of the input.

type SegmentData

type SegmentData struct {
	// Version and Metadata describe the segment being loaded.
	// NOTE(review): Metadata presumably carries the bytes produced by
	// Writer.Metadata() — confirm against callers.
	Version  Version
	Metadata []byte

	// NOTE(review): these byte slices appear to hold the raw contents of the
	// files described in the README (documents data, documents index,
	// postings data, FST terms and FST fields respectively) — confirm.
	DocsData      []byte
	DocsIdxData   []byte
	PostingsData  []byte
	FSTTermsData  []byte
	FSTFieldsData []byte

	// DocsReader is an alternative to specifying
	// the docs data and docs idx data if the documents
	// already reside in memory and we want to use the
	// in memory references instead.
	DocsReader *docs.SliceReader

	// NOTE(review): Closer is presumably invoked to release the resources
	// backing the data above once the segment is done with them — confirm.
	Closer io.Closer
}

SegmentData represents the collection of required parameters to construct a Segment.

func (SegmentData) Validate

func (sd SegmentData) Validate() error

Validate validates the provided segment data, returning an error if it is not valid.

type Version

type Version struct {
	// Major is the major version number.
	Major int
	// Minor is the minor version number.
	Minor int
}

Version controls internal behaviour of the fst package.

var (
	// CurrentVersion describes the default current Version (1.1).
	CurrentVersion Version = Version{Major: 1, Minor: 1}

	// SupportedVersions lists all supported versions of the FST package
	// (1.1 and 1.0), written here newest first.
	SupportedVersions = []Version{

		Version{Major: 1, Minor: 1},

		Version{Major: 1, Minor: 0},
	}
)

func (Version) Supported

func (v Version) Supported() error

Supported returns an error indicating if the version is supported.

type Writer

// Call-ordering constraints stated by the method notes below:
// WriteDocumentsData before WriteDocumentsIndex, and
// WritePostingsOffsets before WriteFSTTerms before WriteFSTFields.
type Writer interface {
	// Reset sets the Writer to persist the provided segment.
	// NB(prateek): if the provided segment is a mutable segment it must be sealed.
	Reset(s sgmt.Builder) error

	// MajorVersion is the major version for the writer.
	MajorVersion() int

	// MinorVersion is the minor version for the writer.
	MinorVersion() int

	// Metadata returns metadata about the writer.
	Metadata() []byte

	// WriteDocumentsData writes out the documents data to the provided writer.
	WriteDocumentsData(w io.Writer) error

	// WriteDocumentsIndex writes out the documents index to the provided writer.
	// NB(prateek): this must be called after WriteDocumentsData().
	WriteDocumentsIndex(w io.Writer) error

	// WritePostingsOffsets writes out the postings offset file to the provided
	// writer.
	WritePostingsOffsets(w io.Writer) error

	// WriteFSTTerms writes out the FSTTerms file using the provided writer.
	// NB(prateek): this must be called after WritePostingsOffsets().
	WriteFSTTerms(w io.Writer) error

	// WriteFSTFields writes out the FSTFields file using the provided writer.
	// NB(prateek): this must be called after WriteFSTTerms().
	WriteFSTFields(w io.Writer) error
}

Writer writes out a FST segment from the provided elements.

func NewWriter

func NewWriter(opts WriterOptions) (Writer, error)

NewWriter returns a new writer.

type WriterOptions

type WriterOptions struct {
	// DisableRegistry disables the FST builder node registry cache. The
	// registry can de-duplicate transitions that are an exact match of each
	// other during a final compilation phase, which helps compress the FST by
	// a significant amount (e.g. 2x). Disabling it avoids the high fixed-cost
	// lookups incurred while building the FST, at the expense of that
	// compression.
	DisableRegistry bool
}

WriterOptions is a set of options used when writing an FST.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL