sour

package module

v0.1.0 Latest Latest Go to latest Published: Dec 5, 2021 License: Apache-2.0 Imports: 8 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/blugelabs/sour

Links

Open Source Insights

README ¶

sour

Will this bluge.Document match this bluge.Query? This library allows you to efficiently answer this question.

	// Create a Sour instance from an in-memory-only bluge configuration.
	s := sour.New(bluge.InMemoryOnlyConfig())

	// Load the document to test: identifier "id" with a keyword field
	// "name" whose value is "sour".
	s.Reset(bluge.NewDocument("id").
		AddField(bluge.NewKeywordField("name", "sour")))

	// Run a term query for "sour" against the "name" field. The top-N size
	// is 0; only the aggregation count is inspected below.
	dmi, err := s.Search(context.Background(),
		bluge.NewTopNSearch(0, bluge.NewTermQuery("sour").SetField("name")).
		WithStandardAggregations())
	if err != nil {
		panic(err)
	}
	// A count above zero is reported as a match.
	if dmi.Aggregations().Count() > 0 {
		fmt.Println("matches name sour")
	} else {
		fmt.Println("does not match name sour")
	}

Details

This implementation is NOT thread-safe. If you need to run multiple queries concurrently, you must use separate Sour instances.
Loading stored fields is not supported

Approach

Single document, data sizes are small.
Therefore, avoid heavy document analysis and complex data structures.
After regular document analysis is complete, use this structure in place.
Do not build more complicated structures like vellums or roaring bitmaps.
If additional structure is needed, prefer arrays which have good cache locality, and can be reused.
Avoid copying data, prefer sub-slicing, and brute-force processing over arrays.
Cache reusable parts of the query, as we expect the same query to be run over multiple documents.

License

Apache License Version 2.0

Documentation ¶

Overview ¶

Example ¶

package main

import (
	"context"
	"fmt"

	"github.com/blugelabs/bluge"
	"github.com/blugelabs/sour"
)

// main loads one document into a Sour instance and prints whether a term
// query on the "name" field matches it.
func main() {
	searcher := sour.New(bluge.InMemoryOnlyConfig())

	// The document under test: identifier "id", keyword field name="sour".
	doc := bluge.NewDocument("id").
		AddField(bluge.NewKeywordField("name", "sour"))
	searcher.Reset(doc)

	// The top-N size is 0; only the aggregation count is inspected below.
	query := bluge.NewTermQuery("sour").SetField("name")
	request := bluge.NewTopNSearch(0, query).WithStandardAggregations()

	matches, err := searcher.Search(context.Background(), request)
	if err != nil {
		panic(err)
	}

	// A count above zero is reported as a match.
	verdict := "does not match name sour"
	if matches.Aggregations().Count() > 0 {
		verdict = "matches name sour"
	}
	fmt.Println(verdict)
}

Output:

matches name sour

Index ¶

type CollectionStats
- func (c *CollectionStats) DocumentCount() uint64
- func (c *CollectionStats) Merge(segment.CollectionStats)
- func (c *CollectionStats) SumTotalTermFrequency() uint64
- func (c *CollectionStats) TotalDocumentCount() uint64
type DictEntry
- func (d *DictEntry) Count() uint64
- func (d *DictEntry) Term() string
type Dictionary
- func (d *Dictionary) Close() error
- func (d *Dictionary) Contains(key []byte) (bool, error)
type DictionaryIterator
- func NewDictionaryIteratorWithTerms(terms []string, include func(string) bool) *DictionaryIterator
- func NewFieldDictEmpty() *DictionaryIterator
- func (d *DictionaryIterator) Close() error
- func (d *DictionaryIterator) Next() (segment.DictionaryEntry, error)
type DocValueReader
- func (d *DocValueReader) VisitDocumentValues(number uint64, visitor segment.DocumentValueVisitor) error
type Location
- func (l Location) End() int
- func (l Location) Field() string
- func (l Location) Pos() int
- func (l Location) Size() int
- func (l Location) Start() int
type Posting
- func (p *Posting) Frequency() int
- func (p *Posting) Locations() []segment.Location
- func (p *Posting) Norm() float64
- func (p *Posting) Number() uint64
- func (p *Posting) SetNumber(num uint64)
- func (p *Posting) Size() int
type Sour
- func New(cfg bluge.Config) *Sour
- func NewWithDocument(cfg bluge.Config, doc *bluge.Document) *Sour
- func (s *Sour) Close() error
- func (s *Sour) CollectionStats(field string) (segment.CollectionStats, error)
- func (s *Sour) DictionaryIterator(field string, automaton segment.Automaton, start, end []byte) (segment.DictionaryIterator, error)
- func (s *Sour) DictionaryLookup(field string) (segment.DictionaryLookup, error)
- func (s *Sour) DocumentValueReader(fields []string) (segment.DocumentValueReader, error)
- func (s *Sour) Fields() []string
- func (s *Sour) PostingsIterator(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (segment.PostingsIterator, error)
- func (s *Sour) Reset(doc *bluge.Document)
- func (s *Sour) Search(ctx context.Context, req bluge.SearchRequest) (search.DocumentMatchIterator, error)
- func (s *Sour) SortedTermsForField(fieldName string) ([]string, error)
- func (s *Sour) TokenFreqsAndLen(fieldName string) (analysis.TokenFrequencies, int, error)
- func (s *Sour) VisitStoredFields(number uint64, visitor segment.StoredFieldVisitor) error
type TermFieldReader
- func NewTermFieldReaderEmpty() *TermFieldReader
- func NewTermFieldReaderFromTokenFreqAndLen(tf *analysis.TokenFreq, l int, ...) *TermFieldReader
- func (t *TermFieldReader) Advance(docNum uint64) (segment.Posting, error)
- func (t *TermFieldReader) Close() error
- func (t *TermFieldReader) Count() uint64
- func (t *TermFieldReader) Empty() bool
- func (t *TermFieldReader) Next() (segment.Posting, error)
- func (t *TermFieldReader) Size() int

Examples ¶

Package

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type CollectionStats ¶

type CollectionStats struct{}

func (*CollectionStats) DocumentCount ¶

func (c *CollectionStats) DocumentCount() uint64

DocumentCount returns the number of documents with at least one term for this field

func (*CollectionStats) Merge ¶

func (c *CollectionStats) Merge(segment.CollectionStats)

func (*CollectionStats) SumTotalTermFrequency ¶

func (c *CollectionStats) SumTotalTermFrequency() uint64

SumTotalTermFrequency returns the total number of tokens across all documents

func (*CollectionStats) TotalDocumentCount ¶

func (c *CollectionStats) TotalDocumentCount() uint64

type DictEntry ¶

type DictEntry struct {
	// contains filtered or unexported fields
}

func (*DictEntry) Count ¶

func (d *DictEntry) Count() uint64

func (*DictEntry) Term ¶

func (d *DictEntry) Term() string

type Dictionary ¶

type Dictionary struct {
	// contains filtered or unexported fields
}

func (*Dictionary) Close ¶

func (d *Dictionary) Close() error

func (*Dictionary) Contains ¶

func (d *Dictionary) Contains(key []byte) (bool, error)

type DictionaryIterator ¶

type DictionaryIterator struct {
	// contains filtered or unexported fields
}

func NewDictionaryIteratorWithTerms ¶

func NewDictionaryIteratorWithTerms(terms []string, include func(string) bool) *DictionaryIterator

func NewFieldDictEmpty ¶

func NewFieldDictEmpty() *DictionaryIterator

func (*DictionaryIterator) Close ¶

func (d *DictionaryIterator) Close() error

func (*DictionaryIterator) Next ¶

func (d *DictionaryIterator) Next() (segment.DictionaryEntry, error)

type DocValueReader ¶

type DocValueReader struct {
	// contains filtered or unexported fields
}

func (*DocValueReader) VisitDocumentValues ¶

func (d *DocValueReader) VisitDocumentValues(number uint64, visitor segment.DocumentValueVisitor) error

type Location ¶

type Location struct {
	// contains filtered or unexported fields
}

func (Location) End ¶

func (l Location) End() int

func (Location) Field ¶

func (l Location) Field() string

func (Location) Pos ¶

func (l Location) Pos() int

func (Location) Size ¶

func (l Location) Size() int

func (Location) Start ¶

func (l Location) Start() int

type Posting ¶

type Posting struct {
	// contains filtered or unexported fields
}

func (*Posting) Frequency ¶

func (p *Posting) Frequency() int

func (*Posting) Locations ¶

func (p *Posting) Locations() []segment.Location

func (*Posting) Norm ¶

func (p *Posting) Norm() float64

func (*Posting) Number ¶

func (p *Posting) Number() uint64

func (*Posting) SetNumber ¶

func (p *Posting) SetNumber(num uint64)

func (*Posting) Size ¶

func (p *Posting) Size() int

type Sour ¶

type Sour struct {
	// contains filtered or unexported fields
}

func New ¶

func New(cfg bluge.Config) *Sour

func NewWithDocument ¶

func NewWithDocument(cfg bluge.Config, doc *bluge.Document) *Sour

func (*Sour) Close ¶

func (s *Sour) Close() error

func (*Sour) CollectionStats ¶

func (s *Sour) CollectionStats(field string) (segment.CollectionStats, error)

func (*Sour) DictionaryIterator ¶

func (s *Sour) DictionaryIterator(field string, automaton segment.Automaton, start,
	end []byte) (segment.DictionaryIterator, error)

DictionaryIterator provides a way to explore the terms used in the specified field. You can optionally filter these terms by the provided Automaton, or start/end terms.

func (*Sour) DictionaryLookup ¶

func (s *Sour) DictionaryLookup(field string) (segment.DictionaryLookup, error)

func (*Sour) DocumentValueReader ¶

func (s *Sour) DocumentValueReader(fields []string) (segment.DocumentValueReader, error)

func (*Sour) Fields ¶

func (s *Sour) Fields() []string

func (*Sour) PostingsIterator ¶

func (s *Sour) PostingsIterator(term []byte, field string, includeFreq, includeNorm,
	includeTermVectors bool) (segment.PostingsIterator, error)

PostingsIterator provides a way to find information about all documents that use the specified term in the specified field.

func (*Sour) Reset ¶

func (s *Sour) Reset(doc *bluge.Document)

func (*Sour) Search ¶

func (s *Sour) Search(ctx context.Context, req bluge.SearchRequest) (search.DocumentMatchIterator, error)

func (*Sour) SortedTermsForField ¶

func (s *Sour) SortedTermsForField(fieldName string) ([]string, error)

func (*Sour) TokenFreqsAndLen ¶

func (s *Sour) TokenFreqsAndLen(fieldName string) (analysis.TokenFrequencies, int, error)

func (*Sour) VisitStoredFields ¶

func (s *Sour) VisitStoredFields(number uint64, visitor segment.StoredFieldVisitor) error

type TermFieldReader ¶

type TermFieldReader struct {
	// contains filtered or unexported fields
}

func NewTermFieldReaderEmpty ¶

func NewTermFieldReaderEmpty() *TermFieldReader

func NewTermFieldReaderFromTokenFreqAndLen ¶

func NewTermFieldReaderFromTokenFreqAndLen(tf *analysis.TokenFreq, l int, includeFreq, includeNorm,
	includeTermVectors bool) *TermFieldReader

func (*TermFieldReader) Advance ¶

func (t *TermFieldReader) Advance(docNum uint64) (segment.Posting, error)

Advance resets the enumeration at the specified document or its immediate follower.

func (*TermFieldReader) Close ¶

func (t *TermFieldReader) Close() error

func (*TermFieldReader) Count ¶

func (t *TermFieldReader) Count() uint64

func (*TermFieldReader) Empty ¶

func (t *TermFieldReader) Empty() bool

func (*TermFieldReader) Next ¶

func (t *TermFieldReader) Next() (segment.Posting, error)

func (*TermFieldReader) Size ¶

func (t *TermFieldReader) Size() int

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL