tinysearch

package module
v0.0.0-...-90b00c5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 25, 2020 License: MIT Imports: 16 Imported by: 0

README

tinysearch

reviewdog

Tiny full-text search engine for learning

  • Go
  • inverted index
  • tf-idf
  • MySQL8.x

How to develop

docker-compose up -d
# set env
export INDEX_DIR_PATH="./testdata/index"
# create index
go run ./cmd/tinysearch/main.go create ./testdata/document
# search by full-text search engine
go run ./cmd/tinysearch/main.go search "qurrel sir"

See Also

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Cursor

type Cursor struct {
	// contains filtered or unexported fields
}

func (*Cursor) DocId

func (c *Cursor) DocId() DocumentID

func (*Cursor) Empty

func (c *Cursor) Empty() bool

func (*Cursor) Next

func (c *Cursor) Next()

func (*Cursor) NextDoc

func (c *Cursor) NextDoc(id DocumentID)

func (*Cursor) Posting

func (c *Cursor) Posting() *Posting

func (*Cursor) String

func (c *Cursor) String() string

type DocumentID

type DocumentID int64

type DocumentStore

type DocumentStore struct {
	// contains filtered or unexported fields
}

func NewDocumentStore

func NewDocumentStore(db *sql.DB) *DocumentStore

type Engine

type Engine struct {
	// contains filtered or unexported fields
}

func NewSearchEngine

func NewSearchEngine(db *sql.DB) *Engine

func (*Engine) AddDocument

func (e *Engine) AddDocument(title string, reader io.Reader) error

func (*Engine) Flush

func (e *Engine) Flush() error

func (*Engine) Search

func (e *Engine) Search(query string, k int) ([]*SearchResult, error)

type Index

type Index struct {
	Dictionary     map[string]PostingsList
	TotalDocsCount int
}

func NewIndex

func NewIndex() *Index

NewIndex creates a new index.

func (Index) String

func (idx Index) String() string

type IndexReader

type IndexReader struct {
	// contains filtered or unexported fields
}

func NewIndexReader

func NewIndexReader(path string) *IndexReader

type IndexWriter

type IndexWriter struct {
	// contains filtered or unexported fields
}

func NewIndexWriter

func NewIndexWriter(path string) *IndexWriter

func (*IndexWriter) Flush

func (w *IndexWriter) Flush(index *Index) error

type Indexer

type Indexer struct {
	// contains filtered or unexported fields
}

func NewIndexer

func NewIndexer(tokenizer *Tokenizer) *Indexer

type Posting

type Posting struct {
	DocID         DocumentID // document ID
	Positions     []int      // positions at which the term occurs
	TermFrequency int        // number of occurrences of the term in the document
}

func NewPosting

func NewPosting(docID DocumentID, positions ...int) *Posting

func (Posting) String

func (p Posting) String() string

type PostingsList

type PostingsList struct {
	*list.List
}

func NewPostingsList

func NewPostingsList(postings ...*Posting) PostingsList

func (PostingsList) Add

func (pl PostingsList) Add(new *Posting)

func (PostingsList) MarshalJSON

func (pl PostingsList) MarshalJSON() ([]byte, error)

func (PostingsList) OpenCursor

func (pl PostingsList) OpenCursor() *Cursor

func (PostingsList) String

func (pl PostingsList) String() string

func (*PostingsList) UnmarshalJSON

func (pl *PostingsList) UnmarshalJSON(b []byte) error

type ScoreDoc

type ScoreDoc struct {
	// contains filtered or unexported fields
}

ScoreDoc holds a document ID and the score of that document.

func (ScoreDoc) String

func (d ScoreDoc) String() string

type SearchResult

type SearchResult struct {
	DocID DocumentID
	Score float64
	Title string
}

func (*SearchResult) String

func (r *SearchResult) String() string

type Searcher

type Searcher struct {
	// contains filtered or unexported fields
}

func NewSearcher

func NewSearcher(path string) *Searcher

func (*Searcher) SearchTopK

func (s *Searcher) SearchTopK(query []string, k int) *TopDocs

type Tokenizer

type Tokenizer struct{}

func NewTokenizer

func NewTokenizer() *Tokenizer

func (*Tokenizer) SplitFunc

func (t *Tokenizer) SplitFunc(data []byte, atEOF bool) (advance int,
	token []byte, err error)

func (*Tokenizer) TextToWordSequence

func (t *Tokenizer) TextToWordSequence(text string) []string

type TopDocs

type TopDocs struct {
	// contains filtered or unexported fields
}

TopDocs holds the search results of searchTopK.

func (*TopDocs) String

func (t *TopDocs) String() string

Directories

Path Synopsis
cmd
tinysearch command

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL