search

package
v1.6.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 22, 2019 License: MIT Imports: 34 Imported by: 0

Documentation

Index

Constants

View Source
// EVAL_SET_EXPECTATION_FIRST_COLUMN and EVAL_SET_EXPECTATION_LAST_COLUMN
// bound the range of expectation columns.
// NOTE(review): presumably column indexes into an eval-set row as read by
// ReadEvalSet; whether they are zero-based and inclusive is not visible
// here — confirm against the reader.
const EVAL_SET_EXPECTATION_FIRST_COLUMN = 4
const EVAL_SET_EXPECTATION_LAST_COLUMN = 8
View Source
// Search quality values, numbered consecutively from 0 (SQ_SERVER_ERROR)
// to 5 (SQ_GOOD). The order matters: a higher integer means better quality.
const (
	SQ_SERVER_ERROR = iota
	SQ_NO_EXPECTATION
	SQ_BAD_STRUCTURE
	SQ_UNKNOWN
	SQ_REGULAR
	SQ_GOOD
)

Search quality enum. The order is important: the further down the list (i.e. the higher the integer value), the better the quality.

View Source
// Compare results classification values, numbered consecutively from
// 0 (CR_WIN) to 4 (CR_ERROR).
const (
	CR_WIN = iota
	CR_LOSS
	CR_SAME
	CR_NO_EXPECTATION
	CR_ERROR
)

Compare results classification.

View Source
// Expectation types. ET_NOT_SET is -1. Every other constant takes the value
// of iota at its position inside this block, so ET_CONTENT_UNITS is 1 (not 0)
// and ET_FAILED_SQL is 12.
const (
	ET_NOT_SET       = -1
	ET_CONTENT_UNITS = iota
	ET_COLLECTIONS
	ET_LESSONS
	ET_PROGRAMS
	ET_SOURCES
	ET_EVENTS
	ET_LANDING_PAGE
	ET_BLOG_OR_TWEET
	ET_EMPTY
	ET_FAILED_PARSE
	ET_BAD_STRUCTURE
	ET_FAILED_SQL
)
View Source
// Filter names.
const (
	FILTER_NAME_SOURCE       = "source"
	FILTER_NAME_TOPIC        = "topic"
	FILTER_NAME_CONTENT_TYPE = "contentType"
)

// String markers.
// NOTE(review): PREFIX_LATEST matches the "[latest]" prefix shown in the
// ParseExpectation examples; where BLOG_OR_TWEET_MARK is matched is not
// visible here.
const (
	PREFIX_LATEST      = "[latest]"
	BLOG_OR_TWEET_MARK = "blog_or_tweet"
)
View Source
// Content boosts.
const (
	TITLE_BOOST       = 2.0
	DESCRIPTION_BOOST = 1.2
)

// SLOP is the maximum slop.
const SLOP = 100

// The following two boosts may be aggregated.
const (
	// STANDARD_BOOST is the boost for the standard analyzer, i.e., without stemming.
	STANDARD_BOOST = 1.2
	// EXACT_BOOST is the boost for an exact phrase match, without slop.
	EXACT_BOOST = 1.5
)

// NUM_SUGGESTS is a suggestion count.
// NOTE(review): presumably the number of suggestions requested — confirm at the call site.
const NUM_SUGGESTS = 100

Variables

View Source
// COMPARE_RESULTS_NAME maps compare-results classification constants (CR_*)
// to display names.
// NOTE(review): CR_NO_EXPECTATION has no entry, so looking it up yields the
// empty string — confirm this is intentional.
var COMPARE_RESULTS_NAME = map[int]string{
	CR_WIN:   "Win",
	CR_LOSS:  "Loss",
	CR_SAME:  "Same",
	CR_ERROR: "Error",
}
View Source
// EXPECTATION_TO_NAME maps expectation type constants (ET_*) to string names.
// NOTE(review): ET_EVENTS and ET_NOT_SET have no entry, so looking them up
// yields the empty string — confirm this is intentional.
var EXPECTATION_TO_NAME = map[int]string{
	ET_CONTENT_UNITS: "et_content_units",
	ET_COLLECTIONS:   "et_collections",
	ET_LESSONS:       "et_lessons",
	ET_PROGRAMS:      "et_programs",
	ET_SOURCES:       "et_sources",
	ET_BLOG_OR_TWEET: "et_blog_or_tweet",
	ET_LANDING_PAGE:  "et_landing_page",
	ET_EMPTY:         "et_empty",
	ET_FAILED_PARSE:  "et_failed_parse",
	ET_BAD_STRUCTURE: "et_bad_structure",
	ET_FAILED_SQL:    "et_failed_sql",
}
View Source
// EXPECTATION_URL_PATH maps expectation type constants (ET_*) to a URL path
// segment. Only the six types listed here have an entry; every other ET_*
// value yields the empty string.
// NOTE(review): the segments match those in the ParseExpectation example
// URLs ("cu", "c", "lessons", ...) — presumably used when parsing them.
var EXPECTATION_URL_PATH = map[int]string{
	ET_CONTENT_UNITS: "cu",
	ET_COLLECTIONS:   "c",
	ET_LESSONS:       "lessons",
	ET_PROGRAMS:      "programs",
	ET_SOURCES:       "sources",
	ET_EVENTS:        "events",
}
View Source
// FLAT_REPORT_HEADERS lists the nine column headers of the flat report, in
// column order.
var FLAT_REPORT_HEADERS = []string{
	"Language",
	"Query",
	"Weight",
	"Bucket",
	"Comment",
	"Expectation",
	"Parsed",
	"SearchQuality",
	"Rank",
}
View Source
// SEARCH_QUALITY_BY_NAME maps a search quality display name to its SQ_*
// constant. It has one entry for each of the six SQ_* constants.
var SEARCH_QUALITY_BY_NAME = map[string]int{
	"Good":          SQ_GOOD,
	"Regular":       SQ_REGULAR,
	"Unknown":       SQ_UNKNOWN,
	"BadStructure":  SQ_BAD_STRUCTURE,
	"NoExpectation": SQ_NO_EXPECTATION,
	"ServerError":   SQ_SERVER_ERROR,
}
View Source
// SEARCH_QUALITY_NAME maps each SQ_* search quality constant to its display
// name. It has one entry for each of the six SQ_* constants.
var SEARCH_QUALITY_NAME = map[int]string{
	SQ_GOOD:           "Good",
	SQ_REGULAR:        "Regular",
	SQ_UNKNOWN:        "Unknown",
	SQ_BAD_STRUCTURE:  "BadStructure",
	SQ_NO_EXPECTATION: "NoExpectation",
	SQ_SERVER_ERROR:   "ServerError",
}

Functions

func CompareResults added in v0.8.0

func CompareResults(base int, exp int) int

Returns compare results classification constant.

func CsvToString added in v1.1.8

func CsvToString(records [][]string) (error, string)

func ExpectationToString added in v0.9.5

func ExpectationToString(e Expectation) string

func FilterValueToUid added in v0.9.5

func FilterValueToUid(value string) string

func FoldGrammars added in v1.4.4

func FoldGrammars(first Grammars, second Grammars)

func GoodExpectations added in v0.9.5

func GoodExpectations(expectations []Expectation) int

func HitMatchesExpectation added in v0.9.5

func HitMatchesExpectation(hit *elastic.SearchHit, hitSource HitSource, e Expectation) bool

func NewResultsSearchRequest added in v1.0.5

func NewResultsSearchRequest(options SearchRequestOptions) *elastic.SearchRequest

func NewResultsSearchRequests added in v1.0.5

func NewResultsSearchRequests(options SearchRequestOptions) []*elastic.SearchRequest

func NewResultsSuggestRequest added in v1.0.5

func NewResultsSuggestRequest(resultTypes []string, index string, query Query, preference string) *elastic.SearchRequest

func NewResultsSuggestRequests added in v1.0.5

func NewResultsSuggestRequests(resultTypes []string, query Query, preference string) []*elastic.SearchRequest

func ResultsByExpectation added in v1.1.8

func ResultsByExpectation(queries []EvalQuery, results EvalResults) [][]string

func SuggestionHasOptions added in v0.7.1

func SuggestionHasOptions(ss elastic.SearchSuggest) bool

func TokenNodesToString added in v1.5.2

func TokenNodesToString(root []*TokenNode) string

func TokensMatch added in v1.5.2

func TokensMatch(a []*TokenNode, b [][]*TokenNode) bool

func TokensSearch added in v1.5.4

func TokensSearch(a []*TokenNode, b [][]*TokenNode) (string, error)

Searches for tokens |a| inside tokens |b| and returns the matching part. TODO: can this be optimized? Current complexity is O(|a|^2 * |b|^2), where |a| is the number of tokens in the whole graph of |a|.

func TokensSingleMatch added in v1.5.4

func TokensSingleMatch(a []*TokenNode, b []*TokenNode) bool

func TokensSingleSearch added in v1.5.4

func TokensSingleSearch(a []*TokenNode, b []*TokenNode) (string, error)

func WriteResults added in v0.9.5

func WriteResults(path string, queries []EvalQuery, results EvalResults) error

func WriteResultsByExpectation added in v0.9.5

func WriteResultsByExpectation(path string, queries []EvalQuery, results EvalResults) ([][]string, error)

func WriteToCsv added in v0.9.5

func WriteToCsv(path string, records [][]string) error

func WriteVsGoldenHTML added in v1.5.0

func WriteVsGoldenHTML(vsGoldenHtml string, records [][]string, goldenRecords [][]string) error

Types

type ClassificationIntent added in v1.4.0

// ClassificationIntent combines fields copied from a search result with
// intent-specific fields. JSON names are given by the struct tags.
//
// Score and MaxScore are pointers and are omitted from JSON when nil.
// NOTE(review): if elastic.SearchExplanation is a struct type, the
// ",omitempty" on Explanation and MaxExplanation has no effect, because
// encoding/json never treats a struct value as empty — confirm.
type ClassificationIntent struct {
	// Fields from result.
	ResultType string `json:"result_type"`
	MDB_UID    string `json:"mdb_uid"`
	Title      string `json:"title"`

	// Intent fields.
	ContentType    string                    `json:"content_type"`
	Exist          bool                      `json:"exist"`
	Score          *float64                  `json:"score,omitempty"`
	Explanation    elastic.SearchExplanation `json:"explanation,omitempty"`
	MaxScore       *float64                  `json:"max_score,omitempty"`
	MaxExplanation elastic.SearchExplanation `json:"max_explanation,omitempty"`
}

type CreatedSearchClicks added in v0.8.3

// CreatedSearchClicks is a slice of SearchClick with Len, Less and Swap
// methods, i.e. the method set required by sort.Interface.
// NOTE(review): the name suggests Less orders by SearchClick.Created —
// confirm in the implementation.
type CreatedSearchClicks []SearchClick

func (CreatedSearchClicks) Len added in v0.8.3

func (csc CreatedSearchClicks) Len() int

func (CreatedSearchClicks) Less added in v0.8.3

func (csc CreatedSearchClicks) Less(i, j int) bool

func (CreatedSearchClicks) Swap added in v0.8.3

func (csc CreatedSearchClicks) Swap(i, j int)

type CreatedSearchLogs added in v0.8.3

// CreatedSearchLogs is a slice of SearchLog with Len, Less and Swap methods,
// i.e. the method set required by sort.Interface.
// NOTE(review): the name suggests Less orders by SearchLog.Created —
// confirm in the implementation.
type CreatedSearchLogs []SearchLog

func (CreatedSearchLogs) Len added in v0.8.3

func (csl CreatedSearchLogs) Len() int

func (CreatedSearchLogs) Less added in v0.8.3

func (csl CreatedSearchLogs) Less(i, j int) bool

func (CreatedSearchLogs) Swap added in v0.8.3

func (csl CreatedSearchLogs) Swap(i, j int)

type ESEngine

// ESEngine is the search engine type constructed by NewESEngine from an
// Elasticsearch client, a SQL database handle, a cache manager, grammars and
// a tokens cache. Its methods include DoSearch, GetSuggestions, AddIntents,
// SearchGrammars and SuggestGrammars.
type ESEngine struct {
	ExecutionTimeLog *TimeLogMap

	TokensCache *TokensCache
	// contains filtered or unexported fields
}

func NewESEngine

func NewESEngine(esc *elastic.Client, db *sql.DB, cache cache.CacheManager, grammars Grammars, tc *TokensCache) *ESEngine

func (*ESEngine) AddIntentSecondRound added in v1.0.5

func (e *ESEngine) AddIntentSecondRound(h *elastic.SearchHit, intent Intent, query Query) (error, *Intent, *Query)

func (*ESEngine) AddIntents added in v0.8.9

func (e *ESEngine) AddIntents(query *Query, preference string, size int, sortBy string) ([]Intent, error)

func (*ESEngine) DoSearch

func (e *ESEngine) DoSearch(ctx context.Context, query Query, sortBy string, from int, size int, preference string) (*QueryResult, error)

func (*ESEngine) GetSuggestions

func (e *ESEngine) GetSuggestions(ctx context.Context, query Query, preference string) (interface{}, error)

func (*ESEngine) IntentsToResults added in v0.9.5

func (e *ESEngine) IntentsToResults(query *Query) (error, map[string]*elastic.SearchResult)

func (*ESEngine) SearchGrammars added in v1.4.4

func (e *ESEngine) SearchGrammars(query *Query) ([]Intent, error)

func (*ESEngine) SuggestGrammars added in v1.5.4

func (e *ESEngine) SuggestGrammars(query *Query) (map[string][]string, error)

type ESManager added in v1.3.9

// ESManager is created by MakeESManager from a URL. GetClient returns an
// *elastic.Client (or an error) and Stop takes no arguments and returns
// nothing. All of its state is unexported.
type ESManager struct {
	// contains filtered or unexported fields
}

func MakeESManager added in v1.3.9

func MakeESManager(url string) *ESManager

func (*ESManager) GetClient added in v1.3.9

func (esManager *ESManager) GetClient() (*elastic.Client, error)

func (*ESManager) Stop added in v1.3.9

func (esManager *ESManager) Stop()

type Engine

// Engine is the interface of a search engine that can return suggestions and
// run a search for a Query.
//
// NOTE(review): *ESEngine as documented on this page does not satisfy this
// interface. Its GetSuggestions takes an extra preference string, and its
// DoSearch takes an extra sortBy string and returns (*QueryResult, error)
// rather than (interface{}, error). Confirm whether Engine is still used.
type Engine interface {
	GetSuggestions(ctx context.Context, query Query) (interface{}, error)
	DoSearch(ctx context.Context, query Query, from int, size int, preference string) (interface{}, error)
}

type EvalQuery added in v0.7.8

// EvalQuery is one entry of an evaluation set: a query string in a given
// language together with its expectations. Weight, Bucket and Comment are
// omitted from JSON when empty. ReadEvalSet and InitAndReadEvalSet return
// slices of EvalQuery.
type EvalQuery struct {
	Language     string        `json:"language"`
	Query        string        `json:"query"`
	Weight       float64       `json:"weight,omitempty"`
	Bucket       string        `json:"bucket,omitempty"`
	Expectations []Expectation `json:"expectations"`
	Comment      string        `json:"comment,omitempty"`
}

func InitAndReadEvalSet added in v1.1.8

func InitAndReadEvalSet(evalSetPath string) ([]EvalQuery, error)

func ReadEvalSet added in v0.7.8

func ReadEvalSet(reader io.Reader, db *sql.DB) ([]EvalQuery, error)

type EvalResult added in v0.7.8

// EvalResult is the value returned by EvaluateQuery for a single EvalQuery.
// NOTE(review): SearchQuality and Rank look like parallel slices with one
// entry per expectation, SearchQuality holding SQ_* values — confirm.
type EvalResult struct {
	SearchQuality []int `json:"search_quality"`
	Rank          []int `json:"rank"`
	// contains filtered or unexported fields
}

func EvaluateQuery added in v0.7.8

func EvaluateQuery(q EvalQuery, serverUrl string) EvalResult

type EvalResults added in v0.7.8

// EvalResults is the aggregate value returned by Eval: the per-query
// EvalResult values plus totals.
// NOTE(review): UniqueMap and WeightedMap are presumably keyed by SQ_*
// search quality value — confirm in Eval.
type EvalResults struct {
	Results       []EvalResult    `json:"results"`
	TotalUnique   uint64          `json:"total_unique"`
	TotalWeighted float64         `json:"total_weighted"`
	TotalErrors   uint64          `json:"total_errors"`
	UniqueMap     map[int]float64 `json:"unique_map"`
	WeightedMap   map[int]float64 `json:"weighted_map"`
}

func Eval added in v0.7.8

func Eval(queries []EvalQuery, serverUrl string) (EvalResults, map[int][]Loss, error)

type Expectation added in v0.8.0

// Expectation is the value produced by ParseExpectation from a result URL.
// Uid and Filters are omitted from JSON when empty.
// NOTE(review): Type presumably holds one of the ET_* constants and Source
// the original string that was parsed — confirm in ParseExpectation.
type Expectation struct {
	Type    int      `json:"type"`
	Uid     string   `json:"uid,omitempty"`
	Filters []Filter `json:"filters,omitempty"`
	Source  string   `json:"source"`
}

func ParseExpectation added in v0.8.0

func ParseExpectation(e string, db *sql.DB) Expectation

Parses an expectation described by a result URL and converts it to a type (collections or content_units) and uid. Examples:

https://kabbalahmedia.info/he/programs/cu/AsNLozeK ==> (content_units, AsNLozeK)
https://kabbalahmedia.info/he/programs/c/fLWpcUjQ ==> (collections , fLWpcUjQ)
https://kabbalahmedia.info/he/lessons/series/c/XZoflItG ==> (collections , XZoflItG)
https://kabbalahmedia.info/he/lessons?source=bs_L2jMWyce_kB3eD83I ==> (lessons, nil, source=bs_L2jMWyce_kB3eD83I)
https://kabbalahmedia.info/he/programs?topic=g3ml0jum_1nyptSIo_RWqjxgkj ==> (programs, nil, topic=g3ml0jum_1nyptSIo_RWqjxgkj)
https://kabbalahmedia.info/he/sources/kB3eD83I ==> (source, kB3eD83I)
[latest]https://kabbalahmedia.info/he/lessons?source=bs_qMUUn22b_hFeGidcS ==> (content_units, SLQOALyt)
[latest]https://kabbalahmedia.info/he/programs?topic=g3ml0jum_1nyptSIo_RWqjxgkj ==> (content_units, erZIsm86)
[latest]https://kabbalahmedia.info/he/programs/c/zf4lLwyI ==> (content_units, orMKRcNk)

All events sub pages and years:

https://kabbalahmedia.info/he/events/meals
https://kabbalahmedia.info/he/events/friends-gatherings
https://kabbalahmedia.info/he/events?year=2013

type Filter added in v0.8.9

// Filter is a name/value pair. Expectation.Filters is a slice of Filter.
// NOTE(review): Name is presumably one of the FILTER_NAME_* constants —
// confirm in ParseExpectation.
type Filter struct {
	Name  string `json:"name"`
	Value string `json:"value"`
}

type Grammar added in v1.4.4

// Grammar holds a hit type, language and intent together with token patterns
// and filters, plus the Elasticsearch client in Esc. Its SearchGrammar and
// SuggestGrammar methods take a query and a TokensCache.
// NOTE(review): each element of Patterns is presumably the token forest of
// one pattern phrase — confirm in ReadGrammarFile.
type Grammar struct {
	HitType  string
	Language string
	Intent   string
	Patterns [][]*TokenNode
	Filters  map[string][]string
	Esc      *elastic.Client
}

func (*Grammar) SearchGrammar added in v1.4.4

func (g *Grammar) SearchGrammar(query *Query, tc *TokensCache) (*Intent, error)

func (*Grammar) SuggestGrammar added in v1.5.4

func (g *Grammar) SuggestGrammar(query *Query, tc *TokensCache) (string, error)

type GrammarIntent added in v1.4.4

// GrammarIntent carries a landing page string, omitted from JSON when empty.
// NOTE(review): presumably stored in Intent.Value for grammar intents —
// confirm in SearchGrammar.
type GrammarIntent struct {
	LandingPage string `json:"landing_page,omitempty"`
}

type Grammars added in v1.4.4

// Grammars is an alias for a two-level map of *Grammar, as returned by
// MakeGrammars and ReadGrammarFile.
// NOTE(review): the keys are presumably language, then intent — confirm in
// ReadGrammarFile.
type Grammars = map[string]map[string]*Grammar

func MakeGrammars added in v1.4.4

func MakeGrammars(grammarsDir string, esc *elastic.Client, tc *TokensCache) (Grammars, error)

func ReadGrammarFile added in v1.4.4

func ReadGrammarFile(grammarFile string, esc *elastic.Client, tc *TokensCache) (Grammars, error)

type HitSource added in v0.9.5

// HitSource holds the mdb_uid, result_type and landing_page JSON fields.
// HitMatchesExpectation takes it alongside the *elastic.SearchHit.
// NOTE(review): presumably decoded from the hit's source document — confirm.
type HitSource struct {
	MdbUid      string `json:"mdb_uid"`
	ResultType  string `json:"result_type"`
	LandingPage string `json:"landing_page"`
}

type Intent added in v0.8.9

// Intent is a typed intent for a language with an arbitrary payload in Value,
// which is omitted from JSON when nil.
// NOTE(review): Value presumably holds a ClassificationIntent or
// GrammarIntent depending on Type — confirm at the producers.
type Intent struct {
	Type     string      `json:"type"`
	Language string      `json:"language"`
	Value    interface{} `json:"value,omitempty"`
}

type Loss added in v0.8.9

// Loss pairs an expectation and its query with unique and weighted values.
// Eval returns a map[int][]Loss.
//
// Note: Expectation and Query are struct values, so their ",omitempty" has
// no effect — encoding/json never treats a struct as empty and always emits
// both fields. Unique and Weighted are omitted when zero.
type Loss struct {
	Expectation Expectation `json:"expectation,omitempty"`
	Query       EvalQuery   `json:"query,omitempty"`
	Unique      float64     `json:"unique,omitempty"`
	Weighted    float64     `json:"weighted,omitempty"`
}

type OriginalTokenNode added in v1.5.4

// OriginalTokenNode links a TokenNode back to the original text it was
// tokenized from, including skipped spans before and after the token. Nodes
// are linked to each other through Parents and Children.
// NOTE(review): the Span values are presumably offsets into
// *OriginalWholePhrase — confirm in the *ToString helper methods.
type OriginalTokenNode struct {
	OriginalWholePhrase *string
	SkippedPrefix       *Span  // Stopwords before this token that were skipped.
	OriginalPhrase      string // Original string that was tokenized.
	SkippedSuffix       *Span  // Will be set only for IsEnd = true nodes.

	TokenNode *TokenNode
	Parents   []*OriginalTokenNode
	Children  []*OriginalTokenNode
}

func (*OriginalTokenNode) OriginalFullPhraseToString added in v1.5.4

func (otn *OriginalTokenNode) OriginalFullPhraseToString() string

func (*OriginalTokenNode) SkippedPrefixToString added in v1.5.4

func (otn *OriginalTokenNode) SkippedPrefixToString() string

func (*OriginalTokenNode) SkippedSuffixToString added in v1.5.4

func (otn *OriginalTokenNode) SkippedSuffixToString() string

type PhrasesWithOrigin added in v1.5.4

// PhrasesWithOrigin holds two string slices, OriginalPhrases and Phrases.
// It is returned by TokenNodesToPhrases and OriginalTokenNodesToPhrases.
// NOTE(review): the two slices are presumably parallel, with
// OriginalPhrases[i] the source text of Phrases[i] — confirm.
type PhrasesWithOrigin struct {
	OriginalPhrases []string
	Phrases         []string
}

func OriginalTokenNodesToPhrases added in v1.5.4

func OriginalTokenNodesToPhrases(otns []*OriginalTokenNode) []PhrasesWithOrigin

func TokenNodesToPhrases added in v1.5.2

func TokenNodesToPhrases(root []*TokenNode) []PhrasesWithOrigin

func (*PhrasesWithOrigin) Join added in v1.5.4

func (p *PhrasesWithOrigin) Join(s string) string

func (*PhrasesWithOrigin) OriginalJoin added in v1.5.4

func (p *PhrasesWithOrigin) OriginalJoin() string

func (*PhrasesWithOrigin) ToString added in v1.5.4

func (p *PhrasesWithOrigin) ToString() string

type Query

// Query is a search query with its terms, filters, language order and
// intents; ParseQuery builds one from a query string. Every field is omitted
// from JSON when empty.
// NOTE(review): Deb is presumably a debug flag — confirm.
type Query struct {
	Term          string              `json:"term,omitempty"`
	ExactTerms    []string            `json:"exact_terms,omitempty"`
	Original      string              `json:"original,omitempty"`
	Filters       map[string][]string `json:"filters,omitempty"`
	LanguageOrder []string            `json:"language_order,omitempty"`
	Deb           bool                `json:"deb,omitempty"`
	Intents       []Intent            `json:"intents,omitempty"`
}

func ParseQuery added in v1.4.4

func ParseQuery(q string) Query

Parses query and extracts terms and filters.

func (*Query) ToFullSimpleString added in v1.1.9

func (query *Query) ToFullSimpleString(sortBy string, from int, size int) string

func (*Query) ToSimpleString added in v1.1.9

func (query *Query) ToSimpleString() string

func (*Query) ToString added in v1.0.5

func (query *Query) ToString() string

type QueryResult added in v0.8.9

// QueryResult is the value returned by ESEngine.DoSearch. It wraps the
// Elasticsearch search result; both fields are omitted from JSON when empty.
type QueryResult struct {
	SearchResult *elastic.SearchResult `json:"search_result,omitempty"`
	// TODO: Intents field below is deprecated and not being used.
	Intents []Intent `json:"intents,omitempty"`
}

type SearchClick added in v0.8.0

// SearchClick is a logged click on a search result, identified by the id of
// the search it belongs to.
//
// SearchId, Created and LogType are always emitted in JSON. MdbUid, Index,
// ResultType and Rank are omitted when they hold their zero value. The
// ",omitempty" option must be inside the quoted tag value: the previous form
// `json:"mdb_uid",omitempty` was a malformed struct tag, so the option was
// ignored and empty fields were always emitted.
type SearchClick struct {
	SearchId   string    `json:"search_id"`
	Created    time.Time `json:"created"`
	LogType    string    `json:"log_type"`
	MdbUid     string    `json:"mdb_uid,omitempty"`
	Index      string    `json:"index,omitempty"`
	ResultType string    `json:"result_type,omitempty"`
	Rank       uint32    `json:"rank,omitempty"`
}

type SearchLog added in v0.7.7

// SearchLog is a logged search: the query, its paging and sorting
// parameters, and either a result or an error. SearchId, Created, LogType
// and Query are always emitted in JSON; the remaining fields are omitted
// when empty. SearchLogger.GetAllQueries returns a slice of SearchLog.
type SearchLog struct {
	SearchId         string      `json:"search_id"`
	Created          time.Time   `json:"created"`
	LogType          string      `json:"log_type"`
	Query            Query       `json:"query"`
	QueryResult      interface{} `json:"query_result,omitempty"`
	Error            interface{} `json:"error,omitempty"`
	SortBy           string      `json:"sort_by,omitempty"`
	From             uint64      `json:"from,omitempty"`
	Size             uint64      `json:"size,omitempty"`
	Suggestion       string      `json:"suggestion,omitempty"`
	ExecutionTimeLog []TimeLog   `json:"execution_time_log,omitempty"`
}

type SearchLogger added in v0.7.7

// SearchLogger is created by MakeSearchLogger from an *ESManager. It logs
// searches, search errors and clicks (LogSearch, LogSearchError, LogClick)
// and reads them back (GetAllQueries, GetAllClicks). All of its state is
// unexported.
type SearchLogger struct {
	// contains filtered or unexported fields
}

func MakeSearchLogger added in v0.7.7

func MakeSearchLogger(esManager *ESManager) *SearchLogger

func (*SearchLogger) GetAllClicks added in v0.8.3

func (searchLogger *SearchLogger) GetAllClicks() ([]SearchClick, error)

func (*SearchLogger) GetAllQueries added in v0.7.8

func (searchLogger *SearchLogger) GetAllQueries(s *elastic.SliceQuery) ([]SearchLog, error)

func (*SearchLogger) LogClick added in v0.8.0

func (searchLogger *SearchLogger) LogClick(mdbUid string, index string, resultType string, rank int, searchId string) error

func (*SearchLogger) LogSearch added in v0.7.7

func (searchLogger *SearchLogger) LogSearch(query Query, sortBy string, from int, size int, searchId string, suggestion string, res *QueryResult, executionTimeLog *TimeLogMap) error

func (*SearchLogger) LogSearchError added in v0.7.7

func (searchLogger *SearchLogger) LogSearchError(query Query, sortBy string, from int, size int, searchId string, suggestion string, searchErr interface{}, executionTimeLog *TimeLogMap) error

type SearchRequestOptions added in v1.0.5

// SearchRequestOptions is the options value accepted by
// NewResultsSearchRequest and NewResultsSearchRequests. All of its fields
// are unexported, so it can only be populated from inside this package.
type SearchRequestOptions struct {
	// contains filtered or unexported fields
}

type Span added in v1.5.4

// Span is a pair of integer positions, Start and End. MakeSpan returns a
// pointer to a Span.
// NOTE(review): whether End is inclusive, and whether the positions are byte
// or rune offsets, is not visible here.
type Span struct {
	Start int
	End   int
}

func MakeSpan added in v1.5.4

func MakeSpan(start, end int) *Span

type TimeLog added in v1.2.1

// TimeLog records a time value for a named operation.
// SearchLog.ExecutionTimeLog is a slice of TimeLog.
// NOTE(review): the unit of Time is not visible here; it is presumably
// derived from the time.Duration values held in TimeLogMap — confirm.
type TimeLog struct {
	Operation string `json:"operation"`
	Time      int64  `json:"time"`
}

type TimeLogMap added in v1.3.9

// TimeLogMap associates string keys with time.Duration values. It is created
// by NewTimeLogMap, written with Store, read with Load, and converted to a
// plain map with ToMap. All of its state is unexported.
type TimeLogMap struct {
	// contains filtered or unexported fields
}

func NewTimeLogMap added in v1.3.9

func NewTimeLogMap() *TimeLogMap

func (*TimeLogMap) Load added in v1.3.9

func (c *TimeLogMap) Load(key string) (time.Duration, bool)

func (*TimeLogMap) Store added in v1.3.9

func (c *TimeLogMap) Store(key string, value time.Duration)

func (*TimeLogMap) ToMap added in v1.3.9

func (c *TimeLogMap) ToMap() map[string]time.Duration

type Token added in v1.5.2

// Token is a single analyzed token: its text, start and end offsets, type,
// position and position length.
// NOTE(review): the JSON names, including the camel-case "positionLength",
// look like they mirror the Elasticsearch analyze API response. Confirm
// before renaming for consistency with the other snake_case tags.
type Token struct {
	Token          string `json:"token"`
	StartOffset    int    `json:"start_offset"`
	EndOffset      int    `json:"end_offset"`
	Type           string `json:"type"`
	Position       int    `json:"position"`
	PositionLength int    `json:"positionLength"`
}

type TokenNode added in v1.5.2

// TokenNode is a node in a token graph: it wraps a Token and links to its
// parent and child nodes. MakeTokenForest and MakeTokensFromPhrase return
// slices of *TokenNode.
// NOTE(review): IsEnd presumably marks a node at which a phrase ends —
// confirm in MakeTokenForest.
type TokenNode struct {
	Token    Token
	IsEnd    bool
	Parents  []*TokenNode
	Children []*TokenNode
	// May be several original phrases per token from several sources that were merged.
	OriginalTokenNodes []*OriginalTokenNode
}

func MakeTokenForest added in v1.5.2

func MakeTokenForest(tokens []Token, phrase string) []*TokenNode

func MakeTokensFromPhrase added in v1.5.2

func MakeTokensFromPhrase(phrase string, lang string, esc *elastic.Client, tc *TokensCache) ([]*TokenNode, error)

func MakeTokensFromPhraseIndex added in v1.5.2

func MakeTokensFromPhraseIndex(phrase string, lang string, esc *elastic.Client, index string, ctx context.Context) ([]*TokenNode, error)

type TokensCache added in v1.5.4

// TokensCache stores token node slices keyed by a (phrase, lang) pair. It is
// created by MakeTokensCache with a size and accessed through Get, Has and
// Set. All of its state is unexported.
// NOTE(review): the eviction policy and thread-safety are not visible here.
type TokensCache struct {
	// contains filtered or unexported fields
}

func MakeTokensCache added in v1.5.4

func MakeTokensCache(size int) *TokensCache

func (*TokensCache) Get added in v1.5.4

func (tc *TokensCache) Get(phrase string, lang string) []*TokenNode

func (*TokensCache) Has added in v1.5.4

func (tc *TokensCache) Has(phrase string, lang string) bool

func (*TokensCache) Set added in v1.5.4

func (tc *TokensCache) Set(phrase string, lang string, tokens []*TokenNode)

type TokensCacheElement added in v1.5.4

// TokensCacheElement groups a phrase, its language and the tokens for it.
// NOTE(review): presumably the entry type stored inside TokensCache —
// confirm in the cache implementation.
type TokensCacheElement struct {
	Phrase string
	Lang   string
	Tokens []*TokenNode
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL