recognize

package
v0.0.0-...-9fd8d7c Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Oct 22, 2025 License: MIT Imports: 21 Imported by: 0

Documentation

Index

Constants

View Source
// Configuration constants for recognition behavior.
const (
	// DefaultRecognitionTimeout is the default timeout for recognition operations.
	DefaultRecognitionTimeout = 10 * time.Second

	// DefaultMaxContexts is the maximum number of contexts to generate during
	// matching. It prevents combinatorial explosion in complex patterns.
	DefaultMaxContexts = 10000

	// MaxContextsForPermutations is the maximum number of contexts for rules
	// containing permutations. Permutations can create exponential
	// combinations, so they are limited more strictly.
	MaxContextsForPermutations = 100

	// MaxWildcardMatches is the maximum number of wildcard matches to generate.
	// Wildcards can match in many positions; this limits the search space.
	MaxWildcardMatches = 100

	// CancellationCheckInterval is the interval for checking cancellation
	// during list matching: check every N values to balance responsiveness
	// and overhead.
	CancellationCheckInterval = 100

	// MaxRuleRecursionDepth is the maximum recursion depth for rule references.
	// It prevents stack overflow from circular or deeply nested rules.
	MaxRuleRecursionDepth = 20

	// MaxMatchesPerSentence is the maximum number of matches per sentence.
	// It limits memory usage for sentences that generate many matches.
	MaxMatchesPerSentence = 100

	// MaxResultsToReturn is the maximum number of results to return.
	// It prevents excessive memory usage for queries with many matches.
	MaxResultsToReturn = 1000

	// DefaultCacheSize is the default cache size for the global result cache.
	DefaultCacheSize = 500

	// MinValuesForFST is the minimum values count to use FST (Finite State
	// Transducer) in benchmarks.
	MinValuesForFST = 100000
)

Configuration constants for recognition behavior

View Source
// Slot match quality priorities. These scores determine which entity match
// is considered "better" when multiple entities could match the same slot;
// larger values rank higher.
const (
	// PriorityNameFullMatch: Complete entity name match (highest of the base
	// priorities in this block; the derived PriorityDomainFixedPattern, built
	// on top of it, is larger).
	// Example: User says "living room light" and we have an entity named "living room light"
	PriorityNameFullMatch = 1000

	// PriorityAreaWithDeviceClass: Area + device_class combination.
	// Example: "living room" + "light" device class
	PriorityAreaWithDeviceClass = 500

	// PriorityDomainWithContext: Domain with additional context fields.
	// Example: "light" domain with area or other context
	PriorityDomainWithContext = 300

	// PriorityDomainOnly: Domain only (no additional context).
	// Example: Just "light" domain
	PriorityDomainOnly = 200

	// PriorityAreaOnly: Area only (lowest priority for entity matching).
	// Example: Just "living room" area
	PriorityAreaOnly = 100
)

Slot match quality priorities. These scores determine which entity match is considered "better" when multiple entities could match the same slot.

View Source
// Domain match priority bonuses. These are added to base priorities for
// specific matching patterns.
const (
	// BonusDomainFixedPattern is the bonus for domain + fixed pattern matching.
	// Example: domain with device_class in a fixed pattern
	BonusDomainFixedPattern = 50

	// BonusAreaDeviceClassFixed is the bonus for area + device_class + fixed pattern.
	// Example: area, device_class, and name all in fixed positions
	BonusAreaDeviceClassFixed = 100
)

Domain match priority bonuses. These are added to base priorities for specific matching patterns.

View Source
// Derived priorities, each written as a base priority plus a bonus (for
// clarity and maintainability).
const (
	// PriorityDomainFixedPattern is the priority for domain + device_class in
	// a fixed pattern.
	// Base: PriorityNameFullMatch + BonusDomainFixedPattern
	// NOTE(review): the base is the name-match priority rather than one of
	// the domain priorities — confirm this is intentional.
	PriorityDomainFixedPattern = PriorityNameFullMatch + BonusDomainFixedPattern // 1050

	// PriorityAreaDeviceClassFixed is the priority for area + device_class +
	// fixed pattern.
	// Base: PriorityAreaWithDeviceClass + BonusAreaDeviceClassFixed
	PriorityAreaDeviceClassFixed = PriorityAreaWithDeviceClass + BonusAreaDeviceClassFixed // 600
)

Derived priorities (for clarity and maintainability)

View Source
// MissingEntity is the literal placeholder string "<missing>".
// NOTE(review): presumably stands in for an entity that has no value — confirm against callers.
const MissingEntity = "<missing>"

Variables

View Source
var (
	// DebugMode controls whether debug information is output. It is true
	// when the HASSIL_DEBUG environment variable is non-empty at the time
	// this variable is initialized.
	DebugMode = os.Getenv("HASSIL_DEBUG") != ""

	// EnableResultCache controls whether result caching is enabled (off by
	// default). Enabling it can give a significant performance gain (100x+)
	// for workloads with highly repetitive requests, but it increases memory
	// use and brings limited benefit when requests are diverse.
	EnableResultCache = false
)
View Source
// GlobalResultCache is the global result cache instance, created with a
// capacity of 500.
// NOTE(review): 500 equals the DefaultCacheSize constant; consider using the
// constant here so the two cannot drift apart.
var GlobalResultCache = NewResultCache(500)

GlobalResultCache is the global result cache instance.

View Source
// LanguageConfigs is the global language configuration registry, keyed by
// language code. Each entry is its own *LanguageConfig value, even where the
// field values are identical to another entry's.
var LanguageConfigs = map[string]*LanguageConfig{

	// Chinese variants: rune slicing on, whitespace ignored, and the
	// Chinese-specific area/name normalizers.
	"zh-CN": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: true,
		AreaNormalizer:   normalizeChineseArea,
		NameNormalizer:   normalizeChineseName,
	},
	"zh-TW": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: true,
		AreaNormalizer:   normalizeChineseArea,
		NameNormalizer:   normalizeChineseName,
	},
	"zh-HK": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: true,
		AreaNormalizer:   normalizeChineseArea,
		NameNormalizer:   normalizeChineseName,
	},
	// Every entry from here on keeps whitespace significant
	// (IgnoreWhitespace: false). Unless noted otherwise, entries use rune
	// slicing and trimWhitespaceOnly for both normalizers.
	"ja": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"ko": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"ar": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"he": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"th": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},

	// "hu" and "fi" are the only entries with rune slicing disabled and a
	// language-specific area normalizer; their name normalizer is still
	// trimWhitespaceOnly.
	"hu": {
		UseRuneSlicing:   false,
		IgnoreWhitespace: false,
		AreaNormalizer:   normalizeHungarianArea,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"fi": {
		UseRuneSlicing:   false,
		IgnoreWhitespace: false,
		AreaNormalizer:   normalizeFinnishArea,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"hr": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"sl": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},

	"el": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"ru": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"vi": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"ne": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},

	"pl": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"lt": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},

	"nb": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"da": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"sv": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"is": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},

	"it": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"pt-BR": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"de": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"de-CH": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"es": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"sk": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
	"sr-Latn": {
		UseRuneSlicing:   true,
		IgnoreWhitespace: false,
		AreaNormalizer:   trimWhitespaceOnly,
		NameNormalizer:   trimWhitespaceOnly,
	},
}

Global language configuration registry

Functions

func ClearGlobalTextSlotCache

func ClearGlobalTextSlotCache()

ClearGlobalTextSlotCache clears the global cache

func HashContext

func HashContext(ctx map[string]interface{}) string

HashContext computes the hash of the context.

func IsMatch

func IsMatch(
	text string,
	intentsObj *intents.Intents,
	slotLists map[string]intents.SlotList,
	expansionRules map[string]*expression.Sentence,
	skipWords []string,
	intentContext map[string]interface{},
	language string,
) bool

IsMatch checks if text matches any intent

func RecognizeAll

func RecognizeAll(
	text string,
	intentsObj *intents.Intents,
	slotLists map[string]intents.SlotList,
	expansionRules map[string]*expression.Sentence,
	skipWords []string,
	intentContext map[string]interface{},
	defaultResponse string,
	allowUnmatchedEntities bool,
	language string,
) chan *RecognizeResult

RecognizeAll yields all matches of input text against a collection of intents

func RecognizeAllWithContext

func RecognizeAllWithContext(
	ctx context.Context,
	text string,
	intentsObj *intents.Intents,
	slotLists map[string]intents.SlotList,
	expansionRules map[string]*expression.Sentence,
	skipWords []string,
	intentContext map[string]interface{},
	defaultResponse string,
	allowUnmatchedEntities bool,
	language string,
) chan *RecognizeResult

RecognizeAllWithContext yields all matches with context support

func SetMinValuesForFST

func SetMinValuesForFST(minValues int)

SetMinValuesForFST sets the minimum number of values to enable FST optimization

Types

type CacheKey

// CacheKey is the cache key for a recognition result. It combines the text,
// a hash of the intents, a hash of the context, and the language.
// NOTE(review): ContextHash is presumably produced by HashContext; the
// producer of IntentsHash is not visible here — confirm both.
type CacheKey struct {
	Text        string
	IntentsHash string
	ContextHash string
	Language    string
}

CacheKey is the cache key for a recognition result.

func (CacheKey) String

func (k CacheKey) String() string

String returns the string representation of the cache key.

type CachedTextSlotList

type CachedTextSlotList struct {
	// contains filtered or unexported fields
}

CachedTextSlotList represents a cached FST version of a TextSlotList

func (*CachedTextSlotList) CanUseFST

func (c *CachedTextSlotList) CanUseFST() bool

CanUseFST returns whether FST optimization is available

func (*CachedTextSlotList) FastMatch

func (c *CachedTextSlotList) FastMatch(text string, settings *MatchSettings) []int

FastMatch attempts to use FST for fast exact matching. Returns matched value indices, or nil if no match.

func (*CachedTextSlotList) FindCandidates

func (c *CachedTextSlotList) FindCandidates(text string, settings *MatchSettings) []TextSlotMatchCandidate

FindCandidates finds all possible candidates from the text using FST. This is more efficient than trying each value individually.

func (*CachedTextSlotList) GetValue

func (c *CachedTextSlotList) GetValue(index int) *intents.TextSlotValue

GetValue returns the value at given index

func (*CachedTextSlotList) Size

func (c *CachedTextSlotList) Size() int

Size returns the number of values

type LanguageConfig

// LanguageConfig defines language-specific processing strategies: how text
// is sliced and matched, and which normalizer functions apply to slot values.
type LanguageConfig struct {
	// String processing strategy.
	UseRuneSlicing   bool // Whether to use rune-based slicing (for multi-byte languages)
	IgnoreWhitespace bool // Whether whitespace is ignored in matching

	// Normalization functions for different slot types. Each takes a string
	// and returns the normalized string.
	AreaNormalizer func(string) string // For area/floor slots
	NameNormalizer func(string) string // For name slots
}

LanguageConfig defines language-specific processing strategies

func GetLanguageConfig

func GetLanguageConfig(language string) *LanguageConfig

GetLanguageConfig returns the configuration for a given language. Falls back to default ASCII config if language not found.

type MatchContext

// MatchContext represents the context passed to MatchExpression.
// NOTE(review): CancelCtx stores a context.Context in a struct, which the
// context package documentation advises against in favor of passing it as a
// parameter; changing it would alter the exported API, so it is only
// flagged here.
type MatchContext struct {
	Text              string
	Entities          []*models.MatchEntity
	IntentContext     map[string]interface{}
	IsStartOfWord     bool
	UnmatchedEntities []models.UnmatchedEntity
	CloseWildcards    bool
	CloseUnmatched    bool
	TextChunksMatched int
	IntentSentence    *expression.Sentence
	IntentData        *intents.IntentData
	Captures          []*models.MatchCapture
	RuleDepth         int             // Recursion depth of expansion rules
	CancelCtx         context.Context // Used for cancellation/timeout control
}

MatchContext represents the context passed to MatchExpression

func MatchExpression

func MatchExpression(settings *MatchSettings, context *MatchContext, expr expression.Expression) []*MatchContext

MatchExpression yields matching contexts for an expression

func NewMatchContext

func NewMatchContext(text string, closeWildcards, closeUnmatched bool) *MatchContext

NewMatchContext creates a new match context with post-initialization

func (*MatchContext) CopyContext

func (ctx *MatchContext) CopyContext(text string, isStartOfWord bool, closeWildcards, closeUnmatched bool) *MatchContext

CopyContext creates a copy of the context with modifications

func (*MatchContext) GetOpenEntity

func (ctx *MatchContext) GetOpenEntity() *models.UnmatchedTextEntity

GetOpenEntity returns the last open unmatched text entity or nil

func (*MatchContext) GetOpenWildcard

func (ctx *MatchContext) GetOpenWildcard() *models.MatchEntity

GetOpenWildcard returns the last open wildcard or nil

func (*MatchContext) IsMatch

func (ctx *MatchContext) IsMatch() bool

IsMatch returns true if no text is left that isn't just whitespace or punctuation

type MatchLimits

// MatchLimits holds configuration for limiting match generation. In every
// field, zero selects the corresponding package-level default named in that
// field's comment.
type MatchLimits struct {
	// MaxContexts is the maximum contexts to generate (0 = use DefaultMaxContexts).
	MaxContexts int

	// MaxWildcardMatches is the maximum wildcard matches (0 = use MaxWildcardMatches).
	MaxWildcardMatches int

	// MaxRecursionDepth is the maximum recursion depth (0 = use MaxRuleRecursionDepth).
	MaxRecursionDepth int

	// MaxResults is the maximum results to return (0 = use MaxResultsToReturn).
	MaxResults int
}

MatchLimits holds configuration for limiting match generation. Use this to customize behavior for specific use cases.

func DefaultMatchLimits

func DefaultMatchLimits() MatchLimits

DefaultMatchLimits returns the default match limits

func StrictMatchLimits

func StrictMatchLimits() MatchLimits

StrictMatchLimits returns stricter limits for complex patterns. Use this when dealing with patterns that could cause combinatorial explosion.

type MatchSettings

// MatchSettings contains settings used in MatchExpression.
type MatchSettings struct {
	SlotLists              map[string]intents.SlotList
	ExpansionRules         map[string]*expression.Sentence
	IgnoreWhitespace       bool
	AllowUnmatchedEntities bool
	Language               string
	MaxContexts            int // Limits the number of generated contexts; 0 means unlimited
}

MatchSettings contains settings used in MatchExpression

type RecognizeResult

// RecognizeResult represents the result of intent recognition.
// NOTE(review): Entities, UnmatchedEntities and Captures each have a map
// field and a matching ...List slice field; presumably both hold the same
// items, with the map keyed by name — confirm against the code that fills
// them in.
type RecognizeResult struct {
	Intent                *intents.Intent
	IntentData            *intents.IntentData
	Entities              map[string]*models.MatchEntity
	EntitiesList          []*models.MatchEntity
	Response              string
	Context               map[string]interface{}
	UnmatchedEntities     map[string]models.UnmatchedEntity
	UnmatchedEntitiesList []models.UnmatchedEntity
	TextChunksMatched     int
	IntentSentence        *expression.Sentence
	IntentMetadata        map[string]interface{}
	Captures              map[string]*models.MatchCapture
	CapturesList          []*models.MatchCapture
}

RecognizeResult represents the result of intent recognition

func Recognize

func Recognize(
	text string,
	intentsObj *intents.Intents,
	slotLists map[string]intents.SlotList,
	expansionRules map[string]*expression.Sentence,
	skipWords []string,
	intentContext map[string]interface{},
	defaultResponse string,
	allowUnmatchedEntities bool,
	language string,
) *RecognizeResult

Recognize returns the first match of input text against a collection of intents

func RecognizeBest

func RecognizeBest(
	text string,
	intentsObj *intents.Intents,
	slotLists map[string]intents.SlotList,
	expansionRules map[string]*expression.Sentence,
	skipWords []string,
	intentContext map[string]interface{},
	defaultResponse string,
	allowUnmatchedEntities bool,
	language string,
) *RecognizeResult

RecognizeBest returns the best match with the following priorities: (1) the result that has "bestMetadataKey" in its metadata (if provided); (2) the result that has an entity for "bestSlotName" and longest text (if provided); (3) the result that matches the most literal text.

func RecognizeBestWithOptions

func RecognizeBestWithOptions(
	text string,
	intentsObj *intents.Intents,
	slotLists map[string]intents.SlotList,
	expansionRules map[string]*expression.Sentence,
	skipWords []string,
	intentContext map[string]interface{},
	defaultResponse string,
	allowUnmatchedEntities bool,
	language string,
	bestMetadataKey string,
	bestSlotName string,
) *RecognizeResult

RecognizeBestWithOptions returns the best match with customizable prioritization

func RecognizeWithContext

func RecognizeWithContext(
	ctx context.Context,
	text string,
	intentsObj *intents.Intents,
	slotLists map[string]intents.SlotList,
	expansionRules map[string]*expression.Sentence,
	skipWords []string,
	intentContext map[string]interface{},
	defaultResponse string,
	allowUnmatchedEntities bool,
	language string,
) *RecognizeResult

RecognizeWithContext returns the first match with a context for cancellation/timeout

type ResultCache

type ResultCache struct {
	// contains filtered or unexported fields
}

ResultCache is an LRU cache used to cache recognition results.

func NewResultCache

func NewResultCache(capacity int) *ResultCache

NewResultCache creates a new result cache.

func (*ResultCache) Clear

func (c *ResultCache) Clear()

Clear empties the cache.

func (*ResultCache) Get

func (c *ResultCache) Get(key CacheKey) (*RecognizeResult, bool)

Get retrieves a result from the cache.

func (*ResultCache) Put

func (c *ResultCache) Put(key CacheKey, result *RecognizeResult)

Put stores a result in the cache.

func (*ResultCache) Stats

func (c *ResultCache) Stats() (hits, misses uint64, size int)

Stats returns cache statistics.

type SlotNormalizer

type SlotNormalizer struct {
	// contains filtered or unexported fields
}

SlotNormalizer applies language-specific normalization to slot values

func NewSlotNormalizer

func NewSlotNormalizer(config *LanguageConfig) *SlotNormalizer

NewSlotNormalizer creates a new SlotNormalizer with the given language configuration

func (*SlotNormalizer) NormalizeSlotValue

func (sn *SlotNormalizer) NormalizeSlotValue(slotName, rawValue string) string

NormalizeSlotValue applies appropriate normalization based on slot type

Different slot types require different normalization strategies:

  • area/floor: Language-specific normalization (remove case suffixes, particles, etc.)
  • name: Light normalization only (trim whitespace, optionally remove particles)
  • other: No normalization (keep as-is)

This separation prevents errors like applying Hungarian case suffix removal to entity names like "telefon" (which would incorrectly become "telef").

type TextSlicer

type TextSlicer struct {
	// contains filtered or unexported fields
}

TextSlicer handles language-aware text extraction from matched patterns

func NewTextSlicer

func NewTextSlicer(config *LanguageConfig) *TextSlicer

NewTextSlicer creates a new TextSlicer with the given language configuration

func (*TextSlicer) ExtractMatchedText

func (ts *TextSlicer) ExtractMatchedText(remainingText, matchText string) string

ExtractMatchedText extracts the portion of text that was matched by a slot

Parameters:

  • remainingText: The original text being matched (may contain whitespace)
  • matchText: The text after slot matching (whitespace may be removed if IgnoreWhitespace=true)

Returns: The portion of remainingText that was consumed by the slot

type TextSlotListFSTCache

type TextSlotListFSTCache struct {
	// contains filtered or unexported fields
}

TextSlotListFSTCache caches FST representations of TextSlotLists for fast exact matching

func GetGlobalTextSlotCache

func GetGlobalTextSlotCache() *TextSlotListFSTCache

GetGlobalTextSlotCache returns the global cache instance

func (*TextSlotListFSTCache) Clear

func (c *TextSlotListFSTCache) Clear()

Clear clears the cache

func (*TextSlotListFSTCache) GetCachedTextSlotList

func (c *TextSlotListFSTCache) GetCachedTextSlotList(list *intents.TextSlotList, settings *MatchSettings) *CachedTextSlotList

GetCachedTextSlotList returns a cached FST version of the TextSlotList

type TextSlotMatchCandidate

// TextSlotMatchCandidate represents a candidate match from FST: a slot value
// paired with the text that matched it.
type TextSlotMatchCandidate struct {
	Value     *intents.TextSlotValue
	MatchText string // The text that matched
}

TextSlotMatchCandidate represents a candidate match from FST

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL