Documentation
¶
Index ¶
- Constants
- Variables
- func ClearGlobalTextSlotCache()
- func HashContext(ctx map[string]interface{}) string
- func IsMatch(text string, intentsObj *intents.Intents, ...) bool
- func RecognizeAll(text string, intentsObj *intents.Intents, ...) chan *RecognizeResult
- func RecognizeAllWithContext(ctx context.Context, text string, intentsObj *intents.Intents, ...) chan *RecognizeResult
- func SetMinValuesForFST(minValues int)
- type CacheKey
- type CachedTextSlotList
- func (c *CachedTextSlotList) CanUseFST() bool
- func (c *CachedTextSlotList) FastMatch(text string, settings *MatchSettings) []int
- func (c *CachedTextSlotList) FindCandidates(text string, settings *MatchSettings) []TextSlotMatchCandidate
- func (c *CachedTextSlotList) GetValue(index int) *intents.TextSlotValue
- func (c *CachedTextSlotList) Size() int
- type LanguageConfig
- type MatchContext
- type MatchLimits
- type MatchSettings
- type RecognizeResult
- func Recognize(text string, intentsObj *intents.Intents, ...) *RecognizeResult
- func RecognizeBest(text string, intentsObj *intents.Intents, ...) *RecognizeResult
- func RecognizeBestWithOptions(text string, intentsObj *intents.Intents, ...) *RecognizeResult
- func RecognizeWithContext(ctx context.Context, text string, intentsObj *intents.Intents, ...) *RecognizeResult
- type ResultCache
- type SlotNormalizer
- type TextSlicer
- type TextSlotListFSTCache
- type TextSlotMatchCandidate
Constants ¶
const ( // Default timeout for recognition operations DefaultRecognitionTimeout = 10 * time.Second // Maximum number of contexts to generate during matching // This prevents combinatorial explosion in complex patterns DefaultMaxContexts = 10000 // Maximum contexts for rules containing permutations // Permutations can create exponential combinations, so we limit them more strictly MaxContextsForPermutations = 100 // Maximum number of wildcard matches to generate // Wildcards can match in many positions, this limits the search space MaxWildcardMatches = 100 // Interval for checking cancellation during list matching // Check every N values to balance responsiveness and overhead CancellationCheckInterval = 100 // Maximum recursion depth for rule references // Prevents stack overflow from circular or deeply nested rules MaxRuleRecursionDepth = 20 // Maximum number of matches per sentence // Limits memory usage for sentences that generate many matches MaxMatchesPerSentence = 100 // Maximum number of results to return // Prevents excessive memory usage for queries with many matches MaxResultsToReturn = 1000 // Default cache size for global result cache DefaultCacheSize = 500 // Minimum values count to use FST (Finite State Transducer) in benchmarks MinValuesForFST = 100000 )
Configuration constants for recognition behavior
const ( // PriorityNameFullMatch: Complete entity name match (highest priority) // Example: User says "living room light" and we have an entity named "living room light" PriorityNameFullMatch = 1000 // PriorityAreaWithDeviceClass: Area + device_class combination // Example: "living room" + "light" device class PriorityAreaWithDeviceClass = 500 // PriorityDomainWithContext: Domain with additional context fields // Example: "light" domain with area or other context PriorityDomainWithContext = 300 // PriorityDomainOnly: Domain only (no additional context) // Example: Just "light" domain PriorityDomainOnly = 200 // PriorityAreaOnly: Area only (lowest priority for entity matching) // Example: Just "living room" area PriorityAreaOnly = 100 )
Slot match quality priorities. These scores determine which entity match is considered "better" when multiple entities could match the same slot.
const ( // Bonus for domain + fixed pattern matching // Example: domain with device_class in a fixed pattern BonusDomainFixedPattern = 50 // Bonus for area + device_class + fixed pattern // Example: area, device_class, and name all in fixed positions BonusAreaDeviceClassFixed = 100 )
Domain match priority bonuses. These are added to base priorities for specific matching patterns.
const ( // Priority for domain + device_class in fixed pattern // Base: PriorityNameFullMatch + BonusDomainFixedPattern PriorityDomainFixedPattern = PriorityNameFullMatch + BonusDomainFixedPattern // 1050 // Priority for area + device_class + fixed pattern // Base: PriorityAreaWithDeviceClass + BonusAreaDeviceClassFixed PriorityAreaDeviceClassFixed = PriorityAreaWithDeviceClass + BonusAreaDeviceClassFixed // 600 )
Derived priorities (for clarity and maintainability)
const MissingEntity = "<missing>"
Variables ¶
var ( // DebugMode 控制是否输出调试信息 DebugMode = os.Getenv("HASSIL_DEBUG") != "" // EnableResultCache 控制是否启用结果缓存(默认关闭) // 在高重复请求的场景下启用可以获得显著的性能提升(100x+) // 但会增加内存使用,且在请求多样化的场景下收益有限 EnableResultCache = false )
var GlobalResultCache = NewResultCache(500)
GlobalResultCache is the global result cache instance.
var LanguageConfigs = map[string]*LanguageConfig{ "zh-CN": { UseRuneSlicing: true, IgnoreWhitespace: true, AreaNormalizer: normalizeChineseArea, NameNormalizer: normalizeChineseName, }, "zh-TW": { UseRuneSlicing: true, IgnoreWhitespace: true, AreaNormalizer: normalizeChineseArea, NameNormalizer: normalizeChineseName, }, "zh-HK": { UseRuneSlicing: true, IgnoreWhitespace: true, AreaNormalizer: normalizeChineseArea, NameNormalizer: normalizeChineseName, }, "ja": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "ko": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "ar": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "he": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "th": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "hu": { UseRuneSlicing: false, IgnoreWhitespace: false, AreaNormalizer: normalizeHungarianArea, NameNormalizer: trimWhitespaceOnly, }, "fi": { UseRuneSlicing: false, IgnoreWhitespace: false, AreaNormalizer: normalizeFinnishArea, NameNormalizer: trimWhitespaceOnly, }, "hr": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "sl": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "el": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "ru": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "vi": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, 
NameNormalizer: trimWhitespaceOnly, }, "ne": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "pl": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "lt": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "nb": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "da": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "sv": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "is": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "it": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "pt-BR": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "de": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "de-CH": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "es": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "sk": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, "sr-Latn": { UseRuneSlicing: true, IgnoreWhitespace: false, AreaNormalizer: trimWhitespaceOnly, NameNormalizer: trimWhitespaceOnly, }, }
Global language configuration registry
Functions ¶
func ClearGlobalTextSlotCache ¶
func ClearGlobalTextSlotCache()
ClearGlobalTextSlotCache clears the global cache
func IsMatch ¶
func IsMatch( text string, intentsObj *intents.Intents, slotLists map[string]intents.SlotList, expansionRules map[string]*expression.Sentence, skipWords []string, intentContext map[string]interface{}, language string, ) bool
IsMatch checks if text matches any intent
func RecognizeAll ¶
func RecognizeAll( text string, intentsObj *intents.Intents, slotLists map[string]intents.SlotList, expansionRules map[string]*expression.Sentence, skipWords []string, intentContext map[string]interface{}, defaultResponse string, allowUnmatchedEntities bool, language string, ) chan *RecognizeResult
RecognizeAll yields all matches of input text against a collection of intents
func RecognizeAllWithContext ¶
func RecognizeAllWithContext( ctx context.Context, text string, intentsObj *intents.Intents, slotLists map[string]intents.SlotList, expansionRules map[string]*expression.Sentence, skipWords []string, intentContext map[string]interface{}, defaultResponse string, allowUnmatchedEntities bool, language string, ) chan *RecognizeResult
RecognizeAllWithContext yields all matches with context support
func SetMinValuesForFST ¶
func SetMinValuesForFST(minValues int)
SetMinValuesForFST sets the minimum number of values to enable FST optimization
Types ¶
type CachedTextSlotList ¶
type CachedTextSlotList struct {
// contains filtered or unexported fields
}
CachedTextSlotList represents a cached FST version of a TextSlotList
func (*CachedTextSlotList) CanUseFST ¶
func (c *CachedTextSlotList) CanUseFST() bool
CanUseFST returns whether FST optimization is available
func (*CachedTextSlotList) FastMatch ¶
func (c *CachedTextSlotList) FastMatch(text string, settings *MatchSettings) []int
FastMatch attempts to use FST for fast exact matching. It returns the matched value indices, or nil if there is no match.
func (*CachedTextSlotList) FindCandidates ¶
func (c *CachedTextSlotList) FindCandidates(text string, settings *MatchSettings) []TextSlotMatchCandidate
FindCandidates finds all possible candidates from the text using FST. This is more efficient than trying each value individually.
func (*CachedTextSlotList) GetValue ¶
func (c *CachedTextSlotList) GetValue(index int) *intents.TextSlotValue
GetValue returns the value at given index
func (*CachedTextSlotList) Size ¶
func (c *CachedTextSlotList) Size() int
Size returns the number of values
type LanguageConfig ¶
type LanguageConfig struct {
// String processing strategy
UseRuneSlicing bool // Whether to use rune-based slicing (for multi-byte languages)
IgnoreWhitespace bool // Whether whitespace is ignored in matching
// Normalization functions for different slot types
AreaNormalizer func(string) string // For area/floor slots
NameNormalizer func(string) string // For name slots
}
LanguageConfig defines language-specific processing strategies
func GetLanguageConfig ¶
func GetLanguageConfig(language string) *LanguageConfig
GetLanguageConfig returns the configuration for a given language. It falls back to the default ASCII config if the language is not found.
type MatchContext ¶
type MatchContext struct {
Text string
Entities []*models.MatchEntity
IntentContext map[string]interface{}
IsStartOfWord bool
UnmatchedEntities []models.UnmatchedEntity
CloseWildcards bool
CloseUnmatched bool
TextChunksMatched int
IntentSentence *expression.Sentence
IntentData *intents.IntentData
Captures []*models.MatchCapture
RuleDepth int // 扩展规则递归深度
CancelCtx context.Context // 用于取消/超时控制
}
MatchContext represents the context passed to MatchExpression
func MatchExpression ¶
func MatchExpression(settings *MatchSettings, context *MatchContext, expr expression.Expression) []*MatchContext
MatchExpression yields matching contexts for an expression
func NewMatchContext ¶
func NewMatchContext(text string, closeWildcards, closeUnmatched bool) *MatchContext
NewMatchContext creates a new match context with post-initialization
func (*MatchContext) CopyContext ¶
func (ctx *MatchContext) CopyContext(text string, isStartOfWord bool, closeWildcards, closeUnmatched bool) *MatchContext
CopyContext creates a copy of the context with modifications
func (*MatchContext) GetOpenEntity ¶
func (ctx *MatchContext) GetOpenEntity() *models.UnmatchedTextEntity
GetOpenEntity returns the last open unmatched text entity or nil
func (*MatchContext) GetOpenWildcard ¶
func (ctx *MatchContext) GetOpenWildcard() *models.MatchEntity
GetOpenWildcard returns the last open wildcard or nil
func (*MatchContext) IsMatch ¶
func (ctx *MatchContext) IsMatch() bool
IsMatch returns true if the remaining text is empty or consists only of whitespace and punctuation.
type MatchLimits ¶
type MatchLimits struct {
// Maximum contexts to generate (0 = use DefaultMaxContexts)
MaxContexts int
// Maximum wildcard matches (0 = use MaxWildcardMatches)
MaxWildcardMatches int
// Maximum recursion depth (0 = use MaxRuleRecursionDepth)
MaxRecursionDepth int
// Maximum results to return (0 = use MaxResultsToReturn)
MaxResults int
}
MatchLimits holds configuration for limiting match generation. Use this to customize behavior for specific use cases.
func DefaultMatchLimits ¶
func DefaultMatchLimits() MatchLimits
DefaultMatchLimits returns the default match limits
func StrictMatchLimits ¶
func StrictMatchLimits() MatchLimits
StrictMatchLimits returns stricter limits for complex patterns. Use this when dealing with patterns that could cause combinatorial explosion.
type MatchSettings ¶
type MatchSettings struct {
SlotLists map[string]intents.SlotList
ExpansionRules map[string]*expression.Sentence
IgnoreWhitespace bool
AllowUnmatchedEntities bool
Language string
MaxContexts int // 限制生成的上下文数量,0表示无限制
}
MatchSettings contains settings used in MatchExpression
type RecognizeResult ¶
type RecognizeResult struct {
Intent *intents.Intent
IntentData *intents.IntentData
Entities map[string]*models.MatchEntity
EntitiesList []*models.MatchEntity
Response string
Context map[string]interface{}
UnmatchedEntities map[string]models.UnmatchedEntity
UnmatchedEntitiesList []models.UnmatchedEntity
TextChunksMatched int
IntentSentence *expression.Sentence
IntentMetadata map[string]interface{}
Captures map[string]*models.MatchCapture
CapturesList []*models.MatchCapture
}
RecognizeResult represents the result of intent recognition
func Recognize ¶
func Recognize( text string, intentsObj *intents.Intents, slotLists map[string]intents.SlotList, expansionRules map[string]*expression.Sentence, skipWords []string, intentContext map[string]interface{}, defaultResponse string, allowUnmatchedEntities bool, language string, ) *RecognizeResult
Recognize returns the first match of input text against a collection of intents
func RecognizeBest ¶
func RecognizeBest( text string, intentsObj *intents.Intents, slotLists map[string]intents.SlotList, expansionRules map[string]*expression.Sentence, skipWords []string, intentContext map[string]interface{}, defaultResponse string, allowUnmatchedEntities bool, language string, ) *RecognizeResult
RecognizeBest returns the best match with the following priorities:
1. The result that has "bestMetadataKey" in its metadata (if provided)
2. The result that has an entity for "bestSlotName" and longest text (if provided)
3. The result that matches the most literal text
Note: RecognizeBest itself has no bestMetadataKey or bestSlotName parameter; to supply them, use RecognizeBestWithOptions.
func RecognizeBestWithOptions ¶
func RecognizeBestWithOptions( text string, intentsObj *intents.Intents, slotLists map[string]intents.SlotList, expansionRules map[string]*expression.Sentence, skipWords []string, intentContext map[string]interface{}, defaultResponse string, allowUnmatchedEntities bool, language string, bestMetadataKey string, bestSlotName string, ) *RecognizeResult
RecognizeBestWithOptions returns the best match with customizable prioritization
func RecognizeWithContext ¶
func RecognizeWithContext( ctx context.Context, text string, intentsObj *intents.Intents, slotLists map[string]intents.SlotList, expansionRules map[string]*expression.Sentence, skipWords []string, intentContext map[string]interface{}, defaultResponse string, allowUnmatchedEntities bool, language string, ) *RecognizeResult
RecognizeWithContext returns the first match with a context for cancellation/timeout
type ResultCache ¶
type ResultCache struct {
// contains filtered or unexported fields
}
ResultCache is an LRU cache used to cache recognition results.
func (*ResultCache) Get ¶
func (c *ResultCache) Get(key CacheKey) (*RecognizeResult, bool)
Get retrieves a result from the cache.
func (*ResultCache) Put ¶
func (c *ResultCache) Put(key CacheKey, result *RecognizeResult)
Put stores a result in the cache.
func (*ResultCache) Stats ¶
func (c *ResultCache) Stats() (hits, misses uint64, size int)
Stats returns cache statistics (hits, misses, and size).
type SlotNormalizer ¶
type SlotNormalizer struct {
// contains filtered or unexported fields
}
SlotNormalizer applies language-specific normalization to slot values
func NewSlotNormalizer ¶
func NewSlotNormalizer(config *LanguageConfig) *SlotNormalizer
NewSlotNormalizer creates a new SlotNormalizer with the given language configuration
func (*SlotNormalizer) NormalizeSlotValue ¶
func (sn *SlotNormalizer) NormalizeSlotValue(slotName, rawValue string) string
NormalizeSlotValue applies appropriate normalization based on slot type
Different slot types require different normalization strategies:
- area/floor: Language-specific normalization (remove case suffixes, particles, etc.)
- name: Light normalization only (trim whitespace, optionally remove particles)
- other: No normalization (keep as-is)
This separation prevents errors like applying Hungarian case suffix removal to entity names like "telefon" (which would incorrectly become "telef").
type TextSlicer ¶
type TextSlicer struct {
// contains filtered or unexported fields
}
TextSlicer handles language-aware text extraction from matched patterns
func NewTextSlicer ¶
func NewTextSlicer(config *LanguageConfig) *TextSlicer
NewTextSlicer creates a new TextSlicer with the given language configuration
func (*TextSlicer) ExtractMatchedText ¶
func (ts *TextSlicer) ExtractMatchedText(remainingText, matchText string) string
ExtractMatchedText extracts the portion of text that was matched by a slot
Parameters:
- remainingText: The original text being matched (may contain whitespace)
- matchText: The text after slot matching (whitespace may be removed if IgnoreWhitespace=true)
Returns: The portion of remainingText that was consumed by the slot
type TextSlotListFSTCache ¶
type TextSlotListFSTCache struct {
// contains filtered or unexported fields
}
TextSlotListFSTCache caches FST representations of TextSlotLists for fast exact matching
func GetGlobalTextSlotCache ¶
func GetGlobalTextSlotCache() *TextSlotListFSTCache
GetGlobalTextSlotCache returns the global cache instance
func (*TextSlotListFSTCache) GetCachedTextSlotList ¶
func (c *TextSlotListFSTCache) GetCachedTextSlotList(list *intents.TextSlotList, settings *MatchSettings) *CachedTextSlotList
GetCachedTextSlotList returns a cached FST version of the TextSlotList
type TextSlotMatchCandidate ¶
type TextSlotMatchCandidate struct {
Value *intents.TextSlotValue
MatchText string // The text that matched
}
TextSlotMatchCandidate represents a candidate match from FST