context

package

v0.14.0 Latest Latest Go to latest Published: Jun 3, 2026 License: MIT Imports: 20 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/blackwell-systems/knowing

Links

Open Source Insights

Documentation ¶

Overview ¶

Package context implements graph-aware context packing for AI agent consumption.

Package context provides equivalence class retrieval for bridging the vocabulary gap between natural-language task descriptions and code symbol names.

An equivalence class maps a concept (like "TRANSITIVE_IMPACT") to multiple phrases that developers use to describe it ("blast radius", "impact analysis", "downstream callers") and the specific symbols/tools those phrases should resolve to ("TransitiveCallers", "BlastRadius", "blast_radius").

Index ¶

Variables
func BuildAdjacencyCache(ctx stdctx.Context, store types.GraphStore) error
func BuildFTSQueryExported(keywords []string) string
func CommunityFilteredRWR(ctx stdctx.Context, store types.GraphStore, seeds []types.Hash, alpha float64, ...) (map[types.Hash]float64, error)
func ComputeHITS(ctx stdctx.Context, store types.GraphStore, nodes []types.Hash, maxIter int) (map[types.Hash]HITSScores, error)
func DecomposeCompoundsExported(keywords []string) string
func DecomposeCompoundsTargetedExported(keywords []string) string
func DetectRepoLanguageExported(ctx stdctx.Context, store types.GraphStore) string
func EstimateNodeTokens(n types.Node) int
func EstimateNodeTokensForFormat(n types.Node, format string) int
func EstimateTokens(text string) int
func ExtractPathTermsExported(desc string) []string
func FormatContextBlock(block *ContextBlock, format string) (string, error)
func IsVocabWorthy(kw string) bool
func NormalizeKeywords(taskDescription string) string
func RWRCacheStats() (hits, misses int64)
func RandomWalkWithRestart(ctx stdctx.Context, store types.GraphStore, seeds []types.Hash, alpha float64, ...) (map[types.Hash]float64, error)
func RandomWalkWithRestartWeighted(ctx stdctx.Context, store types.GraphStore, seeds []types.Hash, ...) (map[types.Hash]float64, map[types.Hash]int, error)
func SetSweepParams(p SweepParams)
type AutoConceptGenerator
- func NewAutoConceptGenerator(store types.GraphStore) *AutoConceptGenerator
- func (g *AutoConceptGenerator) Generate(ctx stdctx.Context) []EquivalenceClass
type BM25Searcher
type ContextBlock
- func PackIntoBudget(ranked []RankedSymbol, budget int, format string) *ContextBlock
type ContextEdge
type ContextEngine
- func NewContextEngine(store types.GraphStore) *ContextEngine
- func (e *ContextEngine) DisablePersistentCache()
- func (e *ContextEngine) ExplainSymbol(ctx stdctx.Context, task string, symbolQuery string) (*ExplainResult, error)
- func (e *ContextEngine) ForFiles(ctx stdctx.Context, opts FileOptions) (*ContextBlock, error)
- func (e *ContextEngine) ForPR(ctx stdctx.Context, opts PROptions) (*ContextBlock, error)
- func (e *ContextEngine) ForTask(ctx stdctx.Context, opts TaskOptions) (*ContextBlock, error)
- func (e *ContextEngine) SetCache(c *cache.SubgraphCache)
- func (e *ContextEngine) SetImplicitFeedback(f *ImplicitFeedback)
- func (e *ContextEngine) SetNodeCount(n int)
- func (e *ContextEngine) SetSession(st *SessionTracker)
- func (e *ContextEngine) SetTaskMemory(tm *TaskMemory)
- func (e *ContextEngine) SetVector(vs VectorSearcher)
type EquivMatchExported
- func MatchEquivalenceClassesLangExported(task string, classes []EquivalenceClass, lang string) []EquivMatchExported
type EquivalenceClass
- func LanguageEquivalenceClassesExported() []EquivalenceClass
- func SeedEquivalenceClassesExported() []EquivalenceClass
- func UniversalEquivalenceClassesExported() []EquivalenceClass
type ExplainResult
type FeedbackProvider
type FeedbackRecorder
type FileOptions
type HITSScores
type ImplicitFeedback
- func NewImplicitFeedback() *ImplicitFeedback
- func (f *ImplicitFeedback) AttributedCount() int
- func (f *ImplicitFeedback) DetectUsed(content string) []types.Hash
- func (f *ImplicitFeedback) Expire() []types.Hash
- func (f *ImplicitFeedback) ExpireAndReport(onUnused func(hash types.Hash))
- func (f *ImplicitFeedback) FlushAll() (unused []types.Hash, used []UsedSymbol)
- func (f *ImplicitFeedback) FlushUnused() []types.Hash
- func (f *ImplicitFeedback) PendingCount() int
- func (f *ImplicitFeedback) RegisterReturned(symbols []RankedSymbol)
- func (f *ImplicitFeedback) Reset()
- func (f *ImplicitFeedback) UsedSymbolNames(hashes []types.Hash) []UsedSymbol
type KeywordSet
- func ExtractKeywordSet(desc string) KeywordSet
- func ExtractKeywordSetExported(desc string) KeywordSet
- func (ks KeywordSet) All() []string
- func (ks KeywordSet) IsEmpty() bool
- func (ks KeywordSet) Primary() []string
type PROptions
type PackDiff
- func CompareContextPacks(old, new *ContextBlock) PackDiff
type PendingAttribution
type RankedSymbol
- func RankSymbols(symbols []ScoringInput, hitsScores ...map[types.Hash]HITSScores) []RankedSymbol
type ScoreComponents
type ScoringInput
type SessionTracker
- func NewSessionTracker() *SessionTracker
- func (st *SessionTracker) Count() int
- func (st *SessionTracker) Record(hash types.Hash)
- func (st *SessionTracker) RecordBatch(hashes []types.Hash)
- func (st *SessionTracker) Reset()
- func (st *SessionTracker) SessionBoosts(hashes []types.Hash) map[types.Hash]float64
type StoreSearcher
type SweepParams
type TaskMemory
- func NewTaskMemory(db *sql.DB) *TaskMemory
- func (tm *TaskMemory) Count(ctx context.Context) int
- func (tm *TaskMemory) Recall(ctx context.Context, queryKeywords []string) (map[types.Hash]float64, error)
- func (tm *TaskMemory) Record(ctx context.Context, keywords string, symbolHash types.Hash, score float64) error
- func (tm *TaskMemory) RecordBatch(ctx context.Context, keywords string, symbolHashes []types.Hash, score float64) error
type TaskOptions
type UsedSymbol
type VectorReRanker
type VectorSearcher
type VocabProvider
type VocabProviderWithCounts
type VocabRecorder

Constants ¶

This section is empty.

Variables ¶

View Source

var AdaptiveDensity bool

AdaptiveDensity, when true, automatically enables hub dampening and type-seed preference based on graph density (edges/nodes ratio from the store). Dense graphs (>50K nodes or edges/nodes > 5) get hub dampening at threshold 50 and type-seed preference enabled. This eliminates the need for manual env var tuning.

View Source

var AdaptiveSeedCount bool

AdaptiveSeedCount, when true, increases maxSeeds based on GraphNodeCount. On large graphs (>40K nodes), more seeds compensate for higher disconnection rates where ground truth symbols are further from any individual seed. Default false; auto-enabled when AdaptiveDensity is true.

View Source

var BFSMaxDepth int

BFSMaxDepth controls the BFS expansion depth for adjacency map construction. Default 4. On dense graphs (>50K nodes), reducing to 2-3 limits how many nodes enter the RWR walk, preventing probability mass dilution. 0 means use default (4).

View Source

var CoherenceBonus = 0.0

CoherenceBonus controls the density boost for symbols that share a file with already-packed symbols. Range [0.0, 1.0]. At 0.0 (default), packing is purely density-ranked (current behavior). At 0.3, a symbol co-located with a packed symbol gets a 30% density boost, favoring coherent subgraphs over scattered high-scoring singletons. Set via BENCH_COHERENCE_BONUS for experiments.

View Source

var ExcludeEdgeTypes map[string]bool

ExcludeEdgeTypes is a set of edge types to exclude from adjacency map construction. When non-nil, edges of these types are skipped during BFS expansion AND during RWR iteration. Used for ablation studies (diagnosing which edge types cause dilution). Set via bench adapter or CLI; nil means all edge types are included.

View Source

var FeedbackNegWeight = 0.05

View Source

var FeedbackPosWeight = 0.25

FeedbackPosWeight controls how strongly positive feedback boosts symbol ranking. FeedbackNegWeight controls the negative penalty for symbols marked "not useful". Asymmetric: boost is stronger than penalty to avoid over-penalizing symbols that were incorrectly marked. Values tuned via automated sweep (TestFeedbackWeightSweep): 7x4 grid search found pos=0.25/neg=0.05 optimal (P@10 34%->44%, R@10 46%->60%).

View Source

var GraphNodeCount int

GraphNodeCount is set by the adapter/engine when the graph size is known. Used by AdaptiveDensity to decide thresholds. 0 means unknown.

View Source

var HubDampeningThreshold int

HubDampeningThreshold: after RWR, penalize nodes with in-degree above this threshold by dividing their score by sqrt(in-degree/threshold). 0 means disabled. Targets hub nodes that absorb probability regardless of query (Disposable, Event, etc.)

View Source

var PackStrategy string

PackStrategy controls the packing algorithm used by packIntoBudget. "density" (default): density-ranked with RWR proximity weighting. "file-grouped": group symbols by file, pack densest files first. "top-k": take highest-scored symbols until budget exhausted. Override with BENCH_PACK_STRATEGY env var.

View Source

var PreferTypeSeeds bool

PreferTypeSeeds: when true, BM25/tiered results are reordered to prioritize type/interface/class nodes over methods/functions as RWR seeds. On dense graphs, types are better seeds because RWR walks from them to methods via contains edges.

View Source

var RWRCacheEnabled = true

RWRCacheEnabled controls whether RWR result caching is active. Disable for benchmarks that need fresh walks every time.

View Source

var ReRankOriginalWeight = 0.0

ReRankOriginalWeight controls the blend between original RWR score and embedding similarity in the re-ranker. Range [0.0, 1.0]. Higher = more conservative (preserves original ranking/MRR). Lower = more aggressive re-ranking (better recall, worse MRR). Default 0.0 (pure re-rank by embedding similarity). Validated on full 167-task corpus: P@10 0.207 -> 0.242 (+17%), R@10 +18.3%, MRR +8.1%. All metrics improved. Set via BENCH_RERANK_WEIGHT for parameter sweep experiments.

Functions ¶

func BuildAdjacencyCache ¶ added in v0.8.0

func BuildAdjacencyCache(ctx stdctx.Context, store types.GraphStore) error

BuildAdjacencyCache builds the full adjacency map and stores it as a compact binary blob (base64-encoded) in the notes table. Format: [num_edges:4 LE] followed by num_edges records of [source:32][target:32][type_id:1] = 65 bytes per edge. Call after indexing. Subsequent RWR queries load this cache in one read instead of per-node edge queries.

func BuildFTSQueryExported ¶ added in v0.13.0

func BuildFTSQueryExported(keywords []string) string

BuildFTSQueryExported wraps buildFTSQuery for external use.

func CommunityFilteredRWR ¶ added in v0.7.0

func CommunityFilteredRWR(ctx stdctx.Context, store types.GraphStore, seeds []types.Hash, alpha float64, maxIter int, communityIDs map[int]bool) (map[types.Hash]float64, error)

CommunityFilteredRWR is like RandomWalkWithRestart but constrains the BFS adjacency pre-load to nodes in the specified communities. When communityIDs is nil, the walk is unconstrained (identical to RandomWalkWithRestart).

func ComputeHITS ¶

func ComputeHITS(ctx stdctx.Context, store types.GraphStore, nodes []types.Hash, maxIter int) (map[types.Hash]HITSScores, error)

ComputeHITS runs the HITS (Hyperlink-Induced Topic Search) algorithm on a subgraph defined by the given node hashes. It computes authority scores (nodes that are heavily pointed to) and hub scores (nodes that point to many authorities).

In the context of code graphs:

Authority = heavily called functions, core types, key interfaces
Hub = orchestrators, entry points, functions that wire things together

Parameters:

nodes: the subgraph to analyze (typically top-200 RWR results)
store: graph store for edge lookups
maxIter: iterations (5-10 is typical for convergence)

Returns a map from node hash to HITS scores.

func DecomposeCompoundsExported ¶ added in v0.14.0

func DecomposeCompoundsExported(keywords []string) string

DecomposeCompoundsExported wraps decomposeCompounds for external use.

func DecomposeCompoundsTargetedExported ¶ added in v0.14.0

func DecomposeCompoundsTargetedExported(keywords []string) string

DecomposeCompoundsTargetedExported wraps decomposeCompoundsTargeted for external use.

func DetectRepoLanguageExported ¶ added in v0.14.0

func DetectRepoLanguageExported(ctx stdctx.Context, store types.GraphStore) string

DetectRepoLanguageExported wraps detectRepoLanguage for external use.

func EstimateNodeTokens ¶

func EstimateNodeTokens(n types.Node) int

EstimateNodeTokens estimates the token cost of including a node's full representation in context output. For format-aware scaling, use EstimateNodeTokensForFormat.

func EstimateNodeTokensForFormat ¶ added in v0.2.0

func EstimateNodeTokensForFormat(n types.Node, format string) int

EstimateNodeTokensForFormat estimates token cost with format-aware scaling. GCF uses local IDs and positional encoding, producing ~84% fewer tokens than JSON for the same symbol data.

func EstimateTokens ¶

func EstimateTokens(text string) int

EstimateTokens returns an approximate token count for a given text string. Uses the heuristic that code averages ~4 characters per token.

func ExtractPathTermsExported ¶ added in v0.13.0

func ExtractPathTermsExported(desc string) []string

ExtractPathTermsExported wraps extractPathTerms for external use.

func FormatContextBlock ¶

func FormatContextBlock(block *ContextBlock, format string) (string, error)

FormatContextBlock renders a ContextBlock into the requested format. Supported formats: "xml" (default), "markdown", "json". Returns an error for unknown formats.

func IsVocabWorthy ¶ added in v0.14.0

func IsVocabWorthy(kw string) bool

IsVocabWorthy is the exported entry point for vocab filtering. Used by benchmark adapters to apply the same filtering as production.

func NormalizeKeywords ¶ added in v0.2.0

func NormalizeKeywords(taskDescription string) string

NormalizeKeywords extracts and normalizes keywords from a task description for storage and matching. Reuses the existing keyword extraction logic but returns a space-joined string suitable for LIKE matching.

func RWRCacheStats ¶ added in v0.14.0

func RWRCacheStats() (hits, misses int64)

RWRCacheStats returns the current hit/miss counts.

func RandomWalkWithRestart ¶

func RandomWalkWithRestart(ctx stdctx.Context, store types.GraphStore, seeds []types.Hash, alpha float64, maxIter int) (map[types.Hash]float64, error)

RandomWalkWithRestart computes relevance scores for all nodes reachable from the seed set by simulating random walks that restart at seed nodes with probability alpha. The stationary distribution assigns higher scores to nodes that are structurally close to the seeds and highly connected.

Parameters:

seeds: initial nodes to start walks from (uniform weight)
alpha: restart probability (0.2 means 20% chance of returning to a seed each step)
maxIter: maximum iterations (20 is typical for convergence)
store: graph store for edge lookups

Returns a map from node hash to relevance score (0.0 to 1.0, normalized). RandomWalkWithRestart uses uniform seed weights; for weighted seeds (prioritizing specific keywords), use RandomWalkWithRestartWeighted.

func RandomWalkWithRestartWeighted ¶ added in v0.7.1

func RandomWalkWithRestartWeighted(ctx stdctx.Context, store types.GraphStore, seeds []types.Hash, seedWeights map[types.Hash]float64, alpha float64, maxIter int) (map[types.Hash]float64, map[types.Hash]int, error)

RandomWalkWithRestartWeighted runs RWR with per-seed restart weights. Seeds with higher weights receive more probability mass on restart, causing the walk to spend more time in their neighborhood. This differentiates specific seeds (high weight) from generic ones (low weight).

Results are cached in the notes table keyed by (sorted seeds + weights + alpha + snapshot hash). On cache hit, the BFS, adjacency load, and iteration are skipped entirely. Cache misses compute fresh results and store them.

Weights are normalized to sum to 1.0 internally.

func SetSweepParams ¶ added in v0.8.0

func SetSweepParams(p SweepParams)

SetSweepParams sets the global sweep parameters for the retrieval pipeline. Pass a zero-value struct to reset to defaults.

Types ¶

type AutoConceptGenerator ¶ added in v0.2.0

type AutoConceptGenerator struct {
	// contains filtered or unexported fields
}

AutoConceptGenerator scans all symbols in the graph and generates equivalence classes from naming patterns. This provides repo-specific vocabulary without hand curation.

Strategies:

Handler pattern: "handleBlastRadius" -> concept "blast radius" -> target symbol
Tool pattern: "blastRadiusTool" -> concept "blast radius tool" -> target symbol
Package grouping: all symbols in "search/" -> concept phrases from package name
Composite names: "IncrementalReindex" -> "incremental reindex" as searchable phrase

func NewAutoConceptGenerator ¶ added in v0.2.0

func NewAutoConceptGenerator(store types.GraphStore) *AutoConceptGenerator

NewAutoConceptGenerator creates a generator backed by the given store.

func (*AutoConceptGenerator) Generate ¶ added in v0.2.0

func (g *AutoConceptGenerator) Generate(ctx stdctx.Context) []EquivalenceClass

Generate scans the graph and produces equivalence classes from symbol naming patterns.

type BM25Searcher ¶ added in v0.2.0

type BM25Searcher interface {
	SearchBM25Nodes(ctx stdctx.Context, query string, limit int) ([]types.Node, error)
}

BM25Searcher is implemented by stores that support full-text BM25 search. Returns nodes ordered by BM25 relevance (best matches first).

type ContextBlock ¶

type ContextBlock struct {
	Symbols     []RankedSymbol
	Edges       []ContextEdge
	Format      string
	TokensUsed  int
	TokenBudget int
	// PackRoot is the content-addressed identity of this context pack.
	// Computed from hash(task_normalized, snapshot_root, selected_node_hashes).
	// Two identical queries against the same graph state produce the same PackRoot,
	// enabling deduplication, citation, and cross-session replay.
	PackRoot types.Hash
}

ContextBlock is the result of a context query: a ranked list of symbols that fit within a token budget, plus the edges between them.

func PackIntoBudget ¶ added in v0.14.0

func PackIntoBudget(ranked []RankedSymbol, budget int, format string) *ContextBlock

PackIntoBudget is the exported entry point for the packing algorithm. Used by the context packing benchmark to compare strategies.

type ContextEdge ¶

type ContextEdge struct {
	Source   string // qualified name of source
	Target   string // qualified name of target
	EdgeType string
}

ContextEdge is an edge between two symbols in the context block.

type ContextEngine ¶

type ContextEngine struct {
	// contains filtered or unexported fields
}

ContextEngine queries the knowing knowledge graph to produce task-specific, token-budgeted context blocks ranked by graph relationships and runtime traffic.

func NewContextEngine ¶

func NewContextEngine(store types.GraphStore) *ContextEngine

NewContextEngine creates a ContextEngine backed by the given GraphStore. If the store implements FeedbackProvider, feedback-aware reranking is enabled.

func (*ContextEngine) DisablePersistentCache ¶ added in v0.10.0

func (e *ContextEngine) DisablePersistentCache()

DisablePersistentCache prevents the engine from reading/writing cached packs in the notes table. Used in benchmarks to ensure fresh retrieval on every query.

func (*ContextEngine) ExplainSymbol ¶ added in v0.2.0

func (e *ContextEngine) ExplainSymbol(ctx stdctx.Context, task string, symbolQuery string) (*ExplainResult, error)

ExplainSymbol runs the full retrieval pipeline for a task and returns a detailed scoring breakdown for a specific symbol. If the symbol is not in the results, it still returns whatever information is available (e.g., "not found in seed set, not reached by RWR").

func (*ContextEngine) ForFiles ¶

func (e *ContextEngine) ForFiles(ctx stdctx.Context, opts FileOptions) (*ContextBlock, error)

ForFiles produces blast-radius context weighted by runtime observations for a set of changed files.

func (*ContextEngine) ForPR ¶

func (e *ContextEngine) ForPR(ctx stdctx.Context, opts PROptions) (*ContextBlock, error)

ForPR produces relationship-aware context for a pull request. It identifies all symbols in the changed files, runs RWR from them to find the broader impact neighborhood, and includes blast radius (callers of changed symbols) as distance-1 context. This is the highest-value context call: one invocation at PR-open time surfaces the full structural impact.

func (*ContextEngine) ForTask ¶

func (e *ContextEngine) ForTask(ctx stdctx.Context, opts TaskOptions) (*ContextBlock, error)

ForTask produces ranked context for a task description by finding relevant symbols in the knowledge graph, scoring them, and packing them within the token budget.

func (*ContextEngine) SetCache ¶ added in v0.3.0

func (e *ContextEngine) SetCache(c *cache.SubgraphCache)

SetCache attaches a SubgraphCache for result memoization. When set, ForTask checks the cache before running retrieval and stores the result after a cache miss. Cache keys are derived from the normalized task description so that identical queries skip the full retrieval pipeline. Pass nil to disable caching.

func (*ContextEngine) SetImplicitFeedback ¶ added in v0.14.0

func (e *ContextEngine) SetImplicitFeedback(f *ImplicitFeedback)

SetImplicitFeedback attaches an implicit feedback tracker to the engine. When set, ForTask automatically flushes unused symbols from the previous call (recording negative feedback) and registers new returned symbols for attribution. This enables noise demotion: symbols returned but never used by the agent get penalized on future queries.

func (*ContextEngine) SetNodeCount ¶ added in v0.12.0

func (e *ContextEngine) SetNodeCount(n int)

SetNodeCount sets the per-engine node count for density-adaptive retrieval. When set (> 0), overrides the global GraphNodeCount. Thread-safe: each engine has its own field, no global mutation needed.

func (*ContextEngine) SetSession ¶ added in v0.2.0

func (e *ContextEngine) SetSession(st *SessionTracker)

SetSession attaches a session tracker to the engine. When set, symbols returned by previous queries in this session receive a boost on subsequent queries. Pass nil to disable session-aware boosting.

func (*ContextEngine) SetTaskMemory ¶ added in v0.2.0

func (e *ContextEngine) SetTaskMemory(tm *TaskMemory)

SetTaskMemory attaches a task memory for passive retrieval learning. When set, past task-symbol associations boost future queries with similar keywords.

func (*ContextEngine) SetVector ¶ added in v0.2.0

func (e *ContextEngine) SetVector(vs VectorSearcher)

SetVector attaches a vector search backend to the engine.

type EquivMatchExported ¶ added in v0.14.0

type EquivMatchExported struct {
	Class   EquivalenceClass
	Targets []string
}

EquivMatchExported is an exported version of equivalenceMatch for debug tools.

func MatchEquivalenceClassesLangExported ¶ added in v0.14.0

func MatchEquivalenceClassesLangExported(task string, classes []EquivalenceClass, lang string) []EquivMatchExported

MatchEquivalenceClassesLangExported wraps matchEquivalenceClassesLang for external use. Returns exported match structs.

type EquivalenceClass ¶ added in v0.2.0

type EquivalenceClass struct {
	Concept    string   // canonical concept ID (e.g., "TRANSITIVE_IMPACT")
	Phrases    []string // natural-language phrases that refer to this concept
	Targets    []string // symbol/tool identifiers to boost when phrases match
	TargetType string   // "symbol", "mcp_tool", "edge_type", "workflow", "file"
	Weight     float64  // source strength (seed: 1.0, graph: 0.7, feedback: 0.5)
	Source     string   // "seed", "graph", "feedback", "generated"
	Lang       string   // language scope: "go", "python", "typescript", "ruby", "java", "csharp", "rust", "" (universal)
}

EquivalenceClass maps a concept to its natural-language phrases and code targets.

func LanguageEquivalenceClassesExported ¶ added in v0.14.0

func LanguageEquivalenceClassesExported() []EquivalenceClass

LanguageEquivalenceClassesExported wraps languageEquivalenceClasses for external use.

func SeedEquivalenceClassesExported ¶ added in v0.14.0

func SeedEquivalenceClassesExported() []EquivalenceClass

SeedEquivalenceClassesExported wraps seedEquivalenceClasses for external use.

func UniversalEquivalenceClassesExported ¶ added in v0.14.0

func UniversalEquivalenceClassesExported() []EquivalenceClass

UniversalEquivalenceClassesExported wraps universalEquivalenceClasses for external use.

type ExplainResult ¶ added in v0.2.0

type ExplainResult struct {
	Symbol        types.Node
	Rank          int     // 1-indexed position in the ranked results
	TotalScore    float64 // final score after all components
	TotalSymbols  int     // total symbols considered
	Components    ScoreComponents
	HITSAuthority float64  // raw HITS authority score (0 if HITS not run)
	HITSHub       float64  // raw HITS hub score
	HITSAdjust    float64  // net HITS adjustment applied to total
	RWRScore      float64  // raw Random Walk with Restart score
	IsSeed        bool     // was this a direct keyword match (distance=0)?
	SeedChannel   string   // which channel found this symbol ("tiered", "bm25", "equiv", "rwr")
	SeedTier      string   // for tiered matches: "exact", "prefix", "substring", "path"
	EquivMatches  []string // equivalence classes that matched (concept names)
	Keywords      []string // extracted keywords from the task description
	MaxCallers    int      // max caller count in the candidate set (normalization denominator)
	CallerProxy   int      // RWR-derived caller proxy for this symbol
}

ExplainResult is the full scoring breakdown for a symbol in the context of a task query. Every field that contributed to the final score is exposed.

type FeedbackProvider ¶

type FeedbackProvider interface {
	FeedbackBoosts(ctx stdctx.Context, hashes []types.Hash, neighborhoodRoots map[types.Hash]types.Hash, cluster ...types.Hash) (map[types.Hash]float64, error)
}

FeedbackProvider is implemented by stores that support feedback queries.

type FeedbackRecorder ¶ added in v0.14.0

type FeedbackRecorder interface {
	RecordFeedback(ctx stdctx.Context, symbolHash types.Hash, sessionID string, useful bool, neighborhoodRoot types.Hash, cluster types.Hash) error
}

FeedbackRecorder writes feedback to persistent storage. Separated from FeedbackProvider (reads) so engines can record implicit feedback without depending on the full store interface.

type FileOptions ¶

type FileOptions struct {
	Files       []string // relative file paths
	RepoURL     string   // repo URL for resolving file hashes
	TokenBudget int      // default 50000
	Format      string   // "xml", "markdown", "json"
}

FileOptions configures a file-based context query.

type HITSScores ¶

type HITSScores struct {
	Authority float64
	Hub       float64
}

HITSScores holds the authority and hub scores for a node.

type ImplicitFeedback ¶ added in v0.7.1

type ImplicitFeedback struct {
	// contains filtered or unexported fields
}

ImplicitFeedback tracks symbols returned by context_for_task and detects when the agent subsequently uses them (in Edit tool calls, file references, etc.). When a returned symbol is "used," positive feedback is auto-recorded.

This closes the feedback loop without requiring explicit agent cooperation: the agent just uses context naturally, and the system learns which symbols were actually useful.

Attribution window: symbols remain attributable for 10 minutes after being returned. After that, the association expires (the agent may be working on something else). A new context_for_task call resets the window.

func NewImplicitFeedback ¶ added in v0.7.1

func NewImplicitFeedback() *ImplicitFeedback

NewImplicitFeedback creates a new implicit feedback tracker.

func (*ImplicitFeedback) AttributedCount ¶ added in v0.7.1

func (f *ImplicitFeedback) AttributedCount() int

AttributedCount returns the number of symbols that have been implicitly attributed.

func (*ImplicitFeedback) DetectUsed ¶ added in v0.7.1

func (f *ImplicitFeedback) DetectUsed(content string) []types.Hash

DetectUsed scans tool call content (e.g., Edit old_string, file paths) for references to pending symbols. Returns the hashes of symbols that appear to have been used by the agent.

Detection strategy:

Extract identifiers from the content (CamelCase words, snake_case, dotted paths)
Match against the name index of pending symbols
Only match symbols within the attribution window
Skip symbols already attributed this session

func (*ImplicitFeedback) Expire ¶ added in v0.7.1

func (f *ImplicitFeedback) Expire() []types.Hash

Expire removes symbols that have exceeded the attribution window and returns the hashes of symbols that expired WITHOUT being used. These represent "returned but not useful" symbols that should receive negative feedback.

The negative signal is what makes implicit feedback work: it's not enough to boost used symbols; we must also penalize unused ones so they rank lower next time. Without this asymmetry, positive-only feedback doesn't shift rankings.

func (*ImplicitFeedback) ExpireAndReport ¶ added in v0.7.1

func (f *ImplicitFeedback) ExpireAndReport(onUnused func(hash types.Hash))

ExpireAndReport is like Expire but, instead of returning the unused hashes as a slice, passes them to the onUnused callback. This avoids the caller needing to hold onto a returned slice when immediate processing is preferred.

func (*ImplicitFeedback) FlushAll ¶ added in v0.14.0

func (f *ImplicitFeedback) FlushAll() (unused []types.Hash, used []UsedSymbol)

FlushAll returns both unused and used symbols, then clears state. Used symbols include their short names for vocabulary association recording.

func (*ImplicitFeedback) FlushUnused ¶ added in v0.7.1

func (f *ImplicitFeedback) FlushUnused() []types.Hash

FlushUnused forces expiration of ALL pending symbols and returns unused ones. Use at end of a context_for_task call cycle (new query replaces old context) or at session end. This ensures timely negative feedback without waiting for the attribution window.

Also clears the attributed set for the flushed batch so the same symbols can be re-attributed in the next cycle. Each context_for_task call starts a fresh attribution cycle.

func (*ImplicitFeedback) PendingCount ¶ added in v0.7.1

func (f *ImplicitFeedback) PendingCount() int

PendingCount returns the number of symbols awaiting attribution.

func (*ImplicitFeedback) RegisterReturned ¶ added in v0.7.1

func (f *ImplicitFeedback) RegisterReturned(symbols []RankedSymbol)

RegisterReturned records symbols that were just returned by context_for_task. These become candidates for implicit attribution when the agent subsequently references them in tool calls.

func (*ImplicitFeedback) Reset ¶ added in v0.7.1

func (f *ImplicitFeedback) Reset()

Reset clears all pending attributions and the attributed set. Call when the session context shifts significantly.

func (*ImplicitFeedback) UsedSymbolNames ¶ added in v0.14.0

func (f *ImplicitFeedback) UsedSymbolNames(hashes []types.Hash) []UsedSymbol

UsedSymbolNames returns the short names for a set of symbol hashes by looking up their pending attributions. Used by MCP server to record vocab associations without needing to re-resolve hashes to names.

type KeywordSet ¶ added in v0.7.0

type KeywordSet struct {
	// Exact: backtick-quoted identifiers from the task description.
	// These are explicit symbol references (e.g., `before_request`).
	Exact []string
	// Compounds: multi-part identifiers detected by structure (snake_case,
	// CamelCase, dotted) or generated from bigram joining.
	Compounds []string
	// Components: individual words split from identifiers, abbreviation
	// expansions, and priority terms. Used as fallback when compounds
	// yield insufficient results.
	Components []string
}

KeywordSet separates extracted keywords by specificity tier. Tiered search queries these in priority order: Exact first, then Compounds, then Components only as fallback. This prevents split components like "before" and "request" from drowning out the actual compound "before_request".

func ExtractKeywordSet ¶ added in v0.8.0

func ExtractKeywordSet(desc string) KeywordSet

ExtractKeywordSet is the exported entry point for keyword extraction. Used by benchmarks and tooling that need structured access to extracted keywords.

func ExtractKeywordSetExported ¶ added in v0.13.0

func ExtractKeywordSetExported(desc string) KeywordSet

ExtractKeywordSetExported wraps extractKeywordSet for external use.

func (KeywordSet) All ¶ added in v0.7.0

func (ks KeywordSet) All() []string

All returns all keywords in priority order (exact, compounds, components). Used by callers that don't need structured access.

func (KeywordSet) IsEmpty ¶ added in v0.7.0

func (ks KeywordSet) IsEmpty() bool

IsEmpty returns true if no keywords were extracted.

func (KeywordSet) Primary ¶ added in v0.7.0

func (ks KeywordSet) Primary() []string

Primary returns the highest-priority keywords (exact + compounds). These should be queried first by tiered search.

type PROptions ¶

// PROptions configures a PR context query: the changed files, the repository
// they belong to, the token budget for the packed result, and the output
// format.
//
// NOTE(review): the TokenBudget comment gives a default of 8000; presumably a
// zero value selects that default — confirm against the query implementation.
type PROptions struct {
	Files       []string // changed file paths (relative to repo root)
	RepoURL     string   // repo URL for resolving file hashes
	TokenBudget int      // default 8000 (larger than per-edit, used once per PR)
	Format      string   // "xml", "markdown", "json", "gcf"
}

PROptions configures a PR context query.

type PackDiff ¶ added in v0.4.0

// PackDiff describes the difference between two context packs, referred to
// as "old" (the first pack) and "new" (the second pack).
//
// The three symbol slices classify symbols by which pack they appear in:
// only the new pack, only the old pack, or both.
//
// NOTE(review): the slice elements are presumably symbol names — confirm
// whether they are short or fully qualified.
type PackDiff struct {
	// OldPackRoot is the PackRoot of the first pack.
	OldPackRoot types.Hash
	// NewPackRoot is the PackRoot of the second pack.
	NewPackRoot types.Hash

	// AddedSymbols are in new but not in old.
	AddedSymbols []string
	// RemovedSymbols are in old but not in new.
	RemovedSymbols []string
	// CommonSymbols are in both.
	CommonSymbols []string

	// Identical is true if the packs have the same symbols (PackRoots may
	// still differ if token budgets differ).
	Identical bool
}

PackDiff describes the difference between two context packs.

func CompareContextPacks ¶ added in v0.4.0

func CompareContextPacks(old, new *ContextBlock) PackDiff

CompareContextPacks computes the symmetric difference between two context blocks. This answers "what changed in the context this agent would see?"

type PendingAttribution ¶ added in v0.7.1

// PendingAttribution represents a symbol awaiting implicit attribution.
//
// NOTE(review): Hash presumably identifies the symbol's graph node, and
// ReturnedAt is presumably the time the symbol was returned by
// context_for_task — confirm both against RegisterReturned.
type PendingAttribution struct {
	Hash       types.Hash
	Name       string // short name (last component of qualified name)
	QualName   string // full qualified name
	ReturnedAt time.Time
	Score      float64 // ranking score when returned (higher = more confident attribution)
}

PendingAttribution represents a symbol awaiting implicit attribution.

type RankedSymbol ¶

// RankedSymbol is a graph node paired with its computed relevance score and
// score breakdown: Node is the graph node, Score is its computed relevance
// score, and Components is the breakdown of that score.
//
// Distance is the binary value used for scoring, while BFSDistance and
// RWRScore carry proximity information used during packing.
//
// NOTE(review): Provenance is undocumented here; it presumably names the
// provenance tier behind the symbol's confidence — confirm.
type RankedSymbol struct {
	Node        types.Node
	Score       float64
	Components  ScoreComponents
	Provenance  string
	Distance    int     // binary 0/1 for scoring
	BFSDistance int     // actual BFS hop count for proximity-weighted packing
	RWRScore    float64 // raw normalized RWR score (0-1), proxy for seed proximity in packing
}

RankedSymbol is a graph node paired with its computed relevance score and score breakdown.

func RankSymbols ¶

func RankSymbols(symbols []ScoringInput, hitsScores ...map[types.Hash]HITSScores) []RankedSymbol

RankSymbols scores each symbol by a weighted formula incorporating blast radius, confidence, recency, and graph distance, then returns them sorted by score descending. Blast radius is normalized relative to the max in the input set, ensuring the full 0.0-1.0 range is used regardless of codebase size.

If HITS scores are provided (non-nil map), authority scores are factored into the ranking, promoting structurally important nodes (heavily called) over leaf functions.

type ScoreComponents ¶

// ScoreComponents breaks down a symbol's score into its weighted components.
//
// NOTE(review): BlastRadius, Confidence, Recency and Distance presumably hold
// the weighted contributions of the like-named ranking factors, and Feedback
// and Session presumably derive from ScoringInput.FeedbackBoost and
// ScoringInput.SessionBoost — confirm against RankSymbols.
type ScoreComponents struct {
	BlastRadius float64
	Confidence  float64
	Recency     float64
	Distance    float64
	Feedback    float64
	Session     float64
}

ScoreComponents breaks down a symbol's score into its weighted components.

type ScoringInput ¶

// ScoringInput provides the raw data needed to compute a symbol's relevance
// score. Each field's unit and range is given in its trailing comment.
//
// NOTE(review): Node is presumably the graph node of the symbol being
// scored — confirm.
type ScoringInput struct {
	Node               types.Node
	CallerCount        int     // number of transitive callers (blast radius)
	Confidence         float64 // provenance tier confidence (0.0-1.0)
	LastObserved       int64   // unix timestamp of last runtime observation (0 = static only)
	DistanceFromTarget int     // hops from the task target symbol (binary 0/1 for scoring)
	BFSDistance        int     // actual BFS hop count from nearest seed (for packing proximity)
	RWRScore           float64 // raw normalized RWR score (0-1), proxy for seed proximity
	FeedbackBoost      float64 // 0.0 = no feedback, >0 = positive signal (0.0-1.0)
	SessionBoost       float64 // 0.0 = not seen this session, >0 = recently accessed (0.0-2.0)
	IsTestFile         bool    // true if the symbol is from a test file (deprioritized unless task is about testing)
}

ScoringInput provides the raw data needed to compute a symbol's relevance score.

type SessionTracker ¶ added in v0.2.0

// SessionTracker records which symbols were returned by the context engine
// during the current session, so that subsequent queries can boost those
// symbols and their graph neighbors. Its state is unexported; create one with
// NewSessionTracker.
type SessionTracker struct {
	// contains filtered or unexported fields
}

SessionTracker records which symbols were returned by the context engine during the current session. Subsequent queries boost these symbols and their graph neighbors, implementing the "session-aware retrieval" pattern where repeated interactions surface increasingly relevant context.

Design informed by competitive analysis:

- Exponential decay (3-minute half-life for AI sessions, not days)
- Capped boost multiplier (max 2.0x, prevents runaway dominance)
- Tracks both returned symbols and queried files
- Thread-safe for concurrent MCP tool calls

func NewSessionTracker ¶ added in v0.2.0

func NewSessionTracker() *SessionTracker

NewSessionTracker creates a tracker for the current session.

func (*SessionTracker) Count ¶ added in v0.2.0

func (st *SessionTracker) Count() int

Count returns the number of unique symbols tracked this session.

func (*SessionTracker) Record ¶ added in v0.2.0

func (st *SessionTracker) Record(hash types.Hash)

Record marks a symbol as accessed at the current time. Call this for every symbol returned in a context result.

func (*SessionTracker) RecordBatch ¶ added in v0.2.0

func (st *SessionTracker) RecordBatch(hashes []types.Hash)

RecordBatch marks multiple symbols as accessed.

func (*SessionTracker) Reset ¶ added in v0.2.0

func (st *SessionTracker) Reset()

Reset clears all session history.

func (*SessionTracker) SessionBoosts ¶ added in v0.2.0

func (st *SessionTracker) SessionBoosts(hashes []types.Hash) map[types.Hash]float64

SessionBoosts returns a boost multiplier for each requested hash based on how recently and frequently it was accessed this session. Values range from 0.0 (never accessed) to maxBoost (frequently/recently accessed). The boost decays exponentially from each access timestamp.

type StoreSearcher ¶ added in v0.12.0

// StoreSearcher provides brute-force cosine search from persisted vectors.
// It is an optional interface: when the HNSW index is empty (no embedding
// phase), the gap-fill falls back to it for O(n) search from SQLite-cached
// vectors.
type StoreSearcher interface {
	// LoadAndSearchFromStore searches the persisted vectors for query and
	// returns matching node hashes.
	// NOTE(review): k is presumably the maximum number of hashes to return,
	// mirroring VectorSearcher.EmbedAndSearch — confirm.
	LoadAndSearchFromStore(ctx stdctx.Context, query string, k int) ([]types.Hash, error)
}

StoreSearcher provides brute-force cosine search from persisted vectors. Optional interface; when the HNSW index is empty (no embedding phase), the gap-fill falls back to this for O(n) search from SQLite-cached vectors.

type SweepParams ¶ added in v0.8.0

// SweepParams holds tunable parameters for the retrieval pipeline. It is used
// by the parameter sweep benchmark to test different configurations.
//
// Zero values mean "use default". TestPenalty is the exception called out by
// its field comment: -1 is its use-default sentinel.
type SweepParams struct {
	Alpha       float64 // RWR restart probability (default 0.2)
	MaxIter     int     // RWR iterations (default 20)
	ScoreCutoff float64 // min RWR score threshold (default 0.02)
	MaxSeeds    int     // max RWR seeds (default 15)
	RRFk        float64 // RRF constant (default 60)
	BlastW      float64 // blast radius ranking weight
	ConfW       float64 // confidence ranking weight
	RecencyW    float64 // recency ranking weight
	DistanceW   float64 // distance ranking weight
	TestPenalty float64 // test file penalty multiplier (default 0.3, -1 means use default)
}

SweepParams holds tunable parameters for the retrieval pipeline. Used by the parameter sweep benchmark to test different configurations. Zero values mean "use default", except for TestPenalty, whose field comment designates -1 as the use-default sentinel.

type TaskMemory ¶ added in v0.2.0

// TaskMemory persists which symbols were useful for which tasks, enabling the
// retrieval pipeline to learn from past agent interactions. The memory is
// passive: it records symbols that were returned by context_for_task and
// later accessed in the session, with no explicit user action required.
// Its state is unexported; create one with NewTaskMemory.
type TaskMemory struct {
	// contains filtered or unexported fields
}

TaskMemory persists which symbols were useful for which tasks, enabling the retrieval pipeline to learn from past agent interactions. Over time, the system develops per-repo vocabulary: "when a developer asks about X, these symbols tend to be what they actually need."

The memory is passive: it records what symbols were returned by context_for_task and later accessed in the session (via SessionTracker). No explicit user action required.

func NewTaskMemory ¶ added in v0.2.0

func NewTaskMemory(db *sql.DB) *TaskMemory

NewTaskMemory creates a task memory backed by the given database. The database must have the task_memory table (migration 008).

func (*TaskMemory) Count ¶ added in v0.2.0

func (tm *TaskMemory) Count(ctx context.Context) int

Count returns the number of stored task-symbol associations.

func (*TaskMemory) Recall ¶ added in v0.2.0

func (tm *TaskMemory) Recall(ctx context.Context, queryKeywords []string) (map[types.Hash]float64, error)

Recall finds symbols that were useful for tasks with similar keywords. Uses keyword overlap: the more query keywords match stored task keywords, the stronger the signal. Returns a map of symbol hash to boost score.

func (*TaskMemory) Record ¶ added in v0.2.0

func (tm *TaskMemory) Record(ctx context.Context, keywords string, symbolHash types.Hash, score float64) error

Record stores a (keywords, symbol) association from a completed task. Call this when a symbol returned by context_for_task was later accessed by the agent (positive signal) or when explicit feedback is given.

func (*TaskMemory) RecordBatch ¶ added in v0.2.0

func (tm *TaskMemory) RecordBatch(ctx context.Context, keywords string, symbolHashes []types.Hash, score float64) error

RecordBatch stores multiple associations at once.

type TaskOptions ¶

// TaskOptions configures a task-based context query.
//
// NOTE(review): TaskDescription is presumably the natural-language task text
// that keywords are extracted from, and a zero TokenBudget presumably selects
// the documented default of 50000 — confirm both against the query
// implementation.
type TaskOptions struct {
	TaskDescription string
	TokenBudget     int    // default 50000
	Format          string // "xml", "markdown", "json"
	DBPath          string // path to knowing.db (for CLI usage)
	RepoURL         string // optional: scope search to this repo (filters out cross-repo noise)
}

TaskOptions configures a task-based context query.

type UsedSymbol ¶ added in v0.14.0

// UsedSymbol represents a symbol that was positively attributed (the agent
// used it). It pairs the symbol's hash with its short name.
type UsedSymbol struct {
	Hash types.Hash
	Name string // short symbol name for vocab association
}

UsedSymbol represents a symbol that was positively attributed (agent used it).

type VectorReRanker ¶ added in v0.10.0

// VectorReRanker re-ranks candidates by embedding similarity to a query.
// It is an optional interface: if the VectorSearcher also implements it, the
// engine uses it to re-rank RWR output before packing.
type VectorReRanker interface {
	// ReRank embeds the query and each candidate text, returns indices sorted by
	// descending cosine similarity to the query.
	ReRank(ctx stdctx.Context, query string, candidates []string) ([]int, error)
	// ReRankScores embeds the query and each candidate, returns cosine similarity
	// scores (0.0-1.0) for each candidate at its original index position.
	ReRankScores(ctx stdctx.Context, query string, candidates []string) ([]float64, error)
	// ReRankByHashes re-ranks using cached vectors looked up by node hash.
	// Only embeds the query (1 inference call). Falls back to embedding
	// candidates on cache miss. Returns scores at original index positions.
	ReRankByHashes(ctx stdctx.Context, query string, hashes []types.Hash, fallbackTexts []string) ([]float64, error)
}

VectorReRanker re-ranks candidates by embedding similarity to a query. Optional interface; if the VectorSearcher also implements this, the engine uses it to re-rank RWR output before packing.

type VectorSearcher ¶ added in v0.2.0

// VectorSearcher provides semantic nearest-neighbor search over symbol
// embeddings.
type VectorSearcher interface {
	// EmbedAndSearch embeds the query text and returns the k nearest symbol node hashes.
	EmbedAndSearch(ctx stdctx.Context, query string, k int) ([]types.Hash, error)
}

VectorSearcher provides semantic nearest-neighbor search over symbol embeddings.

type VocabProvider ¶ added in v0.14.0

// VocabProvider reads learned keyword -> symbol associations.
type VocabProvider interface {
	// LearnedVocabTargets returns symbol names associated with any of the given
	// keywords where the association count >= minCount. Returns a map of
	// keyword -> []symbolName for easy consumption.
	LearnedVocabTargets(ctx stdctx.Context, keywords []string, minCount int) (map[string][]string, error)
}

VocabProvider reads learned keyword -> symbol associations.

type VocabProviderWithCounts ¶ added in v0.14.0

// VocabProviderWithCounts extends VocabProvider with count-aware lookups.
// When available, it allows confidence-weighted scoring based on observation
// count. The result uses an anonymous struct to avoid import cycles (store
// can't import context).
type VocabProviderWithCounts interface {
	VocabProvider
	// LearnedVocabDetails is the count-aware lookup: for each keyword it
	// returns entries pairing a SymbolName with a Count.
	// NOTE(review): Count is presumably the observation count compared
	// against minCount, and the optional subgraphRoots presumably restrict
	// results to associations whose recorded subgraph root still matches —
	// confirm against the store implementation.
	LearnedVocabDetails(ctx stdctx.Context, keywords []string, minCount int, subgraphRoots ...map[types.Hash]types.Hash) (map[string][]struct {
		SymbolName string
		Count      int
	}, error)
}

VocabProviderWithCounts extends VocabProvider with count-aware lookups. When available, it allows confidence-weighted scoring based on observation count. It uses an anonymous struct to avoid import cycles (store can't import context).

type VocabRecorder ¶ added in v0.14.0

// VocabRecorder writes learned keyword -> symbol associations.
type VocabRecorder interface {
	// RecordVocabAssociation stores an association between keyword and the
	// symbol identified by symbolName and symbolHash. The optional
	// subgraphRoot ties the association to the symbol's package state at
	// recording time; when the package changes (Merkle root differs), the
	// association expires automatically.
	RecordVocabAssociation(ctx stdctx.Context, keyword string, symbolName string, symbolHash types.Hash, subgraphRoot ...types.Hash) error
}

VocabRecorder writes learned keyword -> symbol associations. The optional subgraphRoot ties the association to the symbol's package state at recording time. When the package changes (Merkle root differs), the association expires automatically.

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL