Documentation ¶
Index ¶
- Constants
- func CheckSummaryThresholds(r *Result, opts Options) []string
- func FormatJSON(r *Result) ([]byte, error)
- func FormatText(r *Result) string
- type AgentMetrics
- type ApproxTokenCounter
- type ArtifactGrade
- type ArtifactReason
- type BaselineMetrics
- type BudgetEvent
- type BudgetReport
- type CaseAgentMetrics
- type CaseFile
- type CaseResult
- type CaseSpec
- type CommandCaseOutput
- type CommandRunner
- type ConceptMissDiagnostic
- type CorpusSlice
- type CorpusSummary
- type Diagnostics
- type ExpectedArtifact
- type ExtensionDiagnostic
- type FalsePositiveDiagnostic
- type FalsePositiveExample
- type File
- type GradeCounts
- type GraphCandidate
- type GraphContext
- type GraphContextGroup
- type GraphDiagnostics
- type GraphSuppression
- type IndexCacheReport
- type LaneCounts
- type LaneMetric
- type MissClassDiagnostic
- type Options
- type ParetoSummary
- type PhaseTelemetry
- type PricingProfile
- type ProfiledTokenCounter
- type Result
- type RoleDiagnostic
- type SuccessCriteria
- type SufficiencyResult
- type Summary
- type TokenBudgetSufficiency
- type TokenCounter
- type TokenizerProfile
- type UnindexedDocumentDiagnostic
Constants ¶
View Source
// Lane* are the string identifiers for the lane names used by this package.
// The values are part of the serialized output and must not change.
const (
	LaneDocsPlans          = "docs_plans"
	LaneTestCase           = "test_case"
	LaneCodeComment        = "code_comment"
	LaneSourceContextOther = "source_context_other"
	LanePackedSections     = "packed_sections"
)
View Source
// CanonicalLane* are the string identifiers for the canonical lane names.
// CanonicalLaneUnknown is the only value in the group that does not name a
// specific lane. The values are part of the serialized output and must not
// change.
const (
	CanonicalLaneIntent        = "intent"
	CanonicalLaneModel         = "model"
	CanonicalLaneProtocol      = "protocol"
	CanonicalLaneTemplate      = "template"
	CanonicalLaneTrace         = "trace"
	CanonicalLaneSourceContext = "source_context"
	CanonicalLaneUnknown       = "unknown"
)
View Source
// CorpusSource* and ProductPath* are the string identifiers for the corpus
// source and product path names. The values are part of the serialized output
// and must not change.
const (
	// Corpus source identifiers.
	CorpusSourceFilesystemFixture = "filesystem_fixture"
	CorpusSourceSQLiteIndex       = "sqlite_index"

	// Product path identifiers.
	ProductPathLabOnly        = "lab_only"
	ProductPathIndexedHarness = "indexed_harness"
	ProductPathLiveCLICommand = "live_cli_command"
)
Variables ¶
This section is empty.
Functions ¶
func CheckSummaryThresholds ¶
func FormatJSON ¶
func FormatText ¶
Types ¶
type AgentMetrics ¶
// AgentMetrics groups the must-hit-at-K values (K = 1, 3, 5, 10), the mean
// first-must and first-useful ranks, the per-token-budget sufficiency entries
// and a low-precision-sufficient case count under the JSON keys in the tags.
// NOTE(review): the float64 MustHitAt* fields share names with the bool fields
// on CaseAgentMetrics, so they are presumably rates across cases — confirm
// against the aggregation code.
type AgentMetrics struct {
MustHitAt1 float64 `json:"must_hit_at_1"`
MustHitAt3 float64 `json:"must_hit_at_3"`
MustHitAt5 float64 `json:"must_hit_at_5"`
MustHitAt10 float64 `json:"must_hit_at_10"`
// The two mean-rank fields are omitted from JSON when they are exactly 0.
MeanFirstMustRank float64 `json:"mean_first_must_rank,omitempty"`
MeanFirstUsefulRank float64 `json:"mean_first_useful_rank,omitempty"`
// No omitempty here: a nil slice is encoded as JSON null, not [].
ContextSufficiencyAtTokenBudget []TokenBudgetSufficiency `json:"context_sufficiency_at_token_budget"`
LowPrecisionSufficientCases int `json:"low_precision_sufficient_cases"`
}
type ApproxTokenCounter ¶
// ApproxTokenCounter is a field-less type; the package declares Count, Name
// and Profile methods on it with value receivers.
type ApproxTokenCounter struct{}
func (ApproxTokenCounter) Count ¶
func (ApproxTokenCounter) Count(text string) int
func (ApproxTokenCounter) Name ¶
func (ApproxTokenCounter) Name() string
func (ApproxTokenCounter) Profile ¶
func (ApproxTokenCounter) Profile() TokenizerProfile
type ArtifactGrade ¶
// ArtifactGrade is the JSON record for one graded artifact: its path, lane,
// optional canonical lane, grade label, weight and three boolean flags.
type ArtifactGrade struct {
Path string `json:"path"`
Lane string `json:"lane"`
CanonicalLane string `json:"canonical_lane,omitempty"`
Grade string `json:"grade"`
Weight float64 `json:"weight"`
// Exact is always emitted; the two flags below are omitted when false.
Exact bool `json:"exact"`
SameCluster bool `json:"same_cluster,omitempty"`
HardNegative bool `json:"hard_negative,omitempty"`
}
type ArtifactReason ¶
type BaselineMetrics ¶
// BaselineMetrics is the JSON record for one named baseline: its file scope,
// whether source candidates are included, token and artifact counts, the
// artifact list, and relevant/irrelevant counts. No field uses omitempty, so
// every key is always emitted (a nil Artifacts slice encodes as null).
type BaselineMetrics struct {
Name string `json:"name"`
FileScope string `json:"file_scope"`
IncludesSourceCandidates bool `json:"includes_source_candidates"`
Tokens int `json:"tokens"`
ArtifactCount int `json:"artifact_count"`
Artifacts []string `json:"artifacts"`
RelevantIncluded int `json:"relevant_included"`
IrrelevantCount int `json:"irrelevant_count"`
}
type BudgetEvent ¶
type BudgetReport ¶
// BudgetReport carries five Max* limits plus a list of BudgetEvent values
// under the "applied" key. Every field is omitempty, so zero limits and an
// empty Applied slice are left out of the JSON.
// NOTE(review): the Max* names match the same-named fields on Options, so
// these presumably echo the configured limits — confirm.
type BudgetReport struct {
MaxCorpusFiles int `json:"max_corpus_files,omitempty"`
MaxSourceFiles int `json:"max_source_files,omitempty"`
MaxTestCaseArtifacts int `json:"max_test_case_artifacts,omitempty"`
MaxCodeComments int `json:"max_code_comments,omitempty"`
MaxCaseSeconds int `json:"max_case_seconds,omitempty"`
Applied []BudgetEvent `json:"applied,omitempty"`
}
type CaseAgentMetrics ¶
// CaseAgentMetrics holds artifact counts, three precision values, first-rank
// positions, must-hit-at-K booleans (K = 1, 3, 5, 10) and grade/lane counts
// for a single set of included artifacts.
type CaseAgentMetrics struct {
IncludedArtifacts int `json:"included_artifacts"`
ExactRelevantArtifacts int `json:"exact_relevant_artifacts"`
SameClusterArtifacts int `json:"same_cluster_artifacts"`
HardNegativeArtifacts int `json:"hard_negative_artifacts"`
StrictPrecision float64 `json:"strict_precision"`
GradedPrecision float64 `json:"graded_precision"`
PenalizedUtilityPrecision float64 `json:"penalized_utility_precision"`
// Both rank fields are omitted from JSON when 0.
// NOTE(review): 0 presumably means "no such artifact was found" — confirm.
FirstMustRank int `json:"first_must_rank,omitempty"`
FirstUsefulRank int `json:"first_useful_rank,omitempty"`
MustHitAt1 bool `json:"must_hit_at_1"`
MustHitAt3 bool `json:"must_hit_at_3"`
MustHitAt5 bool `json:"must_hit_at_5"`
MustHitAt10 bool `json:"must_hit_at_10"`
GradeCounts GradeCounts `json:"grade_counts"`
LaneCounts LaneCounts `json:"lane_counts"`
}
type CaseResult ¶
// CaseResult is the JSON record emitted for one evaluated case. It combines
// the case identity (ID, Query), timing and budget flags, token counts and
// token-reduction ratios, recall counters per importance level, the included
// artifacts with their reasons, optional pack/graph/related/combined-tiered
// sections, precision and miss diagnostics, sufficiency results, baselines
// and threshold failures.
//
// Fields tagged omitempty are dropped when they hold the zero value of a
// scalar, a nil pointer, or an empty slice. Struct-valued fields are never
// considered empty by encoding/json; see the notes on RelatedAgentMetrics and
// CombinedTieredContextSufficiency below.
type CaseResult struct {
ID string `json:"id"`
Query string `json:"query"`
CaseDurationMS int64 `json:"case_duration_ms,omitempty"`
CaseBudgetExceeded bool `json:"case_budget_exceeded,omitempty"`
CaseBudgetSeconds int `json:"case_budget_seconds,omitempty"`
// Token counts for the produced context and for the comparison baselines.
DevSpecsTokens int `json:"devspecs_tokens"`
FullPlanningTokens int `json:"full_planning_tokens"`
AllMarkdownTokens int `json:"all_markdown_tokens"`
FullCandidateCorpusTokens int `json:"full_candidate_corpus_tokens"`
QueryFileBaselineTokens int `json:"query_file_baseline_tokens"`
PreBudgetDevSpecsTokens int `json:"pre_budget_devspecs_tokens,omitempty"`
ContextTokenBudget int `json:"context_token_budget,omitempty"`
ContextBudgetDroppedCount int `json:"context_budget_dropped_count,omitempty"`
ContextBudgetDroppedArtifacts []string `json:"context_budget_dropped_artifacts,omitempty"`
// NOTE: the last two JSON keys below are longer than their Go field names
// ("..._full_candidate_corpus", "..._query_file_baseline").
TokenReductionVsFullPlanning float64 `json:"token_reduction_vs_full_planning"`
TokenReductionVsAllMarkdown float64 `json:"token_reduction_vs_all_markdown"`
TokenReductionVsFullCandidate float64 `json:"token_reduction_vs_full_candidate_corpus"`
TokenReductionVsQueryFile float64 `json:"token_reduction_vs_query_file_baseline"`
// Recall counters: overall, then must / helpful / background.
ExpectedRelevantCount int `json:"expected_relevant_count"`
RelevantRetrieved int `json:"relevant_retrieved"`
ArtifactRecall float64 `json:"artifact_recall"`
MustExpectedCount int `json:"must_expected_count"`
MustRelevantRetrieved int `json:"must_relevant_retrieved"`
MustHaveRecall float64 `json:"must_have_recall"`
HelpfulExpectedCount int `json:"helpful_expected_count"`
HelpfulRelevantRetrieved int `json:"helpful_relevant_retrieved"`
HelpfulRecall float64 `json:"helpful_recall"`
BackgroundExpectedCount int `json:"background_expected_count"`
BackgroundRelevantRetrieved int `json:"background_relevant_retrieved"`
BackgroundRecall float64 `json:"background_recall"`
ArtifactsIncluded []string `json:"artifacts_included"`
ArtifactReasons []ArtifactReason `json:"artifact_reasons"`
// Optional pack and graph sections; the pointer fields are omitted when nil.
PackDiagnostics *retrieval.RoleGroupedPack `json:"pack_diagnostics,omitempty"`
PackSummary *retrieval.PackSummary `json:"pack_summary,omitempty"`
GraphContext *GraphContext `json:"graph_context,omitempty"`
GraphDiagnostics *GraphDiagnostics `json:"graph_diagnostics,omitempty"`
GraphContextArtifacts []string `json:"graph_context_artifacts,omitempty"`
GraphContextArtifactReasons []ArtifactReason `json:"graph_context_artifact_reasons,omitempty"`
GraphContextDevSpecsTokens int `json:"graph_context_devspecs_tokens,omitempty"`
GraphContextRelevantIncluded []string `json:"graph_context_relevant_included,omitempty"`
GraphContextIrrelevantIncluded []string `json:"graph_context_irrelevant_included,omitempty"`
GraphContextArtifactPrecision float64 `json:"graph_context_artifact_precision,omitempty"`
GraphContextAgentMetrics *CaseAgentMetrics `json:"graph_context_agent_metrics,omitempty"`
GraphContextArtifactGrades []ArtifactGrade `json:"graph_context_artifact_grades,omitempty"`
GraphAssistedRelevantIncluded []string `json:"graph_assisted_relevant_included,omitempty"`
// Optional related-artifact section.
RelatedArtifacts []string `json:"related_artifacts,omitempty"`
RelatedArtifactReasons []ArtifactReason `json:"related_artifact_reasons,omitempty"`
RelatedDevSpecsTokens int `json:"related_devspecs_tokens,omitempty"`
RelatedRelevantIncluded []string `json:"related_relevant_included,omitempty"`
RelatedIrrelevantIncluded []string `json:"related_irrelevant_included,omitempty"`
RelatedArtifactPrecision float64 `json:"related_artifact_precision,omitempty"`
// NOTE(review): omitempty has no effect on a struct value in encoding/json,
// so "related_agent_metrics" is always emitted. GraphContextAgentMetrics
// above is a pointer and is omitted when nil; confirm which is intended.
RelatedAgentMetrics CaseAgentMetrics `json:"related_agent_metrics,omitempty"`
RelatedArtifactGrades []ArtifactGrade `json:"related_artifact_grades,omitempty"`
CombinedTieredArtifacts []string `json:"combined_tiered_artifacts,omitempty"`
CombinedTieredDevSpecsTokens int `json:"combined_tiered_devspecs_tokens,omitempty"`
// NOTE(review): struct value with omitempty — always emitted, see above.
CombinedTieredContextSufficiency SufficiencyResult `json:"combined_tiered_context_sufficiency,omitempty"`
PackedSectionArtifacts []string `json:"packed_section_artifacts,omitempty"`
PackedSectionCount int `json:"packed_section_count,omitempty"`
SectionSelectedArtifacts []string `json:"section_selected_artifacts,omitempty"`
SectionSelectedCount int `json:"section_selected_count,omitempty"`
FullFileArtifactCount int `json:"full_file_artifact_count,omitempty"`
TestCaseArtifactCount int `json:"test_case_artifact_count,omitempty"`
CodeCommentArtifactCount int `json:"code_comment_artifact_count,omitempty"`
// Precision and miss diagnostics. The slices without omitempty encode as
// null when nil.
RelevantIncluded []string `json:"relevant_included"`
IrrelevantIncluded []string `json:"irrelevant_included"`
ArtifactPrecision float64 `json:"artifact_precision"`
MissedExpectedRelevant []string `json:"missed_expected_relevant"`
MissedMustConceptDiagnostics []ConceptMissDiagnostic `json:"missed_must_concept_diagnostics,omitempty"`
PrimaryFalsePositiveDiagnostics []FalsePositiveExample `json:"primary_false_positive_diagnostics,omitempty"`
UnexpectedExcludedHits []string `json:"unexpected_excluded_hits"`
ExpectedAvailableCount int `json:"expected_available_count"`
ExpectedMissingFromCorpus []string `json:"expected_missing_from_corpus,omitempty"`
MissedAfterDiscovery []string `json:"missed_after_discovery,omitempty"`
DiscoveryCoverage float64 `json:"discovery_coverage"`
RetrievalCoverageOfDiscovered float64 `json:"retrieval_coverage_of_discovered"`
ContextSufficiency SufficiencyResult `json:"context_sufficiency"`
AgentMetrics CaseAgentMetrics `json:"agent_metrics"`
ArtifactGrades []ArtifactGrade `json:"artifact_grades,omitempty"`
Baselines []BaselineMetrics `json:"baselines"`
ThresholdFailures []string `json:"threshold_failures,omitempty"`
}
type CaseSpec ¶
// CaseSpec describes one case. It carries both yaml and json tags, so the
// same struct is usable with a YAML decoder and with encoding/json. It holds
// the case ID and query, the expected relevant artifacts, the expected
// excluded paths, an expected-status map and the success criteria.
type CaseSpec struct {
ID string `yaml:"id" json:"id"`
Query string `yaml:"query" json:"query"`
// ExpectedArtifact declares a custom UnmarshalYAML, so the YAML form of
// each entry is decided by that method rather than by the struct tags.
ExpectedRelevant []ExpectedArtifact `yaml:"expected_relevant" json:"expected_relevant"`
ExpectedExcluded []string `yaml:"expected_excluded" json:"expected_excluded"`
ExpectedStatus map[string]string `yaml:"expected_status" json:"expected_status,omitempty"`
// NOTE(review): omitempty has no effect on a struct value in encoding/json,
// so "success_criteria" is always emitted in JSON.
SuccessCriteria SuccessCriteria `yaml:"success_criteria" json:"success_criteria,omitempty"`
}
type CommandCaseOutput ¶
// CommandCaseOutput is the value type of the map returned by a CommandRunner.
// It has no struct tags. It carries the retrieval candidates, a context
// string, artifact reasons, and optional graph context/diagnostics with their
// own candidates and reasons.
type CommandCaseOutput struct {
Artifacts []retrieval.Candidate
Context string
ArtifactReasons []ArtifactReason
GraphContext *GraphContext
GraphDiagnostics *GraphDiagnostics
GraphContextArtifacts []retrieval.Candidate
GraphContextArtifactReasons []ArtifactReason
}
type CommandRunner ¶
// CommandRunner is a function type that receives a fixture path (named
// fixtureAbs, so presumably absolute) and the case specs, and returns a map
// of CommandCaseOutput keyed by string, or an error.
// NOTE(review): the map key is presumably CaseSpec.ID — confirm at the caller.
type CommandRunner func(fixtureAbs string, cases []CaseSpec) (map[string]CommandCaseOutput, error)
type ConceptMissDiagnostic ¶
// ConceptMissDiagnostic is the JSON record for one expected path: whether it
// was in the candidate pool, its concept rank and score, and the lists of
// matched compacts, phrases, path terms and glossary matches/evidence.
// Everything except ExpectedPath and InCandidatePool is omitted when empty.
type ConceptMissDiagnostic struct {
ExpectedPath string `json:"expected_path"`
InCandidatePool bool `json:"in_candidate_pool"`
ConceptRank int `json:"concept_rank,omitempty"`
ConceptScore float64 `json:"concept_score,omitempty"`
MatchedCompacts []string `json:"matched_compacts,omitempty"`
MatchedPhrases []string `json:"matched_phrases,omitempty"`
MatchedPathTerms []string `json:"matched_path_terms,omitempty"`
GlossaryMatches []string `json:"glossary_matches,omitempty"`
GlossaryEvidence []string `json:"glossary_evidence,omitempty"`
}
type CorpusSlice ¶
type CorpusSummary ¶
// CorpusSummary holds four CorpusSlice values: planning artifacts, markdown
// files, source-context candidates and the full candidate corpus. All four
// keys are always emitted in JSON.
type CorpusSummary struct {
PlanningArtifacts CorpusSlice `json:"planning_artifacts"`
MarkdownFiles CorpusSlice `json:"markdown_files"`
SourceContextCandidates CorpusSlice `json:"source_context_candidates"`
FullCandidateCorpus CorpusSlice `json:"full_candidate_corpus"`
}
type Diagnostics ¶
// Diagnostics is the run-level diagnostics section of a Result. It holds
// expected/available/missing/missed counts, two coverage ratios, the
// corresponding path lists, per-role, per-miss-class, false-positive,
// per-extension and unindexed-document summaries, and optional OpenSpec
// metrics.
type Diagnostics struct {
ExpectedRelevantCount int `json:"expected_relevant_count"`
ExpectedAvailableCount int `json:"expected_available_count"`
ExpectedMissingFromCorpusCount int `json:"expected_missing_from_corpus_count"`
MissedAfterDiscoveryCount int `json:"missed_after_discovery_count"`
DiscoveryCoverage float64 `json:"discovery_coverage"`
RetrievalCoverageOfDiscovered float64 `json:"retrieval_coverage_of_discovered"`
// All list fields below are omitted from JSON when empty.
ExpectedMissingFromCorpus []string `json:"expected_missing_from_corpus,omitempty"`
MissedAfterDiscovery []string `json:"missed_after_discovery,omitempty"`
RoleSummaries []RoleDiagnostic `json:"role_summaries,omitempty"`
MissClassSummaries []MissClassDiagnostic `json:"miss_class_summaries,omitempty"`
FalsePositiveSummaries []FalsePositiveDiagnostic `json:"false_positive_summaries,omitempty"`
ExtensionSummaries []ExtensionDiagnostic `json:"extension_summaries,omitempty"`
UnindexedDocumentSummaries []UnindexedDocumentDiagnostic `json:"unindexed_document_summaries,omitempty"`
// Pointer into the openspecmetrics package; omitted when nil.
OpenSpec *openspecmetrics.Metrics `json:"openspec,omitempty"`
}
type ExpectedArtifact ¶
// ExpectedArtifact pairs an artifact path with an importance label. The type
// also declares a pointer-receiver UnmarshalYAML method, so YAML decoding is
// customized; JSON encoding uses the tags as written.
// NOTE(review): the importance values are presumably must / helpful /
// background, matching the recall fields on CaseResult — confirm.
type ExpectedArtifact struct {
Path string `yaml:"path" json:"path"`
Importance string `yaml:"importance" json:"importance"`
}
func (*ExpectedArtifact) UnmarshalYAML ¶
func (a *ExpectedArtifact) UnmarshalYAML(value *yaml.Node) error
type ExtensionDiagnostic ¶
// ExtensionDiagnostic is the JSON record for one (extension, role) pair:
// expected, exact-retrieved, missing-from-corpus, missed-after-discovery and
// primary-false-positive counts, plus grade counts and example strings.
type ExtensionDiagnostic struct {
Extension string `json:"extension"`
Role string `json:"role"`
Expected int `json:"expected"`
ExactRetrieved int `json:"exact_retrieved"`
MissingFromCorpus int `json:"missing_from_corpus"`
MissedAfterDiscovery int `json:"missed_after_discovery"`
PrimaryFalsePositive int `json:"primary_false_positive"`
// NOTE(review): if GradeCounts is a struct (its definition is not visible
// here), omitempty has no effect in encoding/json and this key is always
// emitted.
PrimaryFalsePositiveGrades GradeCounts `json:"primary_false_positive_grades,omitempty"`
Examples []string `json:"examples,omitempty"`
}
type FalsePositiveDiagnostic ¶
// FalsePositiveDiagnostic is the JSON record for one false-positive bucket.
// It holds the class, query type, lane, role and reason class labels, the
// grade counts, a total count and optional FalsePositiveExample entries.
type FalsePositiveDiagnostic struct {
Class string `json:"class"`
QueryType string `json:"query_type"`
Lane string `json:"lane"`
Role string `json:"role"`
ReasonClass string `json:"reason_class"`
GradeCounts GradeCounts `json:"grade_counts"`
Count int `json:"count"`
Examples []FalsePositiveExample `json:"examples,omitempty"`
}
type FalsePositiveExample ¶
// FalsePositiveExample is the JSON record for one false-positive artifact:
// the case it came from, the query type, the artifact path and position, its
// lane, role, grade and weight, and the reason class with optional reason
// strings.
// NOTE(review): whether Position is 0- or 1-based is not visible here.
type FalsePositiveExample struct {
CaseID string `json:"case_id"`
QueryType string `json:"query_type"`
Path string `json:"path"`
Position int `json:"position"`
Lane string `json:"lane"`
Role string `json:"role"`
Grade string `json:"grade"`
Weight float64 `json:"weight"`
ReasonClass string `json:"reason_class"`
Reasons []string `json:"reasons,omitempty"`
}
type GradeCounts ¶
type GraphCandidate ¶
// GraphCandidate is the JSON record for one graph candidate. Every field is
// tagged omitempty, so a zero-valued GraphCandidate encodes as {}. It carries
// identifiers and paths, kind/subtype/title, role and role reason, the seed
// path and admission edge type, confidence and weight, a source signal, a
// companion-derived flag and receipt strings.
type GraphCandidate struct {
ID string `json:"id,omitempty"`
ShortID string `json:"short_id,omitempty"`
Path string `json:"path,omitempty"`
SourcePath string `json:"source_path,omitempty"`
Kind string `json:"kind,omitempty"`
Subtype string `json:"subtype,omitempty"`
Title string `json:"title,omitempty"`
Role string `json:"role,omitempty"`
RoleReason string `json:"role_reason,omitempty"`
SeedPath string `json:"seed_path,omitempty"`
AdmissionEdgeType string `json:"admission_edge_type,omitempty"`
// A confidence or weight of exactly 0 is omitted, not emitted as 0.
Confidence float64 `json:"confidence,omitempty"`
Weight float64 `json:"weight,omitempty"`
SourceSignal string `json:"source_signal,omitempty"`
CompanionDerived bool `json:"companion_derived,omitempty"`
Receipt string `json:"receipt,omitempty"`
SupportReceipts []string `json:"support_receipts,omitempty"`
}
type GraphContext ¶
// GraphContext is the JSON record for a graph context: a mode and optional
// evidence mode and title, candidate and suppressed counts, a string-keyed
// count map, the candidate groups and free-form notes. Mode and
// CandidateCount are always emitted; the remaining fields are omitted when
// empty.
type GraphContext struct {
Mode string `json:"mode"`
EvidenceMode string `json:"evidence_mode,omitempty"`
Title string `json:"title,omitempty"`
CandidateCount int `json:"candidate_count"`
SuppressedCount int `json:"suppressed_count,omitempty"`
Counts map[string]int `json:"counts,omitempty"`
Groups []GraphContextGroup `json:"groups,omitempty"`
Notes []string `json:"notes,omitempty"`
}
type GraphContextGroup ¶
// GraphContextGroup is one group inside GraphContext.Groups: a role, a title
// and the GraphCandidate items in the group. No field uses omitempty, so a
// nil Items slice encodes as null.
type GraphContextGroup struct {
Role string `json:"role"`
Title string `json:"title"`
Items []GraphCandidate `json:"items"`
}
type GraphDiagnostics ¶
// GraphDiagnostics is the JSON record for graph diagnostics: a mode, seed,
// candidate and suppressed counts, a string-keyed count map, the candidate
// and suppression lists and free-form notes. Mode, SeedCount and
// CandidateCount are always emitted; the remaining fields are omitted when
// empty.
type GraphDiagnostics struct {
Mode string `json:"mode"`
SeedCount int `json:"seed_count"`
CandidateCount int `json:"candidate_count"`
SuppressedCount int `json:"suppressed_count,omitempty"`
Counts map[string]int `json:"counts,omitempty"`
Candidates []GraphCandidate `json:"candidates,omitempty"`
Suppressed []GraphSuppression `json:"suppressed,omitempty"`
Notes []string `json:"notes,omitempty"`
}
type GraphSuppression ¶
type IndexCacheReport ¶
// IndexCacheReport is the JSON record for the index cache: an enabled flag
// and a hit flag (both always emitted), plus an optional key, path, schema
// version, reason and corpus/provenance fingerprints.
type IndexCacheReport struct {
Enabled bool `json:"enabled"`
Hit bool `json:"hit"`
Key string `json:"key,omitempty"`
Path string `json:"path,omitempty"`
SchemaVersion int `json:"schema_version,omitempty"`
Reason string `json:"reason,omitempty"`
CorpusFingerprint string `json:"corpus_fingerprint,omitempty"`
ProvenanceFingerprint string `json:"provenance_fingerprint,omitempty"`
}
type LaneCounts ¶
type LaneMetric ¶
// LaneMetric is the JSON record for one lane. It holds case counts, included,
// exact-relevant, same-cluster, hard-negative and expected artifact counts, a
// graded relevance weight, and optional precision/recall values and packed
// section count. Result uses it for both "lane_metrics" and
// "canonical_lane_metrics".
type LaneMetric struct {
Lane string `json:"lane"`
Cases int `json:"cases"`
CasesWithIncluded int `json:"cases_with_included"`
CasesWithExpected int `json:"cases_with_expected"`
IncludedArtifacts int `json:"included_artifacts"`
ExactRelevantArtifacts int `json:"exact_relevant_artifacts"`
SameClusterArtifacts int `json:"same_cluster_artifacts"`
HardNegativeArtifacts int `json:"hard_negative_artifacts"`
ExpectedArtifacts int `json:"expected_artifacts"`
GradedRelevanceWeight float64 `json:"graded_relevance_weight"`
// With omitempty a precision or recall of exactly 0 is omitted, so a JSON
// consumer cannot tell "0" from "not computed" for these three fields.
StrictPrecision float64 `json:"strict_precision,omitempty"`
GradedPrecision float64 `json:"graded_precision,omitempty"`
Recall float64 `json:"recall,omitempty"`
PackedSectionCount int `json:"packed_section_count,omitempty"`
}
type MissClassDiagnostic ¶
type Options ¶
// Options is the configuration struct passed to CheckSummaryThresholds. It
// has no struct tags. It bundles output selection, optional thresholds,
// corpus/command identification, pluggable collaborators, feature toggles,
// resource limits and progress reporting.
type Options struct {
JSON bool
// Thresholds are *float64 so that nil is distinguishable from 0.
// NOTE(review): nil presumably disables the corresponding check — confirm
// in CheckSummaryThresholds.
MinRecall *float64
MinMeanRecall *float64
MinMustRecall *float64
MinSufficiency *float64
MinReductionFull *float64
CorpusSource string
CommandUnderTest string
FindRuntime string
// Pluggable collaborators.
CommandRunner CommandRunner
TokenCounter TokenCounter
Retriever retrieval.Retriever
// Feature toggles.
TestCaseArtifacts bool
CodeCommentArtifacts bool
DisableSectionAwareRetrieval bool
ExperimentalBalancedEvidence bool
ExperimentalBudgetedPacking bool
ExperimentalConceptBackfill bool
ExperimentalGlossaryConcepts bool
ExperimentalTieredConceptOutput bool
ExperimentalAnchorFirstRanking bool
ExperimentalAnchorFirstMode string
ExperimentalSupportDocs bool
PackDiagnostics bool
GraphDiagnostics bool
ContextTokenBudget int
IndexCacheDir string
RefreshIndexCache bool
// Limits; the same names appear on BudgetReport.
// NOTE(review): 0 presumably means "no limit" — confirm.
MaxCorpusFiles int
MaxSourceFiles int
MaxTestCaseArtifacts int
MaxCodeComments int
MaxCaseSeconds int
// Progress reporting destination and interval.
ProgressWriter io.Writer
ProgressInterval time.Duration
}
type ParetoSummary ¶
// ParetoSummary holds eight float64 values: two mean token reductions, mean
// artifact and must-have recall, three mean precision variants and the
// context sufficiency pass rate. All eight JSON keys also appear on Summary,
// and all are always emitted.
type ParetoSummary struct {
MeanTokenReductionVsFullPlanning float64 `json:"mean_token_reduction_vs_full_planning"`
MeanTokenReductionVsQueryFileBaseline float64 `json:"mean_token_reduction_vs_query_file_baseline"`
MeanArtifactRecall float64 `json:"mean_artifact_recall"`
MeanMustHaveRecall float64 `json:"mean_must_have_recall"`
MeanArtifactPrecision float64 `json:"mean_artifact_precision"`
MeanGradedPrecision float64 `json:"mean_graded_precision"`
MeanPenalizedUtilityPrecision float64 `json:"mean_penalized_utility_precision"`
ContextSufficiencyPassRate float64 `json:"context_sufficiency_pass_rate"`
}
type PhaseTelemetry ¶
type PricingProfile ¶
type ProfiledTokenCounter ¶
// ProfiledTokenCounter embeds TokenCounter and adds a Profile method that
// returns a TokenizerProfile. ApproxTokenCounter declares a Profile method
// with this signature.
type ProfiledTokenCounter interface {
TokenCounter
Profile() TokenizerProfile
}
type Result ¶
// Result is the top-level record accepted by FormatJSON, FormatText and
// CheckSummaryThresholds. It identifies the run (fixture, eval stage, corpus
// source, product path, command, retriever, token counter and profiles) and
// carries the corpus summary, run summary, diagnostics, agent and lane
// metrics, optional notes/telemetry/cache/budget sections and the per-case
// results.
type Result struct {
Fixture string `json:"fixture"`
FixtureVersion string `json:"fixture_version"`
EvalStage string `json:"eval_stage"`
// NOTE(review): presumably one of the CorpusSource* / ProductPath*
// constants — confirm.
CorpusSource string `json:"corpus_source"`
ProductPath string `json:"product_path"`
CommandUnderTest string `json:"command_under_test,omitempty"`
FindRuntime string `json:"find_runtime,omitempty"`
Retriever string `json:"retriever"`
TokenCounter string `json:"token_counter"`
TokenizerProfile TokenizerProfile `json:"tokenizer_profile"`
PricingProfile PricingProfile `json:"pricing_profile"`
ResultsFile string `json:"results_file,omitempty"`
Corpus CorpusSummary `json:"corpus"`
Summary Summary `json:"summary"`
Diagnostics Diagnostics `json:"diagnostics"`
AgentMetrics AgentMetrics `json:"agent_metrics"`
LaneMetrics []LaneMetric `json:"lane_metrics"`
// The Go field name and the JSON key differ here.
CanonicalLanes []LaneMetric `json:"canonical_lane_metrics,omitempty"`
MetricNotes map[string]string `json:"metric_notes,omitempty"`
PhaseTelemetry []PhaseTelemetry `json:"phase_telemetry,omitempty"`
IndexCache *IndexCacheReport `json:"index_cache,omitempty"`
// NOTE(review): omitempty has no effect on a struct value in encoding/json,
// so "budgets" is always emitted (as {} when every BudgetReport field is
// zero, because those fields are themselves omitempty).
Budgets BudgetReport `json:"budgets,omitempty"`
Cases []CaseResult `json:"cases"`
}
type RoleDiagnostic ¶
// RoleDiagnostic is the JSON record for one role: expected, available,
// retrieved, irrelevant-retrieved, missing-from-corpus and
// missed-after-discovery counts, plus discovery coverage and retrieval
// coverage of discovered artifacts. All keys are always emitted.
type RoleDiagnostic struct {
Role string `json:"role"`
Expected int `json:"expected"`
ExpectedAvailable int `json:"expected_available"`
Retrieved int `json:"retrieved"`
IrrelevantRetrieved int `json:"irrelevant_retrieved"`
MissingFromCorpus int `json:"missing_from_corpus"`
MissedAfterDiscovery int `json:"missed_after_discovery"`
DiscoveryCoverage float64 `json:"discovery_coverage"`
RetrievalCoverageOfDiscovered float64 `json:"retrieval_coverage_of_discovered"`
}
type SuccessCriteria ¶
// SuccessCriteria lists terms and artifacts that must or must not appear,
// each as a string slice with yaml and json tags. The type also declares a
// Configured method returning bool.
type SuccessCriteria struct {
MustContainTerms []string `yaml:"must_contain_terms" json:"must_contain_terms,omitempty"`
MustContainArtifacts []string `yaml:"must_contain_artifacts" json:"must_contain_artifacts,omitempty"`
MustNotContainTerms []string `yaml:"must_not_contain_terms" json:"must_not_contain_terms,omitempty"`
MustNotContainArtifacts []string `yaml:"must_not_contain_artifacts" json:"must_not_contain_artifacts,omitempty"`
// Bound to the unsuffixed "must_not_contain" key.
// NOTE(review): the Legacy prefix suggests this is an older spelling kept
// for existing case files; how it is merged with the two fields above is
// not visible here.
LegacyMustNotContain []string `yaml:"must_not_contain" json:"must_not_contain,omitempty"`
}
func (SuccessCriteria) Configured ¶
func (c SuccessCriteria) Configured() bool
type SufficiencyResult ¶
// SufficiencyResult is the JSON record of a sufficiency check: whether
// criteria were configured, whether the check passed, and the lists of
// missing terms/artifacts, forbidden terms/artifacts present, and failure
// strings. No field uses omitempty, so all seven keys are always emitted and
// nil slices encode as null rather than [].
type SufficiencyResult struct {
Configured bool `json:"configured"`
Passed bool `json:"passed"`
MissingTerms []string `json:"missing_terms"`
MissingArtifacts []string `json:"missing_artifacts"`
ForbiddenTermsPresent []string `json:"forbidden_terms_present"`
ForbiddenArtifactsPresent []string `json:"forbidden_artifacts_present"`
Failures []string `json:"failures"`
}
type Summary ¶
// Summary is the run-level summary section of a Result. It holds the case
// count, median/mean token reductions, mean recall and precision values,
// grade counts, optional related / graph-context / pack / combined-tiered
// aggregates, context sufficiency totals, nested AgentMetrics and
// ParetoSummary, the worst-recall and largest-context case names and the
// failed threshold count.
type Summary struct {
Cases int `json:"cases"`
MedianTokenReductionVsFullPlanning float64 `json:"median_token_reduction_vs_full_planning"`
MeanTokenReductionVsFullPlanning float64 `json:"mean_token_reduction_vs_full_planning"`
MedianTokenReductionVsQueryFileBaseline float64 `json:"median_token_reduction_vs_query_file_baseline"`
MeanTokenReductionVsQueryFileBaseline float64 `json:"mean_token_reduction_vs_query_file_baseline"`
MeanArtifactRecall float64 `json:"mean_artifact_recall"`
MeanMustHaveRecall float64 `json:"mean_must_have_recall"`
MeanHelpfulRecall float64 `json:"mean_helpful_recall"`
MeanBackgroundRecall float64 `json:"mean_background_recall"`
MeanArtifactPrecision float64 `json:"mean_artifact_precision"`
MeanGradedPrecision float64 `json:"mean_graded_precision"`
MeanPenalizedUtilityPrecision float64 `json:"mean_penalized_utility_precision"`
GradeCounts GradeCounts `json:"grade_counts"`
// Related-artifact aggregates; scalar fields are omitted when zero.
RelatedCases int `json:"related_cases,omitempty"`
RelatedArtifactCount int `json:"related_artifact_count,omitempty"`
RelatedRelevantCount int `json:"related_relevant_count,omitempty"`
MeanRelatedArtifactPrecision float64 `json:"mean_related_artifact_precision,omitempty"`
MeanRelatedGradedPrecision float64 `json:"mean_related_graded_precision,omitempty"`
// NOTE(review): if GradeCounts is a struct (its definition is not visible
// here), omitempty has no effect in encoding/json and this key is always
// emitted. The same applies to GraphContextGradeCounts below.
RelatedGradeCounts GradeCounts `json:"related_grade_counts,omitempty"`
// Graph-context aggregates.
GraphContextCases int `json:"graph_context_cases,omitempty"`
GraphContextArtifactCount int `json:"graph_context_artifact_count,omitempty"`
GraphContextRelevantCount int `json:"graph_context_relevant_count,omitempty"`
GraphAssistedRelevantCount int `json:"graph_assisted_relevant_count,omitempty"`
MeanGraphContextArtifactPrecision float64 `json:"mean_graph_context_artifact_precision,omitempty"`
MeanGraphContextGradedPrecision float64 `json:"mean_graph_context_graded_precision,omitempty"`
GraphContextGradeCounts GradeCounts `json:"graph_context_grade_counts,omitempty"`
// Pack aggregates.
PackDiagnosticCases int `json:"pack_diagnostic_cases,omitempty"`
PackIncludedArtifactCount int `json:"pack_included_artifact_count,omitempty"`
PackExcludedNoiseCount int `json:"pack_excluded_noise_count,omitempty"`
MeanPackIncludedArtifacts float64 `json:"mean_pack_included_artifacts,omitempty"`
MeanPackRoleDiversity float64 `json:"mean_pack_role_diversity,omitempty"`
PackCasesWithBackgroundDecisions int `json:"pack_cases_with_background_decisions,omitempty"`
// The JSON key carries an extra "_surface" suffix that the Go field name
// does not have.
PackCasesWithImplementation int `json:"pack_cases_with_implementation_surface,omitempty"`
PackCasesWithBehaviorTests int `json:"pack_cases_with_behavior_tests,omitempty"`
PackCasesWithConfigSchema int `json:"pack_cases_with_config_schema,omitempty"`
PackCasesWithOpenWork int `json:"pack_cases_with_open_work,omitempty"`
PackCasesWithSupportingContext int `json:"pack_cases_with_supporting_context,omitempty"`
PackCasesWithExcludedNoise int `json:"pack_cases_with_excluded_noise,omitempty"`
// Sufficiency totals: the combined-tiered variants are optional, the plain
// variants are always emitted.
CombinedTieredContextSufficiencyCases int `json:"combined_tiered_context_sufficiency_cases,omitempty"`
CombinedTieredContextSufficiencyPassed int `json:"combined_tiered_context_sufficiency_passed,omitempty"`
CombinedTieredContextSufficiencyPassRate float64 `json:"combined_tiered_context_sufficiency_pass_rate,omitempty"`
ContextSufficiencyCases int `json:"context_sufficiency_cases"`
ContextSufficiencyPassed int `json:"context_sufficiency_passed"`
ContextSufficiencyPassRate float64 `json:"context_sufficiency_pass_rate"`
AgentMetrics AgentMetrics `json:"agent_metrics"`
Pareto ParetoSummary `json:"pareto"`
WorstRecallCase string `json:"worst_recall_case"`
LargestTokenContextCase string `json:"largest_token_context_case"`
FailedThresholdCount int `json:"failed_threshold_count,omitempty"`
}
type TokenBudgetSufficiency ¶
type TokenCounter ¶
type TokenizerProfile ¶
// TokenizerProfile is the JSON record describing a tokenizer: its name and
// provider (always emitted), an optional model and approximation label, and
// a nested PricingProfile. It is the return type of the Profile method on
// ProfiledTokenCounter.
type TokenizerProfile struct {
Name string `json:"name"`
Provider string `json:"provider"`
Model string `json:"model,omitempty"`
Approximation string `json:"approximation,omitempty"`
// NOTE(review): if PricingProfile is a struct (its definition is not
// visible here), omitempty has no effect in encoding/json and "pricing" is
// always emitted.
Pricing PricingProfile `json:"pricing,omitempty"`
}
Click to show internal directories.
Click to hide internal directories.