Documentation ¶
Index ¶
- Variables
- type Comparator
- type ComparisonReport
- type CoordinatedRunner
- func (r *CoordinatedRunner) PreviewExecution(exp *Experiment) *parallel.ExecutionPreview
- func (r *CoordinatedRunner) RunCoordinated(ctx context.Context, exp *Experiment, targetBranch string) (*parallel.ExecutionReport, error)
- func (r *CoordinatedRunner) SetConflictHandler(fn func(parallel.ConflictEvent))
- func (r *CoordinatedRunner) SetMergeHandler(fn func(parallel.MergeEvent))
- func (r *CoordinatedRunner) SetPartitionHandler(fn func(parallel.PartitionEvent))
- type CriterionEvaluation
- type CriterionType
- type Dependencies
- type Experiment
- type ExperimentStatus
- type Ranking
- type ReplayConfig
- type Replayer
- type Reporter
- type Run
- type RunMetrics
- type RunStatus
- type Runner
- type RunnerConfig
- type Store
- func (s *Store) CreateExperiment(exp *Experiment) error
- func (s *Store) FindExperimentByName(name string) (*Experiment, error)
- func (s *Store) GetExperiment(id string) (*Experiment, error)
- func (s *Store) GetRun(runID string) (*Run, error)
- func (s *Store) ListEvaluationsByExperiment(experimentID string) (map[string][]CriterionEvaluation, error)
- func (s *Store) ListExperiments(limit int, status ExperimentStatus) ([]Experiment, error)
- func (s *Store) ListRuns(experimentID string) ([]Run, error)
- func (s *Store) ReplaceEvaluations(runID string, evals []CriterionEvaluation) error
- func (s *Store) SaveRun(run *Run) error
- func (s *Store) UpdateExperimentStatus(id string, status ExperimentStatus, completedAt *time.Time) error
- type SuccessCriterion
- type Task
- type TerminalReporter
- type Variant
- type VariantReport
Constants ¶
This section is empty.
Variables ¶
ErrStoreUnavailable indicates the experiment store is not configured.
Functions ¶
This section is empty.
Types ¶
type Comparator ¶
type Comparator struct {
// contains filtered or unexported fields
}
Comparator analyzes experiment results and computes rankings.
func NewComparator ¶
func NewComparator(store *Store) *Comparator
NewComparator constructs a comparator for experiment results.
func (*Comparator) Compare ¶
func (c *Comparator) Compare(exp *Experiment) (*ComparisonReport, error)
Compare loads the experiment's runs and their evaluations and produces a comparison report.
type ComparisonReport ¶
type ComparisonReport struct {
ExperimentID string
Variants []VariantReport
Rankings []Ranking
Summary string
}
ComparisonReport summarizes results for an experiment.
type CoordinatedRunner ¶
type CoordinatedRunner struct {
*Runner
// contains filtered or unexported fields
}
CoordinatedRunner wraps Runner with conflict-aware scheduling.
func NewCoordinatedRunner ¶
func NewCoordinatedRunner(cfg RunnerConfig, deps Dependencies, repoPath string) (*CoordinatedRunner, error)
NewCoordinatedRunner creates a runner with conflict-aware coordination.
func (*CoordinatedRunner) PreviewExecution ¶
func (r *CoordinatedRunner) PreviewExecution(exp *Experiment) *parallel.ExecutionPreview
PreviewExecution returns a preview of how tasks would be scheduled.
func (*CoordinatedRunner) RunCoordinated ¶
func (r *CoordinatedRunner) RunCoordinated(ctx context.Context, exp *Experiment, targetBranch string) (*parallel.ExecutionReport, error)
RunCoordinated executes the experiment with conflict-aware scheduling. Tasks with overlapping file scopes are automatically serialized into waves.
func (*CoordinatedRunner) SetConflictHandler ¶
func (r *CoordinatedRunner) SetConflictHandler(fn func(parallel.ConflictEvent))
SetConflictHandler sets a callback for conflict events.
func (*CoordinatedRunner) SetMergeHandler ¶
func (r *CoordinatedRunner) SetMergeHandler(fn func(parallel.MergeEvent))
SetMergeHandler sets a callback for merge events.
func (*CoordinatedRunner) SetPartitionHandler ¶
func (r *CoordinatedRunner) SetPartitionHandler(fn func(parallel.PartitionEvent))
SetPartitionHandler sets a callback for partition events.
type CriterionEvaluation ¶
type CriterionEvaluation struct {
ID int64
RunID string
CriterionID int64
Passed bool
Score float64
Details string
EvaluatedAt time.Time
}
CriterionEvaluation records evaluation results for a run.
func EvaluateCriteria ¶
func EvaluateCriteria(ctx context.Context, worktreePath string, workingDir string, output string, criteria []SuccessCriterion) []CriterionEvaluation
EvaluateCriteria evaluates success criteria for a run and returns evaluations.
type CriterionType ¶
type CriterionType string
CriterionType defines supported evaluation types.
const (
	CriterionTestPass   CriterionType = "test_pass"
	CriterionFileExists CriterionType = "file_exists"
	CriterionContains   CriterionType = "contains"
	CriterionCommand    CriterionType = "command"
	CriterionManual     CriterionType = "manual"
)
type Dependencies ¶
type Dependencies struct {
Config *config.Config
ModelManager *model.Manager
ProjectContext *projectcontext.ProjectContext
Telemetry *telemetry.Hub
Notify *notify.Manager
Worktree parallel.WorktreeManager
Store *Store
}
Dependencies bundles the shared dependencies for the runner.
type Experiment ¶
type Experiment struct {
ID string
Name string
Description string
Hypothesis string
Task Task
Variants []Variant
Criteria []SuccessCriterion
Status ExperimentStatus
CreatedAt time.Time
CompletedAt *time.Time
}
Experiment groups variants for a single comparison run.
type ExperimentStatus ¶
type ExperimentStatus string
ExperimentStatus captures lifecycle state for an experiment.
const (
	ExperimentPending   ExperimentStatus = "pending"
	ExperimentRunning   ExperimentStatus = "running"
	ExperimentCompleted ExperimentStatus = "completed"
	ExperimentFailed    ExperimentStatus = "failed"
	ExperimentCancelled ExperimentStatus = "cancelled"
)
type ReplayConfig ¶
type ReplayConfig struct {
SourceSessionID string
NewModelID string
NewProviderID string
NewSystemPrompt *string
NewTemperature *float64
DeterministicTools bool
}
ReplayConfig specifies how to replay a session.
type Replayer ¶
type Replayer struct {
// contains filtered or unexported fields
}
Replayer replays a stored session with new configuration.
func NewReplayer ¶
NewReplayer constructs a replayer.
type Reporter ¶
type Reporter struct {
// contains filtered or unexported fields
}
Reporter formats experiment results for humans.
func NewReporterWithComparator ¶
func NewReporterWithComparator(comparator *Comparator) *Reporter
NewReporterWithComparator creates a reporter that can compare stored runs.
func (*Reporter) ComparisonMarkdown ¶
func (r *Reporter) ComparisonMarkdown(exp *Experiment) (string, error)
ComparisonMarkdown renders a markdown report from persisted experiment runs.
func (*Reporter) MarkdownTable ¶
func (r *Reporter) MarkdownTable(exp *Experiment, results []*parallel.AgentResult) string
MarkdownTable renders a markdown summary table for the experiment results.
type Run ¶
type Run struct {
ID string
ExperimentID string
VariantID string
SessionID string
Branch string
Status RunStatus
Output string
Files []string
Metrics RunMetrics
Error *string
StartedAt time.Time
CompletedAt *time.Time
}
Run captures a single execution of a variant.
type RunMetrics ¶
type RunMetrics struct {
DurationMs int64
PromptTokens int
CompletionTokens int
TotalCost float64
ToolCalls int
ToolSuccesses int
ToolFailures int
FilesModified int
LinesChanged int
}
RunMetrics captures measurable outcomes.
type Runner ¶
type Runner struct {
// contains filtered or unexported fields
}
Runner executes experiments across multiple variants.
func NewRunner ¶
func NewRunner(cfg RunnerConfig, deps Dependencies) (*Runner, error)
NewRunner constructs a runner with the required dependencies.
func (*Runner) RunExperiment ¶
func (r *Runner) RunExperiment(ctx context.Context, exp *Experiment) ([]*parallel.AgentResult, error)
RunExperiment executes all variants and returns their results.
type RunnerConfig ¶
RunnerConfig controls experiment execution behavior.
type Store ¶
type Store struct {
// contains filtered or unexported fields
}
Store manages experiment persistence.
func NewStoreFromStorage ¶
NewStoreFromStorage constructs an experiment store from the main storage store.
func (*Store) CreateExperiment ¶
func (s *Store) CreateExperiment(exp *Experiment) error
CreateExperiment persists a new experiment along with variants and criteria.
func (*Store) FindExperimentByName ¶
func (s *Store) FindExperimentByName(name string) (*Experiment, error)
FindExperimentByName loads the most recent experiment with the given name.
func (*Store) GetExperiment ¶
func (s *Store) GetExperiment(id string) (*Experiment, error)
GetExperiment loads a single experiment with variants and criteria.
func (*Store) ListEvaluationsByExperiment ¶
func (s *Store) ListEvaluationsByExperiment(experimentID string) (map[string][]CriterionEvaluation, error)
ListEvaluationsByExperiment returns evaluations keyed by run ID.
func (*Store) ListExperiments ¶
func (s *Store) ListExperiments(limit int, status ExperimentStatus) ([]Experiment, error)
ListExperiments returns recent experiments, optionally filtered by status.
func (*Store) ReplaceEvaluations ¶
func (s *Store) ReplaceEvaluations(runID string, evals []CriterionEvaluation) error
ReplaceEvaluations overwrites evaluations for a run.
func (*Store) UpdateExperimentStatus ¶
func (s *Store) UpdateExperimentStatus(id string, status ExperimentStatus, completedAt *time.Time) error
UpdateExperimentStatus updates experiment status and completion timestamp.
type SuccessCriterion ¶
type SuccessCriterion struct {
ID int64
Name string
Type CriterionType
Target string
Weight float64
}
SuccessCriterion defines how to evaluate a run.
type Task ¶
type Task struct {
Prompt string
Context map[string]string
WorkingDir string
Timeout time.Duration
Files []string // Explicit file paths for scope conflict detection
Scope []string // Glob patterns for scope conflict detection (e.g., "pkg/auth/...")
}
Task describes what each variant should execute.
type TerminalReporter ¶
type TerminalReporter struct {
// contains filtered or unexported fields
}
TerminalReporter renders experiment results with colors and charts.
func NewTerminalReporter ¶
func NewTerminalReporter(comparator *Comparator) *TerminalReporter
NewTerminalReporter creates a reporter for terminal output.
func NewTerminalReporterWithOutput ¶
func NewTerminalReporterWithOutput(out io.Writer, comparator *Comparator) *TerminalReporter
NewTerminalReporterWithOutput creates a reporter with custom output.
func (*TerminalReporter) RenderCompact ¶
func (r *TerminalReporter) RenderCompact(exp *Experiment) error
RenderCompact renders a compact one-line summary per variant.
func (*TerminalReporter) RenderReport ¶
func (r *TerminalReporter) RenderReport(exp *Experiment) error
RenderReport renders a full experiment report with charts.
func (*TerminalReporter) SetNoColor ¶
func (r *TerminalReporter) SetNoColor(noColor bool)
SetNoColor sets whether color output is disabled; passing true turns colors off.
type Variant ¶
type Variant struct {
ID string
Name string
ModelID string
ProviderID string
SystemPrompt *string
Temperature *float64
MaxTokens *int
ToolsAllowed []string
CustomConfig map[string]any
Files []string // Override task-level file scope for this variant
Scope []string // Override task-level glob scope for this variant
}
Variant describes a model configuration to test.
type VariantReport ¶
type VariantReport struct {
VariantID string
VariantName string
ModelID string
Status RunStatus
Metrics RunMetrics
CriteriaScore float64
CriteriaPassed []string
CriteriaFailed []string
OutputPreview string
Error string
}
VariantReport captures metrics and criteria results per variant.