Documentation
¶
Index ¶
- Variables
- func CORSMiddleware(next http.Handler, allowedOrigins ...string) http.Handler
- func RegisterRoutes(mux *http.ServeMux, store RunStore)
- func RegisterRoutesWithStorage(mux *http.ServeMux, store RunStore, cfg *StorageConfig)
- type ConfidenceIntervalResponse
- type ErrorResponse
- type FileStore
- type GraderResult
- type Handlers
- func (h *Handlers) HandleHealth(w http.ResponseWriter, _ *http.Request)
- func (h *Handlers) HandleRunDetail(w http.ResponseWriter, r *http.Request)
- func (h *Handlers) HandleRuns(w http.ResponseWriter, r *http.Request)
- func (h *Handlers) HandleStorageStatus(w http.ResponseWriter, _ *http.Request)
- func (h *Handlers) HandleSummary(w http.ResponseWriter, _ *http.Request)
- type HealthResponse
- type RunDetail
- type RunStore
- type RunSummary
- type SessionDigestResponse
- type StorageAdapter
- type StorageConfig
- type StorageStatusResponse
- type SummaryResponse
- type TaskResult
- type TranscriptEventResponse
Constants ¶
This section is empty.
Variables ¶
var ErrRunNotFound = errors.New("run not found")
ErrRunNotFound is returned when a run ID does not match any stored run.
var Version = "0.4.0-alpha.1"
Version is set at build time; when it is not overridden, it defaults to "0.4.0-alpha.1".
Functions ¶
func CORSMiddleware ¶
func CORSMiddleware(next http.Handler, allowedOrigins ...string) http.Handler
CORSMiddleware wraps a handler with CORS headers. If allowedOrigins is empty, no CORS header is set (same-origin only). Otherwise, the request's Origin header is checked against the allowed list.
func RegisterRoutes ¶
func RegisterRoutes(mux *http.ServeMux, store RunStore)
RegisterRoutes registers all web API routes on the given mux.
func RegisterRoutesWithStorage ¶
func RegisterRoutesWithStorage(mux *http.ServeMux, store RunStore, cfg *StorageConfig)
RegisterRoutesWithStorage registers all web API routes with storage config.
Types ¶
type ConfidenceIntervalResponse ¶
type ConfidenceIntervalResponse struct {
Lower float64 `json:"lower"`
Upper float64 `json:"upper"`
Mean float64 `json:"mean"`
ConfidenceLevel float64 `json:"confidenceLevel"`
}
ConfidenceIntervalResponse is the API representation of a bootstrap CI.
type ErrorResponse ¶
ErrorResponse is the response returned when a request results in an error.
type FileStore ¶
type FileStore struct {
// contains filtered or unexported fields
}
FileStore reads EvaluationOutcome JSON files from a directory.
func NewFileStore ¶
NewFileStore creates a FileStore that reads results from dir.
func (*FileStore) ListRuns ¶
func (fs *FileStore) ListRuns(sortField, order string) ([]RunSummary, error)
ListRuns returns all runs sorted by the given field and order.
func (*FileStore) Summary ¶
func (fs *FileStore) Summary() (*SummaryResponse, error)
Summary returns aggregate metrics across all runs.
type GraderResult ¶
type GraderResult struct {
Name string `json:"name"`
Type string `json:"type"`
Passed bool `json:"passed"`
Score float64 `json:"score"`
Weight float64 `json:"weight"`
Message string `json:"message"`
}
GraderResult is a single grader/validator result.
type Handlers ¶
type Handlers struct {
// contains filtered or unexported fields
}
Handlers holds the HTTP handler methods for the web API.
func NewHandlers ¶
NewHandlers creates a new Handlers with the given store.
func NewHandlersWithStorage ¶
func NewHandlersWithStorage(store RunStore, cfg *StorageConfig) *Handlers
NewHandlersWithStorage creates a new Handlers with storage configuration.
func (*Handlers) HandleHealth ¶
func (h *Handlers) HandleHealth(w http.ResponseWriter, _ *http.Request)
HandleHealth returns a simple health check response.
func (*Handlers) HandleRunDetail ¶
func (h *Handlers) HandleRunDetail(w http.ResponseWriter, r *http.Request)
HandleRunDetail returns full run detail with per-task results.
func (*Handlers) HandleRuns ¶
func (h *Handlers) HandleRuns(w http.ResponseWriter, r *http.Request)
HandleRuns returns a list of all runs, with optional sort/order query params.
func (*Handlers) HandleStorageStatus ¶
func (h *Handlers) HandleStorageStatus(w http.ResponseWriter, _ *http.Request)
HandleStorageStatus returns the current storage configuration status.
func (*Handlers) HandleSummary ¶
func (h *Handlers) HandleSummary(w http.ResponseWriter, _ *http.Request)
HandleSummary returns aggregate KPI metrics across all runs.
type HealthResponse ¶
HealthResponse is the health check response.
type RunDetail ¶
type RunDetail struct {
RunSummary
Tasks []TaskResult `json:"tasks"`
}
RunDetail is the API response for a single run with per-task results.
type RunStore ¶
type RunStore interface {
// ListRuns returns all runs, sorted by the given field and order.
ListRuns(sortField, order string) ([]RunSummary, error)
// GetRun returns a single run with full task details.
GetRun(id string) (*RunDetail, error)
// Summary returns aggregate metrics across all runs.
Summary() (*SummaryResponse, error)
}
RunStore provides access to evaluation run data.
type RunSummary ¶
type RunSummary struct {
ID string `json:"id"`
Spec string `json:"spec"`
Model string `json:"model"`
JudgeModel string `json:"judgeModel,omitempty"`
Outcome string `json:"outcome"`
PassCount int `json:"passCount"`
TaskCount int `json:"taskCount"`
Tokens int `json:"tokens"`
Cost float64 `json:"cost"`
Duration float64 `json:"duration"`
Timestamp time.Time `json:"timestamp"`
Source string `json:"source,omitempty"` // "local" or "azure-blob"
}
RunSummary is the API response for a single run in the list.
type SessionDigestResponse ¶
type SessionDigestResponse struct {
TotalTurns int `json:"totalTurns"`
ToolCallCount int `json:"toolCallCount"`
TokensIn int `json:"tokensIn"`
TokensOut int `json:"tokensOut"`
TokensTotal int `json:"tokensTotal"`
ToolsUsed []string `json:"toolsUsed"`
Errors []string `json:"errors"`
}
SessionDigestResponse is the API representation of a session digest.
type StorageAdapter ¶
type StorageAdapter struct {
// contains filtered or unexported fields
}
StorageAdapter adapts storage.ResultStore to the webapi.RunStore interface. It provides a bridge between the storage layer and the web API layer.
func NewStorageAdapter ¶
func NewStorageAdapter(store storage.ResultStore, source string) *StorageAdapter
NewStorageAdapter creates a RunStore backed by the given storage.ResultStore.
func (*StorageAdapter) GetRun ¶
func (sa *StorageAdapter) GetRun(id string) (*RunDetail, error)
GetRun returns a single run with full task details.
func (*StorageAdapter) ListRuns ¶
func (sa *StorageAdapter) ListRuns(sortField, order string) ([]RunSummary, error)
ListRuns returns all runs, sorted by the given field and order.
func (*StorageAdapter) Summary ¶
func (sa *StorageAdapter) Summary() (*SummaryResponse, error)
Summary returns aggregate metrics across all runs.
type StorageConfig ¶
StorageConfig holds storage configuration for the status endpoint.
type StorageStatusResponse ¶
type StorageStatusResponse struct {
Configured bool `json:"configured"`
Provider string `json:"provider,omitempty"`
Account string `json:"account,omitempty"`
}
StorageStatusResponse is the storage configuration status.
type SummaryResponse ¶
type SummaryResponse struct {
TotalRuns int `json:"totalRuns"`
TotalTasks int `json:"totalTasks"`
PassRate float64 `json:"passRate"`
AvgTokens float64 `json:"avgTokens"`
AvgCost float64 `json:"avgCost"`
AvgDuration float64 `json:"avgDuration"`
}
SummaryResponse is the aggregate KPI response.
type TaskResult ¶
type TaskResult struct {
Name string `json:"name"`
Outcome string `json:"outcome"`
Score float64 `json:"score"`
Duration float64 `json:"duration"`
GraderResults []GraderResult `json:"graderResults"`
Transcript []TranscriptEventResponse `json:"transcript,omitempty"`
SessionDigest *SessionDigestResponse `json:"sessionDigest,omitempty"`
BootstrapCI *ConfidenceIntervalResponse `json:"bootstrapCI,omitempty"`
IsSignificant *bool `json:"isSignificant,omitempty"`
}
TaskResult is a per-task result within a run.
type TranscriptEventResponse ¶
type TranscriptEventResponse struct {
Type string `json:"type"`
Content string `json:"content,omitempty"`
Message string `json:"message,omitempty"`
ToolCallID string `json:"toolCallId,omitempty"`
ToolName string `json:"toolName,omitempty"`
Arguments any `json:"arguments,omitempty"`
ToolResult any `json:"toolResult,omitempty"`
Success *bool `json:"success,omitempty"`
}
TranscriptEventResponse is the API representation of a transcript event.