eval

package
v0.2.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 9, 2026 License: Apache-2.0 Imports: 15 Imported by: 0

Documentation

Index

Constants

View Source
const (
	MatcherStrategyEmbedding = "embedding"
	MatcherStrategyLLMJudge  = "llm-judge"
	MatcherStrategyHybrid    = "hybrid"
)

Matcher strategy constants: the accepted values for the semantic matching strategy setting (embedding, llm-judge, or hybrid).

Variables

This section is empty.

Functions

This section is empty.

Types

type ContainsMatcher

type ContainsMatcher struct{}

ContainsMatcher checks whether the actual output contains the expected values.

func NewContainsMatcher

func NewContainsMatcher() *ContainsMatcher

func (*ContainsMatcher) Match

func (m *ContainsMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)

func (*ContainsMatcher) Name

func (m *ContainsMatcher) Name() string

type EmbeddingClient

type EmbeddingClient interface {
	Embed(ctx context.Context, text string) ([]float64, error)
}

EmbeddingClient is the interface for generating embeddings.

type EmbeddingConfig

type EmbeddingConfig struct {
	Provider string `yaml:"provider"`           // ollama | openai
	Model    string `yaml:"model"`              // Embedding model name
	BaseURL  string `yaml:"base_url,omitempty"` // Optional base URL
}

EmbeddingConfig holds the configuration for embedding-based semantic matching.

type EmbeddingMatcher

type EmbeddingMatcher struct {
	// contains filtered or unexported fields
}

EmbeddingMatcher uses embeddings to evaluate semantic similarity

func NewEmbeddingMatcher

func NewEmbeddingMatcher(config *SemanticConfig) (*EmbeddingMatcher, error)

NewEmbeddingMatcher creates a new embedding matcher

func (*EmbeddingMatcher) Match

func (m *EmbeddingMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)

Match evaluates semantic similarity using embeddings

func (*EmbeddingMatcher) Name

func (m *EmbeddingMatcher) Name() string

Name returns the matcher name

type ExactMatcher

type ExactMatcher struct{}

ExactMatcher checks for an exact string match.

func NewExactMatcher

func NewExactMatcher() *ExactMatcher

func (*ExactMatcher) Match

func (m *ExactMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)

func (*ExactMatcher) Name

func (m *ExactMatcher) Name() string

type Expectation

type Expectation struct {
	Type        string            `yaml:"type"` // exact, contains, regex, semantic
	Value       string            `yaml:"value,omitempty"`
	Values      []string          `yaml:"values,omitempty"`
	Pattern     string            `yaml:"pattern,omitempty"`
	Threshold   *float64          `yaml:"threshold,omitempty"` // For semantic matching (pointer for override detection)
	Description string            `yaml:"description,omitempty"`
	Trace       *TraceExpectation `yaml:"trace,omitempty"`

	// Semantic matching overrides (optional, per-test)
	Strategy    string           `yaml:"strategy,omitempty"`     // Override global strategy
	LLM         *LLMConfig       `yaml:"llm,omitempty"`          // Override global LLM config
	Embedding   *EmbeddingConfig `yaml:"embedding,omitempty"`    // Override global embedding config
	JudgePrompt string           `yaml:"judge_prompt,omitempty"` // Override global judge prompt
}

Expectation defines what to expect from test execution

type HTTPTarget

type HTTPTarget struct {
	// contains filtered or unexported fields
}

HTTPTarget handles HTTP-based test execution

func NewHTTPTarget

func NewHTTPTarget(baseURL string, timeout time.Duration) *HTTPTarget

NewHTTPTarget creates a new HTTP target

func (*HTTPTarget) Health

func (ht *HTTPTarget) Health() error

Health checks if the target is healthy

func (*HTTPTarget) Invoke

func (ht *HTTPTarget) Invoke(input string, timeout int) (*InvokeResponse, error)

Invoke sends a test to the target and returns the response

type HybridMatcher

type HybridMatcher struct {
	// contains filtered or unexported fields
}

HybridMatcher combines embedding and LLM judge strategies

func NewHybridMatcher

func NewHybridMatcher(config *SemanticConfig) (*HybridMatcher, error)

NewHybridMatcher creates a new hybrid matcher

func (*HybridMatcher) Match

func (m *HybridMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)

Match evaluates using a hybrid approach. Strategy: a fast embedding filter first, then the LLM judge for edge cases.

func (*HybridMatcher) Name

func (m *HybridMatcher) Name() string

Name returns the matcher name

type InvokeRequest

type InvokeRequest struct {
	Input     string                 `json:"input"`
	SessionID string                 `json:"sessionID,omitempty"`
	Options   map[string]interface{} `json:"options,omitempty"`
}

InvokeRequest matches the EvalServer's request format

type InvokeResponse

type InvokeResponse struct {
	Output      string   `json:"output"`
	TraceID     string   `json:"trace_id"`
	SessionID   string   `json:"session_id"`
	DurationMs  int64    `json:"duration_ms"`
	Success     bool     `json:"success"`
	ToolsCalled []string `json:"tools_called,omitempty"`
	Error       string   `json:"error,omitempty"`
}

InvokeResponse matches the EvalServer's response format

type LLMConfig

type LLMConfig struct {
	Provider    string  `yaml:"provider"`           // ollama | openai | anthropic
	Model       string  `yaml:"model"`              // Model name
	Temperature float64 `yaml:"temperature"`        // Temperature for generation
	MaxTokens   int     `yaml:"max_tokens"`         // Max tokens for response
	BaseURL     string  `yaml:"base_url,omitempty"` // Optional base URL
}

LLMConfig holds the configuration for LLM-based semantic matching.

type LLMJudgeMatcher

type LLMJudgeMatcher struct {
	// contains filtered or unexported fields
}

LLMJudgeMatcher uses an LLM to evaluate semantic similarity

func NewLLMJudgeMatcher

func NewLLMJudgeMatcher(config *SemanticConfig) (*LLMJudgeMatcher, error)

NewLLMJudgeMatcher creates a new LLM judge matcher

func (*LLMJudgeMatcher) Match

func (m *LLMJudgeMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)

Match evaluates semantic similarity using LLM

func (*LLMJudgeMatcher) Name

func (m *LLMJudgeMatcher) Name() string

Name returns the matcher name

type MatchResult

type MatchResult struct {
	Matched     bool                   // Whether the output matched the expectation
	Confidence  float64                // Confidence score (0.0 - 1.0)
	Explanation string                 // Human-readable explanation
	Strategy    string                 // Strategy used (exact, contains, regex, semantic)
	Details     map[string]interface{} // Strategy-specific details
}

MatchResult represents the result of a match operation

type Matcher

type Matcher struct{}

Matcher validates test outputs against expectations (legacy)

func NewMatcher

func NewMatcher() *Matcher

NewMatcher creates a new matcher

func (*Matcher) Match

func (m *Matcher) Match(actual string, expect Expectation) (bool, string)

Match checks if actual output matches the expectation (legacy method)

type MatcherFactory

type MatcherFactory struct {
	// contains filtered or unexported fields
}

MatcherFactory creates matchers based on configuration

func NewMatcherFactory

func NewMatcherFactory(config *SemanticConfig) *MatcherFactory

NewMatcherFactory creates a new matcher factory

func (*MatcherFactory) CreateMatcher

func (f *MatcherFactory) CreateMatcher(exp Expectation) (MatcherInterface, error)

CreateMatcher creates appropriate matcher for expectation type

type MatcherInterface

type MatcherInterface interface {
	// Match checks if actual output matches expected criteria
	Match(ctx context.Context, actual string, expected Expectation) (*MatchResult, error)

	// Name returns the matcher strategy name
	Name() string
}

MatcherInterface defines the interface for output validation

type OllamaEmbeddingClient

type OllamaEmbeddingClient struct {
	// contains filtered or unexported fields
}

func NewOllamaEmbeddingClient

func NewOllamaEmbeddingClient(config *EmbeddingConfig) (*OllamaEmbeddingClient, error)

func (*OllamaEmbeddingClient) Embed

func (c *OllamaEmbeddingClient) Embed(ctx context.Context, text string) ([]float64, error)

type OpenAIEmbeddingClient

type OpenAIEmbeddingClient struct {
	// contains filtered or unexported fields
}

func NewOpenAIEmbeddingClient

func NewOpenAIEmbeddingClient(config *EmbeddingConfig) (*OpenAIEmbeddingClient, error)

func (*OpenAIEmbeddingClient) Embed

func (c *OpenAIEmbeddingClient) Embed(ctx context.Context, text string) ([]float64, error)

type RegexMatcher

type RegexMatcher struct{}

RegexMatcher checks whether the actual output matches a regex pattern.

func NewRegexMatcher

func NewRegexMatcher() *RegexMatcher

func (*RegexMatcher) Match

func (m *RegexMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)

func (*RegexMatcher) Name

func (m *RegexMatcher) Name() string

type Reporter

type Reporter struct {
	// contains filtered or unexported fields
}

Reporter generates test reports in various formats

func NewReporter

func NewReporter(format string) *Reporter

NewReporter creates a new reporter

func (*Reporter) Generate

func (r *Reporter) Generate(results *SuiteResults, w io.Writer) error

Generate creates a report and writes it to the writer

type Runner

type Runner struct {
	// contains filtered or unexported fields
}

Runner executes test suites

func NewRunner

func NewRunner(config *RunnerConfig) *Runner

NewRunner creates a new test runner

func (*Runner) Run

func (r *Runner) Run(suite *TestSuite) (*SuiteResults, error)

Run executes a test suite and returns results

type RunnerConfig

type RunnerConfig struct {
	Timeout      time.Duration
	Verbose      bool
	FailFast     bool
	OutputFormat string
}

RunnerConfig configures the test runner

type SemanticConfig

type SemanticConfig struct {
	Strategy    string           `yaml:"strategy"`               // embedding | llm-judge | hybrid
	LLM         *LLMConfig       `yaml:"llm,omitempty"`          // LLM configuration for llm-judge strategy
	Embedding   *EmbeddingConfig `yaml:"embedding,omitempty"`    // Embedding configuration
	Threshold   float64          `yaml:"threshold"`              // Similarity threshold (0.0 - 1.0)
	JudgePrompt string           `yaml:"judge_prompt,omitempty"` // Custom judge prompt template
}

SemanticConfig defines semantic matching configuration

type SuiteResults

type SuiteResults struct {
	SuiteName   string
	TotalTests  int
	PassedTests int
	FailedTests int
	Duration    time.Duration
	Results     []TestResult
	StartTime   time.Time
	EndTime     time.Time
}

SuiteResults represents results for an entire test suite

func (*SuiteResults) AllPassed

func (sr *SuiteResults) AllPassed() bool

AllPassed returns true if all tests passed

func (*SuiteResults) PassRate

func (sr *SuiteResults) PassRate() float64

PassRate returns the pass rate as a percentage

type Target

type Target struct {
	Type string `yaml:"type"` // http, grpc, etc.
	URL  string `yaml:"url"`  // Base URL for HTTP targets
}

Target defines where tests will be executed

type Test

type Test struct {
	Name        string                 `yaml:"name"`
	Description string                 `yaml:"description,omitempty"`
	Input       string                 `yaml:"input"`
	Expect      Expectation            `yaml:"expect"`
	Timeout     int                    `yaml:"timeout,omitempty"` // Override suite timeout
	Metadata    map[string]interface{} `yaml:"metadata,omitempty"`
}

Test represents a single test case

type TestResult

type TestResult struct {
	TestName       string
	Passed         bool
	Duration       time.Duration
	ActualOutput   string
	ExpectedOutput string
	ErrorMessage   string
	TraceID        string
	Metadata       map[string]interface{}

	// Semantic matching results
	MatchStrategy string                 `json:"match_strategy,omitempty"` // embedding, llm-judge, hybrid
	Confidence    float64                `json:"confidence,omitempty"`     // 0.0 - 1.0
	MatchDetails  map[string]interface{} `json:"match_details,omitempty"`  // Strategy-specific details
}

TestResult represents the result of a single test

type TestSuite

type TestSuite struct {
	Name        string            `yaml:"name"`
	Description string            `yaml:"description"`
	Target      Target            `yaml:"target"`
	Semantic    *SemanticConfig   `yaml:"semantic,omitempty"` // Global semantic matching config
	Tests       []Test            `yaml:"tests"`
	Metadata    map[string]string `yaml:"metadata,omitempty"`
}

TestSuite represents a collection of tests

func ParseTestFile

func ParseTestFile(filePath string) (*TestSuite, error)

ParseTestFile parses a YAML test file into a TestSuite

type TraceExpectation

type TraceExpectation struct {
	ToolCalls     []string `yaml:"tool_calls,omitempty"`
	LLMCalls      int      `yaml:"llm_calls,omitempty"`
	ExecutionPath []string `yaml:"execution_path,omitempty"`
	MinSteps      int      `yaml:"min_steps,omitempty"`
	MaxSteps      int      `yaml:"max_steps,omitempty"`
}

TraceExpectation defines expectations for trace data

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL