eval

package

v0.2.2 (latest) · Published: Feb 9, 2026 · License: Apache-2.0 · Imports: 15 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/agenticgokit/agk

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
type ContainsMatcher
- func NewContainsMatcher() *ContainsMatcher
- func (m *ContainsMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)
- func (m *ContainsMatcher) Name() string
type EmbeddingClient
type EmbeddingConfig
type EmbeddingMatcher
- func NewEmbeddingMatcher(config *SemanticConfig) (*EmbeddingMatcher, error)
- func (m *EmbeddingMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)
- func (m *EmbeddingMatcher) Name() string
type ExactMatcher
- func NewExactMatcher() *ExactMatcher
- func (m *ExactMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)
- func (m *ExactMatcher) Name() string
type Expectation
type HTTPTarget
- func NewHTTPTarget(baseURL string, timeout time.Duration) *HTTPTarget
- func (ht *HTTPTarget) Health() error
- func (ht *HTTPTarget) Invoke(input string, timeout int) (*InvokeResponse, error)
type HybridMatcher
- func NewHybridMatcher(config *SemanticConfig) (*HybridMatcher, error)
- func (m *HybridMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)
- func (m *HybridMatcher) Name() string
type InvokeRequest
type InvokeResponse
type LLMConfig
type LLMJudgeMatcher
- func NewLLMJudgeMatcher(config *SemanticConfig) (*LLMJudgeMatcher, error)
- func (m *LLMJudgeMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)
- func (m *LLMJudgeMatcher) Name() string
type MatchResult
type Matcher
- func NewMatcher() *Matcher
- func (m *Matcher) Match(actual string, expect Expectation) (bool, string)
type MatcherFactory
- func NewMatcherFactory(config *SemanticConfig) *MatcherFactory
- func (f *MatcherFactory) CreateMatcher(exp Expectation) (MatcherInterface, error)
type MatcherInterface
type OllamaEmbeddingClient
- func NewOllamaEmbeddingClient(config *EmbeddingConfig) (*OllamaEmbeddingClient, error)
- func (c *OllamaEmbeddingClient) Embed(ctx context.Context, text string) ([]float64, error)
type OpenAIEmbeddingClient
- func NewOpenAIEmbeddingClient(config *EmbeddingConfig) (*OpenAIEmbeddingClient, error)
- func (c *OpenAIEmbeddingClient) Embed(ctx context.Context, text string) ([]float64, error)
type RegexMatcher
- func NewRegexMatcher() *RegexMatcher
- func (m *RegexMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)
- func (m *RegexMatcher) Name() string
type Reporter
- func NewReporter(format string) *Reporter
- func (r *Reporter) Generate(results *SuiteResults, w io.Writer) error
type Runner
- func NewRunner(config *RunnerConfig) *Runner
- func (r *Runner) Run(suite *TestSuite) (*SuiteResults, error)
type RunnerConfig
type SemanticConfig
type SuiteResults
- func (sr *SuiteResults) AllPassed() bool
- func (sr *SuiteResults) PassRate() float64
type Target
type Test
type TestResult
type TestSuite
- func ParseTestFile(filePath string) (*TestSuite, error)
type TraceExpectation

Constants ¶

View Source

// Matcher strategy constants.
//
// These untyped string constants carry the same three names that the
// SemanticConfig.Strategy field comment lists as accepted values
// (embedding | llm-judge | hybrid):
//   - MatcherStrategyEmbedding: "embedding"
//   - MatcherStrategyLLMJudge:  "llm-judge"
//   - MatcherStrategyHybrid:    "hybrid"
const (
	MatcherStrategyEmbedding = "embedding"
	MatcherStrategyLLMJudge  = "llm-judge"
	MatcherStrategyHybrid    = "hybrid"
)

Matcher strategy constants

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type ContainsMatcher ¶

type ContainsMatcher struct{}

ContainsMatcher checks whether the actual output contains the expected values.

func NewContainsMatcher ¶

func NewContainsMatcher() *ContainsMatcher

func (*ContainsMatcher) Match ¶

func (m *ContainsMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)

func (*ContainsMatcher) Name ¶

func (m *ContainsMatcher) Name() string

type EmbeddingClient ¶

// EmbeddingClient is the interface for generating embeddings.
//
// Embed receives a context and the text to embed, and returns the embedding
// as a []float64 together with an error. OllamaEmbeddingClient and
// OpenAIEmbeddingClient each declare an Embed method with this exact
// signature.
type EmbeddingClient interface {
	Embed(ctx context.Context, text string) ([]float64, error)
}

EmbeddingClient interface for generating embeddings

type EmbeddingConfig ¶

// EmbeddingConfig describes the embedding backend used for embedding-based
// semantic matching. All fields carry yaml struct tags.
//
// Provider and Model are always emitted under the keys "provider" and
// "model"; BaseURL is tagged omitempty, so an empty value is left out when
// the struct is marshalled.
//
// NOTE(review): the Provider comment lists "ollama | openai"; whether any
// other value is accepted is not visible here — confirm against
// NewEmbeddingMatcher.
type EmbeddingConfig struct {
	Provider string `yaml:"provider"`           // ollama | openai
	Model    string `yaml:"model"`              // Embedding model name
	BaseURL  string `yaml:"base_url,omitempty"` // Optional base URL
}

EmbeddingConfig for embedding-based semantic matching

type EmbeddingMatcher ¶

type EmbeddingMatcher struct {
	// contains filtered or unexported fields
}

EmbeddingMatcher uses embeddings to evaluate semantic similarity

func NewEmbeddingMatcher ¶

func NewEmbeddingMatcher(config *SemanticConfig) (*EmbeddingMatcher, error)

NewEmbeddingMatcher creates a new embedding matcher

func (*EmbeddingMatcher) Match ¶

func (m *EmbeddingMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)

Match evaluates semantic similarity using embeddings

func (*EmbeddingMatcher) Name ¶

func (m *EmbeddingMatcher) Name() string

Name returns the matcher name

type ExactMatcher ¶

type ExactMatcher struct{}

ExactMatcher checks for an exact string match.

func NewExactMatcher ¶

func NewExactMatcher() *ExactMatcher

func (*ExactMatcher) Match ¶

func (m *ExactMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)

func (*ExactMatcher) Name ¶

func (m *ExactMatcher) Name() string

type Expectation ¶

// Expectation defines what to expect from test execution.
//
// Type is the only field without omitempty; its comment lists the kinds
// "exact", "contains", "regex" and "semantic". Value, Values and Pattern are
// alternative payload fields.
// NOTE(review): presumably Value pairs with exact/semantic, Values with
// contains, and Pattern with regex — confirm against the matcher
// implementations.
//
// Threshold is a *float64 rather than a float64 so that an unset value (nil)
// can be told apart from an explicit one, as its field comment states
// ("pointer for override detection"). SemanticConfig.Threshold, by contrast,
// is a plain float64.
//
// Trace optionally attaches a TraceExpectation.
//
// Strategy, LLM, Embedding and JudgePrompt are optional per-test overrides of
// the fields with the same names on SemanticConfig.
type Expectation struct {
	Type        string            `yaml:"type"` // exact, contains, regex, semantic
	Value       string            `yaml:"value,omitempty"`
	Values      []string          `yaml:"values,omitempty"`
	Pattern     string            `yaml:"pattern,omitempty"`
	Threshold   *float64          `yaml:"threshold,omitempty"` // For semantic matching (pointer for override detection)
	Description string            `yaml:"description,omitempty"`
	Trace       *TraceExpectation `yaml:"trace,omitempty"`

	// Semantic matching overrides (optional, per-test)
	Strategy    string           `yaml:"strategy,omitempty"`     // Override global strategy
	LLM         *LLMConfig       `yaml:"llm,omitempty"`          // Override global LLM config
	Embedding   *EmbeddingConfig `yaml:"embedding,omitempty"`    // Override global embedding config
	JudgePrompt string           `yaml:"judge_prompt,omitempty"` // Override global judge prompt
}

Expectation defines what to expect from test execution

type HTTPTarget ¶

type HTTPTarget struct {
	// contains filtered or unexported fields
}

HTTPTarget handles HTTP-based test execution

func NewHTTPTarget ¶

func NewHTTPTarget(baseURL string, timeout time.Duration) *HTTPTarget

NewHTTPTarget creates a new HTTP target

func (*HTTPTarget) Health ¶

func (ht *HTTPTarget) Health() error

Health checks if the target is healthy

func (*HTTPTarget) Invoke ¶

func (ht *HTTPTarget) Invoke(input string, timeout int) (*InvokeResponse, error)

Invoke sends a test to the target and returns the response

type HybridMatcher ¶

type HybridMatcher struct {
	// contains filtered or unexported fields
}

HybridMatcher combines embedding and LLM judge strategies

func NewHybridMatcher ¶

func NewHybridMatcher(config *SemanticConfig) (*HybridMatcher, error)

NewHybridMatcher creates a new hybrid matcher

func (*HybridMatcher) Match ¶

func (m *HybridMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)

Match evaluates using a hybrid approach. Strategy: a fast embedding filter first, then the LLM judge for edge cases.

func (*HybridMatcher) Name ¶

func (m *HybridMatcher) Name() string

Name returns the matcher name

type InvokeRequest ¶

// InvokeRequest matches the EvalServer's request format.
//
// JSON encoding, per the struct tags:
//   - Input     -> "input" (always present)
//   - SessionID -> "sessionID" (omitted when empty)
//   - Options   -> "options" (omitted when empty)
//
// NOTE(review): the request key is camelCase "sessionID", whereas
// InvokeResponse uses snake_case "session_id". This is wire format, so it is
// documented rather than changed — confirm that the EvalServer really expects
// the two different spellings.
type InvokeRequest struct {
	Input     string                 `json:"input"`
	SessionID string                 `json:"sessionID,omitempty"`
	Options   map[string]interface{} `json:"options,omitempty"`
}

InvokeRequest matches the EvalServer's request format

type InvokeResponse ¶

// InvokeResponse matches the EvalServer's response format. It is the value
// returned by HTTPTarget.Invoke.
//
// JSON keys, per the struct tags: "output", "trace_id", "session_id",
// "duration_ms", "success", "tools_called" and "error". ToolsCalled and Error
// are tagged omitempty; the other fields are always emitted when marshalled.
//
// NOTE(review): presumably Error is populated when Success is false, and
// DurationMs is a duration in milliseconds as the name suggests — confirm
// against the EvalServer.
type InvokeResponse struct {
	Output      string   `json:"output"`
	TraceID     string   `json:"trace_id"`
	SessionID   string   `json:"session_id"`
	DurationMs  int64    `json:"duration_ms"`
	Success     bool     `json:"success"`
	ToolsCalled []string `json:"tools_called,omitempty"`
	Error       string   `json:"error,omitempty"`
}

InvokeResponse matches the EvalServer's response format

type LLMConfig ¶

// LLMConfig describes the LLM used for LLM-based semantic matching. All
// fields carry yaml struct tags; only BaseURL is tagged omitempty.
//
// Temperature and MaxTokens are plain value types, so a value of 0 cannot be
// distinguished from an absent key after unmarshalling.
// NOTE(review): whether 0 is replaced by a default is not visible here —
// confirm against NewLLMJudgeMatcher.
type LLMConfig struct {
	Provider    string  `yaml:"provider"`           // ollama | openai | anthropic
	Model       string  `yaml:"model"`              // Model name
	Temperature float64 `yaml:"temperature"`        // Temperature for generation
	MaxTokens   int     `yaml:"max_tokens"`         // Max tokens for response
	BaseURL     string  `yaml:"base_url,omitempty"` // Optional base URL
}

LLMConfig for LLM-based semantic matching

type LLMJudgeMatcher ¶

type LLMJudgeMatcher struct {
	// contains filtered or unexported fields
}

LLMJudgeMatcher uses an LLM to evaluate semantic similarity

func NewLLMJudgeMatcher ¶

func NewLLMJudgeMatcher(config *SemanticConfig) (*LLMJudgeMatcher, error)

NewLLMJudgeMatcher creates a new LLM judge matcher

func (*LLMJudgeMatcher) Match ¶

func (m *LLMJudgeMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)

Match evaluates semantic similarity using LLM

func (*LLMJudgeMatcher) Name ¶

func (m *LLMJudgeMatcher) Name() string

Name returns the matcher name

type MatchResult ¶

// MatchResult represents the result of a match operation. It is the value
// returned (by pointer) from MatcherInterface.Match. The struct has no
// serialization tags.
//
// NOTE(review): the Strategy comment lists "exact, contains, regex,
// semantic", while the MatcherStrategy* constants and the
// TestResult.MatchStrategy comment use "embedding", "llm-judge" and "hybrid".
// Which spelling semantic matchers actually store here is not visible —
// confirm against the matcher implementations.
type MatchResult struct {
	Matched     bool                   // Whether the output matched the expectation
	Confidence  float64                // Confidence score (0.0 - 1.0)
	Explanation string                 // Human-readable explanation
	Strategy    string                 // Strategy used (exact, contains, regex, semantic)
	Details     map[string]interface{} // Strategy-specific details
}

MatchResult represents the result of a match operation

type Matcher ¶

type Matcher struct{}

Matcher validates test outputs against expectations (legacy)

func NewMatcher ¶

func NewMatcher() *Matcher

NewMatcher creates a new matcher

func (*Matcher) Match ¶

func (m *Matcher) Match(actual string, expect Expectation) (bool, string)

Match checks if actual output matches the expectation (legacy method)

type MatcherFactory ¶

type MatcherFactory struct {
	// contains filtered or unexported fields
}

MatcherFactory creates matchers based on configuration

func NewMatcherFactory ¶

func NewMatcherFactory(config *SemanticConfig) *MatcherFactory

NewMatcherFactory creates a new matcher factory

func (*MatcherFactory) CreateMatcher ¶

func (f *MatcherFactory) CreateMatcher(exp Expectation) (MatcherInterface, error)

CreateMatcher creates appropriate matcher for expectation type

type MatcherInterface ¶

// MatcherInterface defines the interface for output validation. It is the
// type returned by MatcherFactory.CreateMatcher.
//
// ContainsMatcher, EmbeddingMatcher, ExactMatcher, HybridMatcher,
// LLMJudgeMatcher and RegexMatcher each declare Match and Name methods with
// these signatures on their pointer receivers. The legacy Matcher type does
// not: its Match takes no context and returns (bool, string).
type MatcherInterface interface {
	// Match checks if actual output matches expected criteria
	Match(ctx context.Context, actual string, expected Expectation) (*MatchResult, error)

	// Name returns the matcher strategy name
	Name() string
}

MatcherInterface defines the interface for output validation

type OllamaEmbeddingClient ¶

type OllamaEmbeddingClient struct {
	// contains filtered or unexported fields
}

func NewOllamaEmbeddingClient ¶

func NewOllamaEmbeddingClient(config *EmbeddingConfig) (*OllamaEmbeddingClient, error)

func (*OllamaEmbeddingClient) Embed ¶

func (c *OllamaEmbeddingClient) Embed(ctx context.Context, text string) ([]float64, error)

type OpenAIEmbeddingClient ¶

type OpenAIEmbeddingClient struct {
	// contains filtered or unexported fields
}

func NewOpenAIEmbeddingClient ¶

func NewOpenAIEmbeddingClient(config *EmbeddingConfig) (*OpenAIEmbeddingClient, error)

func (*OpenAIEmbeddingClient) Embed ¶

func (c *OpenAIEmbeddingClient) Embed(ctx context.Context, text string) ([]float64, error)

type RegexMatcher ¶

type RegexMatcher struct{}

RegexMatcher checks whether the actual output matches a regex pattern.

func NewRegexMatcher ¶

func NewRegexMatcher() *RegexMatcher

func (*RegexMatcher) Match ¶

func (m *RegexMatcher) Match(ctx context.Context, actual string, exp Expectation) (*MatchResult, error)

func (*RegexMatcher) Name ¶

func (m *RegexMatcher) Name() string

type Reporter ¶

type Reporter struct {
	// contains filtered or unexported fields
}

Reporter generates test reports in various formats

func NewReporter ¶

func NewReporter(format string) *Reporter

NewReporter creates a new reporter

func (*Reporter) Generate ¶

func (r *Reporter) Generate(results *SuiteResults, w io.Writer) error

Generate creates a report and writes it to the writer

type Runner ¶

type Runner struct {
	// contains filtered or unexported fields
}

Runner executes test suites

func NewRunner ¶

func NewRunner(config *RunnerConfig) *Runner

NewRunner creates a new test runner

func (*Runner) Run ¶

func (r *Runner) Run(suite *TestSuite) (*SuiteResults, error)

Run executes a test suite and returns results

type RunnerConfig ¶

// RunnerConfig configures the test runner and is the argument to NewRunner.
// It has no serialization tags.
//
// NOTE(review): field semantics are not shown on this page. Presumably
// Timeout bounds each test invocation, Verbose enables extra output, FailFast
// stops the run at the first failing test, and OutputFormat is the format
// name handed to NewReporter — confirm against the Runner implementation.
type RunnerConfig struct {
	Timeout      time.Duration
	Verbose      bool
	FailFast     bool
	OutputFormat string
}

RunnerConfig configures the test runner

type SemanticConfig ¶

// SemanticConfig defines semantic matching configuration. It is referenced
// from TestSuite.Semantic and passed to NewMatcherFactory and to the
// NewEmbeddingMatcher, NewLLMJudgeMatcher and NewHybridMatcher constructors.
//
// Strategy takes one of the names that also exist as MatcherStrategy*
// constants ("embedding", "llm-judge", "hybrid"). LLM, Embedding and
// JudgePrompt are tagged omitempty. Threshold is a plain float64 here,
// whereas the per-test Expectation.Threshold is a *float64 so that an
// override can be detected.
type SemanticConfig struct {
	Strategy    string           `yaml:"strategy"`               // embedding | llm-judge | hybrid
	LLM         *LLMConfig       `yaml:"llm,omitempty"`          // LLM configuration for llm-judge strategy
	Embedding   *EmbeddingConfig `yaml:"embedding,omitempty"`    // Embedding configuration
	Threshold   float64          `yaml:"threshold"`              // Similarity threshold (0.0 - 1.0)
	JudgePrompt string           `yaml:"judge_prompt,omitempty"` // Custom judge prompt template
}

SemanticConfig defines semantic matching configuration

type SuiteResults ¶

// SuiteResults represents results for an entire test suite. It is returned
// by Runner.Run and consumed by Reporter.Generate; the AllPassed and PassRate
// methods are defined on it. It has no serialization tags.
//
// Results holds one TestResult value per entry.
// NOTE(review): presumably PassedTests + FailedTests == TotalTests ==
// len(Results) and Duration == EndTime - StartTime, but this page does not
// show the code that fills the struct — confirm against Runner.Run.
type SuiteResults struct {
	SuiteName   string
	TotalTests  int
	PassedTests int
	FailedTests int
	Duration    time.Duration
	Results     []TestResult
	StartTime   time.Time
	EndTime     time.Time
}

SuiteResults represents results for an entire test suite

func (*SuiteResults) AllPassed ¶

func (sr *SuiteResults) AllPassed() bool

AllPassed returns true if all tests passed

func (*SuiteResults) PassRate ¶

func (sr *SuiteResults) PassRate() float64

PassRate returns the pass rate as a percentage

type Target ¶

// Target defines where tests will be executed. It is embedded by value in
// TestSuite under the yaml key "target".
//
// NOTE(review): the Type comment mentions "http, grpc, etc.", but the only
// target implementation visible on this page is HTTPTarget — confirm which
// values are actually supported.
type Target struct {
	Type string `yaml:"type"` // http, grpc, etc.
	URL  string `yaml:"url"`  // Base URL for HTTP targets
}

Target defines where tests will be executed

type Test ¶

// Test represents a single test case within TestSuite.Tests.
//
// Name, Input and Expect are always emitted under their yaml keys;
// Description, Timeout and Metadata are tagged omitempty. Expect is held by
// value, not by pointer.
//
// NOTE(review): Timeout is a bare int and its unit is not stated here.
// HTTPTarget.Invoke also takes an int timeout — presumably the same unit,
// with 0 meaning "use the suite timeout"; confirm against Runner.Run.
type Test struct {
	Name        string                 `yaml:"name"`
	Description string                 `yaml:"description,omitempty"`
	Input       string                 `yaml:"input"`
	Expect      Expectation            `yaml:"expect"`
	Timeout     int                    `yaml:"timeout,omitempty"` // Override suite timeout
	Metadata    map[string]interface{} `yaml:"metadata,omitempty"`
}

Test represents a single test case

type TestResult ¶

// TestResult represents the result of a single test; SuiteResults.Results is
// a slice of these values.
//
// Only the three semantic-matching fields carry json tags (all omitempty).
// The fields above them have no tags, so encoding/json emits them under their
// Go field names (for example "TestName", "Passed").
// NOTE(review): confirm that this mixed key casing is intended for JSON
// reports.
//
// MatchStrategy, Confidence and MatchDetails have counterparts of the same
// types on MatchResult (Strategy, Confidence, Details).
type TestResult struct {
	TestName       string
	Passed         bool
	Duration       time.Duration
	ActualOutput   string
	ExpectedOutput string
	ErrorMessage   string
	TraceID        string
	Metadata       map[string]interface{}

	// Semantic matching results
	MatchStrategy string                 `json:"match_strategy,omitempty"` // embedding, llm-judge, hybrid
	Confidence    float64                `json:"confidence,omitempty"`     // 0.0 - 1.0
	MatchDetails  map[string]interface{} `json:"match_details,omitempty"`  // Strategy-specific details
}

TestResult represents the result of a single test

type TestSuite ¶

// TestSuite represents a collection of tests. It is the value produced by
// ParseTestFile and the input to Runner.Run.
//
// Semantic is an optional pointer to the suite-wide SemanticConfig;
// Expectation carries per-test override fields for the same settings.
// Metadata is a map[string]string here, whereas Test.Metadata is a
// map[string]interface{}. Description has no omitempty here, unlike
// Test.Description.
type TestSuite struct {
	Name        string            `yaml:"name"`
	Description string            `yaml:"description"`
	Target      Target            `yaml:"target"`
	Semantic    *SemanticConfig   `yaml:"semantic,omitempty"` // Global semantic matching config
	Tests       []Test            `yaml:"tests"`
	Metadata    map[string]string `yaml:"metadata,omitempty"`
}

TestSuite represents a collection of tests

func ParseTestFile ¶

func ParseTestFile(filePath string) (*TestSuite, error)

ParseTestFile parses a YAML test file into a TestSuite

type TraceExpectation ¶

// TraceExpectation defines expectations for trace data. It is referenced from
// Expectation.Trace. Every field is tagged omitempty.
//
// LLMCalls, MinSteps and MaxSteps are plain ints, so an explicit 0 cannot be
// distinguished from an absent key after unmarshalling.
// NOTE(review): presumably 0 means "no constraint", ToolCalls lists tool
// names expected in the trace (compare InvokeResponse.ToolsCalled), and
// ExecutionPath lists expected step names — none of this is shown on this
// page; confirm against the code that evaluates trace expectations.
type TraceExpectation struct {
	ToolCalls     []string `yaml:"tool_calls,omitempty"`
	LLMCalls      int      `yaml:"llm_calls,omitempty"`
	ExecutionPath []string `yaml:"execution_path,omitempty"`
	MinSteps      int      `yaml:"min_steps,omitempty"`
	MaxSteps      int      `yaml:"max_steps,omitempty"`
}

TraceExpectation defines expectations for trace data

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL