Documentation
¶
Index ¶
- Constants
- Variables
- func BuildHeaders(apiKey string) map[string]string
- func CallLLMFallback(systemPrompt, userMessage string, apiKey string, provider string) (string, error)
- func CreateSpanFromTraceId(ctx context.Context, traceId string, parentSpanId string, spanName string) (context.Context, trace.Span)
- func ExtractTraceContext(ctx context.Context, carrier map[string]string) context.Context
- func FlushSpans(ctx context.Context) error
- func FormatHTTPError(resp *http.Response, operation string) string
- func GetAIQAClient() error
- func GetAPIKey(apiKey string) string
- func GetAPIKeyInfo(ctx context.Context, apiKeyID string, serverURL, apiKey string) (map[string]any, error)
- func GetActiveSpan(ctx context.Context) trace.Span
- func GetDefaultIgnorePatterns() []string
- func GetIgnoreRecursive() bool
- func GetMaxObjectStrChars() int
- func GetModelFromServer(ctx context.Context, modelId, serverUrl, apiKey string) (apiKeyOut string, provider string, _ error)
- func GetOrganisation(ctx context.Context, organisationID string, serverURL, apiKey string) (map[string]any, error)
- func GetServerURL(serverURL string) string
- func GetSpan(ctx context.Context, spanId string, organisationId string) (map[string]any, error)
- func GetSpanId(ctx context.Context) string
- func GetTraceId(ctx context.Context) string
- func InitTracing(serverURL, apiKey string, samplingRateArg ...float64) error
- func InjectTraceContext(ctx context.Context, carrier map[string]string)
- func IsTracingEnabled() bool
- func SerializeForSpan(value any) any
- func SerializeValue(value any) string
- func SetComponentTag(tag string)
- func SetConversationId(ctx context.Context, conversationId string) bool
- func SetDefaultIgnorePatterns(patterns []string)
- func SetIgnoreRecursive(recursive bool)
- func SetProviderAndModel(ctx context.Context, provider *string, model *string) bool
- func SetSpanAttribute(ctx context.Context, attributeName string, attributeValue any) bool
- func SetTokenUsage(ctx context.Context, inputTokens any, outputTokens any, totalTokens any, ...) bool
- func ShutdownTracing(ctx context.Context) error
- func SubmitFeedback(ctx context.Context, traceId string, feedback FeedbackOptions) error
- func TraceBedrockConverse(ctx context.Context, converseInput *bedrockruntime.ConverseInput, ...)
- type CallMyCodeFunc
- type Dataset
- type Example
- type Experiment
- type ExperimentRunner
- func (er *ExperimentRunner) CreateExample(ctx context.Context, example *Example) (*Example, error)
- func (er *ExperimentRunner) CreateExperiment(ctx context.Context, experimentSetup *Experiment) (*Experiment, error)
- func (er *ExperimentRunner) GetDataset(ctx context.Context) (*Dataset, error)
- func (er *ExperimentRunner) GetExample(ctx context.Context, exampleId string) (Example, error)
- func (er *ExperimentRunner) GetExampleInputs(ctx context.Context, limit int) ([]Example, error)
- func (er *ExperimentRunner) GetSummaryResults(ctx context.Context) (map[string]MetricStats, error)
- func (er *ExperimentRunner) LoadExperiment(ctx context.Context, experimentId string) (*Experiment, error)
- func (er *ExperimentRunner) Run(ctx context.Context, ...) (int, error)
- func (er *ExperimentRunner) RunExample(ctx context.Context, example Example, callMyCode CallMyCodeFunc) (*Result, error)
- func (er *ExperimentRunner) RunSomeExamples(ctx context.Context, ...) (int, error)
- func (er *ExperimentRunner) ScoreAndStore(ctx context.Context, result *Result, output any, scores map[string]float64) (*Result, error)
- type ExperimentRunnerOptions
- type FeedbackOptions
- type LLMCallFn
- type Metric
- type MetricResult
- type MetricStats
- type Result
- type ScorerForMetricFn
- type TracingOptions
Constants ¶
const ( // AIQATracerName is the name of the OpenTelemetry tracer used by AIQA AIQATracerName = "aiqa-tracer" // Version is the version of the AIQA client library // This is automatically updated by set-version-json.sh Version = "0.7.6" // LogTag is used in all logging output to identify AIQA messages LogTag = "AIQA" )
const ( AIQA_TRACE_ID = "aiqa.experiment" AIQA_EXAMPLE_ID = "aiqa.example" )
const ERROR_PREFIX_STOP_EXPERIMENT = "STOP_EXPERIMENT"
Any error that starts with this prefix will be treated as a request to stop the experiment.
Variables ¶
var ErrStopExperiment = errors.New(ERROR_PREFIX_STOP_EXPERIMENT)
ErrStopExperiment is returned by callMyCode to signal that Run() should stop the experiment Run early. This can be used when callMyCode detects issues like connectivity problems.
Functions ¶
func BuildHeaders ¶
BuildHeaders builds HTTP headers for AIQA API requests Note: net/http automatically handles gzip/deflate decompression when Accept-Encoding header is set Checks AIQA_API_KEY first, then falls back to OTEL_EXPORTER_OTLP_HEADERS if not set
func CallLLMFallback ¶ added in v0.7.5
func CallLLMFallback(systemPrompt, userMessage string, apiKey string, provider string) (string, error)
CallLLMFallback calls OpenAI or Anthropic using apiKey and provider, or env OPENAI_API_KEY / ANTHROPIC_API_KEY.
func CreateSpanFromTraceId ¶
func CreateSpanFromTraceId(ctx context.Context, traceId string, parentSpanId string, spanName string) (context.Context, trace.Span)
CreateSpanFromTraceId creates a new span that continues from an existing trace ID. This is useful for linking traces across different services or agents.
traceId: The trace ID as a hexadecimal string (32 characters) parentSpanId: Optional parent span ID as a hexadecimal string (16 characters).
If provided, the new span will be a child of this span.
spanName: Name for the new span (default: "continued_span") Returns: A context with the new span and the span itself. Use it with defer span.End().
func ExtractTraceContext ¶
ExtractTraceContext extracts trace context from a carrier (e.g., HTTP headers). Use this to continue a trace that was started in another service.
carrier: Map containing trace context (e.g., HTTP headers map) Returns: A context object that can be used with tracer.Start()
func FlushSpans ¶
FlushSpans flushes all pending spans to the server
func FormatHTTPError ¶ added in v0.6.2
FormatHTTPError formats an HTTP error message from a response object
func GetAIQAClient ¶ added in v0.6.2
func GetAIQAClient() error
GetAIQAClient initializes and returns the AIQA client singleton. This function is called automatically when WithTracing is first used, so you typically don't need to call it explicitly. However, you can call it manually if you want to: - Check if tracing is enabled (IsTracingEnabled()) - Initialize before the first WithTracing usage - Access the client state for advanced usage
The function loads environment variables (AIQA_SERVER_URL, AIQA_API_KEY, AIQA_COMPONENT_TAG) and initializes the tracing system.
The function is idempotent - calling it multiple times is safe and will only initialize once.
func GetAPIKey ¶
GetAPIKey gets API key from parameter or environment variable Checks AIQA_API_KEY first, then falls back to OTEL_EXPORTER_OTLP_HEADERS if not set
func GetAPIKeyInfo ¶ added in v0.6.2
func GetAPIKeyInfo(ctx context.Context, apiKeyID string, serverURL, apiKey string) (map[string]any, error)
GetAPIKeyInfo gets API key information via an API call
func GetActiveSpan ¶
GetActiveSpan returns the active span from context
func GetDefaultIgnorePatterns ¶ added in v0.6.2
func GetDefaultIgnorePatterns() []string
GetDefaultIgnorePatterns returns the default ignore patterns applied to all traced inputs and outputs. Default: ["_*"] (filters properties starting with '_') Returns a copy of the patterns to prevent external modification
func GetIgnoreRecursive ¶ added in v0.6.2
func GetIgnoreRecursive() bool
GetIgnoreRecursive returns whether ignore patterns are applied recursively to nested objects. Default: true (recursive filtering enabled)
func GetMaxObjectStrChars ¶
func GetMaxObjectStrChars() int
GetMaxObjectStrChars returns the maximum object string representation size in characters Configurable via AIQA_MAX_OBJECT_STR_CHARS environment variable (default: 1MB)
func GetModelFromServer ¶ added in v0.7.5
func GetModelFromServer(ctx context.Context, modelId, serverUrl, apiKey string) (apiKeyOut string, provider string, _ error)
GetModelFromServer fetches model from the AIQA server (with api_key if permitted). Returns empty values if the model is not found or has no api_key.
func GetOrganisation ¶ added in v0.6.2
func GetOrganisation(ctx context.Context, organisationID string, serverURL, apiKey string) (map[string]any, error)
GetOrganisation gets organisation information based on API key via an API call
func GetServerURL ¶
GetServerURL gets server URL from parameter or environment variable, with trailing slashes removed Checks AIQA_SERVER_URL first, then falls back to OTEL_EXPORTER_OTLP_ENDPOINT if not set
func GetSpan ¶
GetSpan gets a span by its ID from the AIQA server.
spanId: The span ID as a hexadecimal string (16 characters) or client span ID organisationId: Optional organisation ID. If empty, will try to get from AIQA_ORGANISATION_ID
environment variable. The organisation is typically extracted from the API key during authentication, but the API requires it as a query parameter.
Returns: The span data as a map, or nil if not found, and an error if the request failed
Example:
span, err := GetSpan(ctx, "abc123...", "")
if err != nil {
log.Fatal(err)
}
if span != nil {
log.Printf("Found span: %v", span["name"])
}
func GetSpanId ¶
GetSpanId gets the current span ID as a hexadecimal string (16 characters). Returns: The span ID as a hex string, or empty string if no active span exists.
func GetTraceId ¶
GetTraceId gets the current trace ID as a hexadecimal string (32 characters). Returns: The trace ID as a hex string, or empty string if no active span exists.
func InitTracing ¶
InitTracing initializes the OpenTelemetry tracer provider with AIQA exporter samplingRate: value between 0 and 1, where 0 = tracing is off, 1 = trace all If not provided, reads from AIQA_SAMPLING_RATE environment variable (default: 1.0) If a TracerProvider already exists, it will add the AIQA exporter to it instead of creating a new one. If AIQA_SERVER_URL or AIQA_API_KEY are not set, tracing will be gracefully disabled (functions will execute normally without tracing overhead).
Note: This function is now a wrapper around ensureTracingInitialized for backward compatibility. For lazy initialization, use GetAIQAClient() or just use WithTracing (which calls it automatically).
func InjectTraceContext ¶
InjectTraceContext injects the current trace context into a carrier (e.g., HTTP headers). This allows you to pass trace context to another service.
carrier: Map to inject trace context into (e.g., HTTP headers map)
func IsTracingEnabled ¶
func IsTracingEnabled() bool
IsTracingEnabled returns whether tracing is currently enabled. Tracing is disabled if AIQA_SERVER_URL or AIQA_API_KEY are not set.
func SerializeForSpan ¶
SerializeForSpan serializes a value for span attributes OpenTelemetry only accepts primitives (bool, string, bytes, int, float) or sequences of those Complex types (maps, structs) are converted to JSON strings
func SerializeValue ¶
SerializeValue serializes a value to JSON string for span attributes Applies data filters before serialization
func SetComponentTag ¶
func SetComponentTag(tag string)
SetComponentTag sets a custom component tag that will be added to all spans created by AIQA. This can also be set via the AIQA_COMPONENT_TAG environment variable. The component tag allows you to identify which component/system generated the spans - e.g. in the AIQA Traces view.
tag: A component identifier (e.g., "mynamespace.mysystem", "backend.api", etc.)
func SetConversationId ¶
SetConversationId sets the gen_ai.conversation.id attribute on the active span. This allows you to group multiple traces together that are part of the same conversation. See https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-events/ for more details.
conversationId: A unique identifier for the conversation (e.g., user session ID, chat ID, etc.) Returns: True if gen_ai.conversation.id was set, False if no active span found
func SetDefaultIgnorePatterns ¶ added in v0.6.2
func SetDefaultIgnorePatterns(patterns []string)
SetDefaultIgnorePatterns sets the default ignore patterns applied to all traced inputs and outputs. Set to nil or empty slice to disable default ignore patterns. Supports wildcards (e.g., "_*" matches "_apple", "_fruit").
func SetIgnoreRecursive ¶ added in v0.6.2
func SetIgnoreRecursive(recursive bool)
SetIgnoreRecursive sets whether ignore patterns are applied recursively to nested objects. When true (default), ignore patterns are applied at all nesting levels. When false, ignore patterns are only applied to top-level keys.
func SetProviderAndModel ¶
SetProviderAndModel sets provider and model attributes on the active span using OpenTelemetry semantic conventions for gen_ai. This allows you to explicitly record provider and model information. See https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/ for more details.
provider: Name of the AI provider (e.g., "openai", "anthropic", "google") (maps to gen_ai.provider.name) model: Name of the model used (e.g., "gpt-4", "claude-3-5-sonnet") (maps to gen_ai.request.model) Returns: True if at least one attribute was set, False if no active span found
func SetSpanAttribute ¶
SetSpanAttribute sets an attribute on the active span
func SetTokenUsage ¶
func SetTokenUsage(ctx context.Context, inputTokens any, outputTokens any, totalTokens any, cachedInputTokens any) bool
SetTokenUsage sets token usage attributes on the active span using OpenTelemetry semantic conventions for gen_ai. This allows you to explicitly record token usage information. See https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/ for more details.
All arguments may be nil. They can be *int, *int32, int, int32, int64, or float64. inputTokens: Number of input tokens used (maps to gen_ai.usage.input_tokens) outputTokens: Number of output tokens generated (maps to gen_ai.usage.output_tokens) totalTokens: Total number of tokens used (maps to gen_ai.usage.total_tokens) cachedInputTokens: Number of cached input tokens used (maps to gen_ai.usage.cached_input_tokens) Returns: True if at least one token usage attribute was set, False if no active span found
func ShutdownTracing ¶
ShutdownTracing shuts down the tracer provider and exporter. Note: If InitTracing attached to an existing TracerProvider, this will only gate exports rather than shutting down the shared provider. Resets initialized so a subsequent InitTracing/GetAIQAClient can re-initialize.
func SubmitFeedback ¶
func SubmitFeedback(ctx context.Context, traceId string, feedback FeedbackOptions) error
SubmitFeedback submits feedback for a trace by creating a new span with the same trace ID. This allows you to add feedback (thumbs-up, thumbs-down, comment) to a trace after it has completed.
traceId: The trace ID as a hexadecimal string (32 characters) feedback: Feedback options with ThumbsUp and Comment Returns: Error if feedback could not be submitted
Example:
// Submit positive feedback
thumbsUp := true
err := SubmitFeedback(ctx, "abc123...", FeedbackOptions{
ThumbsUp: &thumbsUp,
Comment: "Great response!",
})
// Submit negative feedback
thumbsDown := false
err := SubmitFeedback(ctx, "abc123...", FeedbackOptions{
ThumbsUp: &thumbsDown,
Comment: "Incorrect answer",
})
func TraceBedrockConverse ¶ added in v0.7.5
func TraceBedrockConverse(ctx context.Context, converseInput *bedrockruntime.ConverseInput, converseOutput *bedrockruntime.ConverseOutput)
Types ¶
type CallMyCodeFunc ¶ added in v0.7.6
CallMyCodeFunc is how the experiment runner calls the engine function, passing in the input from an Example, plus any parameters for the experiment. Error handling:
- Results are not recorded if the error is not nil.
- If the error is an instance of ErrStopExperiment, or if the error begins with ERROR_PREFIX_STOP_EXPERIMENT, then the experiment runner will stop the experiment.
type Dataset ¶
type Dataset struct {
Id string `json:"id"`
Organisation string `json:"organisation"`
Name string `json:"name"`
Description string `json:"description,omitempty"`
Tags []string `json:"tags,omitempty"`
Metrics []Metric `json:"metrics,omitempty"`
Created time.Time `json:"created"`
Updated time.Time `json:"updated"`
}
Dataset represents a dataset
type Example ¶
type Example struct {
Id string `json:"id,omitempty"`
Name string `json:"name,omitempty"`
// The source trace, if created from spans. Do not edit this - it is set by the server.
// For the trace relating to running an example, see Result.Trace.
Trace string `json:"trace,omitempty"`
Dataset string `json:"dataset"`
Organisation string `json:"organisation"`
Spans []any `json:"spans,omitempty"`
Input any `json:"input,omitempty"`
Outputs map[string]any `json:"outputs"`
Created time.Time `json:"created"`
Updated time.Time `json:"updated"`
Metrics []Metric `json:"metrics,omitempty"`
Tags []string `json:"tags,omitempty"`
}
Example represents an example from a dataset
type Experiment ¶
type Experiment struct {
Id string `json:"id"`
Dataset string `json:"dataset"`
Organisation string `json:"organisation"`
Name string `json:"name,omitempty"`
Parameters map[string]any `json:"parameters,omitempty"`
Summaries map[string]any `json:"summaries,omitempty"`
Created time.Time `json:"created"`
Updated time.Time `json:"updated"`
Results []Result `json:"results,omitempty"`
}
Experiment represents an experiment
type ExperimentRunner ¶
type ExperimentRunner struct {
// contains filtered or unexported fields
}
ExperimentRunner is the main class for running experiments on datasets. It can create an experiment, run it, and score the results. Handles setting up environment variables and passing parameters to the engine function.
func NewExperimentRunner ¶
func NewExperimentRunner(options ExperimentRunnerOptions) *ExperimentRunner
NewExperimentRunner creates a new ExperimentRunner
func (*ExperimentRunner) CreateExample ¶ added in v0.7.5
CreateExample creates an example in a dataset. Derives organisation from dataset when not set.
func (*ExperimentRunner) CreateExperiment ¶
func (er *ExperimentRunner) CreateExperiment(ctx context.Context, experimentSetup *Experiment) (*Experiment, error)
CreateExperiment creates an experiment if one does not exist. Derives organisation from dataset when not set.
func (*ExperimentRunner) GetDataset ¶
func (er *ExperimentRunner) GetDataset(ctx context.Context) (*Dataset, error)
GetDataset fetches the dataset to get its metrics. Caches the result and derives organisation from it when not set.
func (*ExperimentRunner) GetExample ¶ added in v0.7.5
GetExample fetches a single example by ID.
func (*ExperimentRunner) GetExampleInputs ¶
GetExampleInputs fetches example inputs from the dataset
func (*ExperimentRunner) GetSummaryResults ¶
func (er *ExperimentRunner) GetSummaryResults(ctx context.Context) (map[string]MetricStats, error)
GetSummaryResults fetches summary results from the server
func (*ExperimentRunner) LoadExperiment ¶ added in v0.7.5
func (er *ExperimentRunner) LoadExperiment(ctx context.Context, experimentId string) (*Experiment, error)
LoadExperiment loads an existing experiment by ID. This allows the experiment to be resumed.
func (*ExperimentRunner) Run ¶
func (er *ExperimentRunner) Run(ctx context.Context, engine func(ctx context.Context, input any, parameters map[string]any) (any, error)) (int, error)
Run runs an engine function on all examples and scores the results engine: function that takes context, input and parameters and returns output Checks if results already exist for an example before calling RunExample, allowing experiments to be resumed.
func (*ExperimentRunner) RunExample ¶
func (er *ExperimentRunner) RunExample(ctx context.Context, example Example, callMyCode CallMyCodeFunc, ) (*Result, error)
RunExample runs the engine on an example with the given parameters (looping over comparison parameters), and scores the result. Also calls ScoreAndStore to store the result in the server. If scorerForMetricId is non-nil, metrics are taken from dataset + example; for each metric either the custom scorer is used or, when metric.Type == "llm", the built-in LLM-as-judge is used (see LlmCallFn and OPENAI_API_KEY/ANTHROPIC_API_KEY). callMyCode receives a context.Context as the first parameter, which can be used to propagate trace context in HTTP calls.
func (*ExperimentRunner) RunSomeExamples ¶ added in v0.7.5
func (er *ExperimentRunner) RunSomeExamples(ctx context.Context, engine func(ctx context.Context, input any, parameters map[string]any) (any, error), tag string, limit int) (int, error)
RunSomeExamples runs an engine function on all or some of the examples and scores the results
type ExperimentRunnerOptions ¶
type ExperimentRunnerOptions struct {
Name string
DatasetId string
ExperimentId string
ServerUrl string
ApiKey string
OrganisationId string
// LlmCallFn optional: called for LLM-as-judge metrics when type is "llm".
// If nil, uses a default function that uses OPENAI_API_KEY/ANTHROPIC_API_KEY or model from server.
// If you want to track your token use on this too - either use the default (which has token tracking),
// or provide your own with token tracking.
LlmCallFn LLMCallFn
// ScorerForMetricId optional: map metric id -> scorer; used in RunExample when scoring metrics. If nil, no per-metric scoring.
ScorerForMetricId map[string]ScorerForMetricFn
// False by default. If true, Run and RunSomeExamples will re-run examples (once) if they do not have a score for each metric.
// This is useful if run-example hit a transient error.
// It can be wasteful if the error is permanent.
RerunExamplesWithMissingScores bool
}
ExperimentRunnerOptions contains options for creating an ExperimentRunner
type FeedbackOptions ¶
type FeedbackOptions struct {
ThumbsUp *bool // true for positive, false for negative, nil for neutral
Comment string // Optional text comment
}
FeedbackOptions contains options for submitting feedback
type LLMCallFn ¶ added in v0.7.5
LLMCallFn is a function that calls an LLM with system and user prompts and returns raw content (typically JSON). If nil, the runner uses OPENAI_API_KEY / ANTHROPIC_API_KEY env or fetches model from server.
type Metric ¶
type Metric struct {
Id string `json:"id,omitempty"`
Name string `json:"name"`
Description string `json:"description,omitempty"`
Unit string `json:"unit,omitempty"`
Type string `json:"type"` // "javascript", "llm", or "number"
Parameters map[string]any `json:"parameters,omitempty"`
Prompt string `json:"prompt,omitempty"`
PromptCriteria string `json:"promptCriteria,omitempty"`
Code string `json:"code,omitempty"`
Model string `json:"model,omitempty"`
Provider string `json:"provider,omitempty"`
Value any `json:"value,omitempty"`
}
Metric represents a metric for scoring
type MetricResult ¶ added in v0.7.5
type MetricResult struct {
Score float64 `json:"score"`
Message string `json:"message,omitempty"`
Error string `json:"error,omitempty"`
}
MetricResult is the result of evaluating one metric on an output (score 0–1, optional message/error).
func ParseLLMResponse ¶ added in v0.7.5
func ParseLLMResponse(content string) (MetricResult, error)
ParseLLMResponse parses LLM response content (JSON with "score" and optional "message") into MetricResult. Clamps score to [0, 1].
func ScoreLLMMetricLocal ¶ added in v0.7.5
func ScoreLLMMetricLocal( input, output any, example Example, metric Metric, llmCallFn LLMCallFn, ) (MetricResult, error)
ScoreLLMMetricLocal scores one LLM-as-judge metric: builds prompts from metric, calls LLM, parses response.
type MetricStats ¶
type MetricStats struct {
Mean float64 `json:"mean"`
Min float64 `json:"min"`
Max float64 `json:"max"`
Var float64 `json:"var"`
Count int `json:"count"`
}
MetricStats represents statistics for a metric
type Result ¶
type Result struct {
Example string `json:"example"`
Trace string `json:"trace,omitempty"`
RateLimited bool `json:"rateLimited,omitempty"`
Scores map[string]float64 `json:"scores"`
Messages map[string]string `json:"messages,omitempty"`
Errors map[string]string `json:"errors,omitempty"`
}
Result represents the result for an example. See Experiment.ts Result interface.
type ScorerForMetricFn ¶ added in v0.7.5
type ScorerForMetricFn func(input, output any, example Example, metric Metric, params map[string]any) (MetricResult, error)
ScorerForMetricFn scores one metric given input, output, example, metric definition, and parameters. Returns MetricResult with score in [0,1], optional message and error.