Documentation
¶
Overview ¶
Package client provides LLM provider clients for Anthropic, OpenAI, and OpenAI-compatible APIs with streaming, retry, and provider detection.
Index ¶
- Variables
- func AddCacheBreakpoints(messages []EyrieMessage) []anthropicCachedMessage
- func DetectProvider() string
- func FreezeRegistry()
- func ParseCustomHeaders() map[string]string
- func RegisterDynamicProvider(name, baseURL, envKey string) error
- func ResolveDefaultModel(provider string) string
- func ResolveProviderModelEnvOverride(provider string) string
- type AnthropicClient
- func (c *AnthropicClient) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
- func (c *AnthropicClient) Name() string
- func (c *AnthropicClient) Ping(ctx context.Context) error
- func (c *AnthropicClient) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
- type AnthropicClientConfig
- type BatchClient
- type BatchRequest
- type BatchResponse
- type BatchResult
- type CacheConfig
- type CacheControlType
- type CacheStatsResult
- type CachedContent
- type CachedProvider
- func (cp *CachedProvider) CacheStats() CacheStatsResult
- func (cp *CachedProvider) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
- func (cp *CachedProvider) ClearCache()
- func (cp *CachedProvider) Name() string
- func (cp *CachedProvider) Ping(ctx context.Context) error
- func (cp *CachedProvider) SetEnabled(enabled bool)
- func (cp *CachedProvider) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
- type CallMetrics
- type ChatOptions
- type ClientOption
- type ContinuationConfig
- type EmbeddingParams
- type EmbeddingRequest
- type EmbeddingResponse
- type EyrieClient
- func (c *EyrieClient) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
- func (c *EyrieClient) GetProviderInfo(provider string) *ProviderRegistryConfig
- func (c *EyrieClient) GetProviders() []string
- func (c *EyrieClient) Ping(ctx context.Context, provider string) error
- func (c *EyrieClient) SetAPIKey(provider, apiKey string)
- func (c *EyrieClient) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
- func (c *EyrieClient) StreamChatContinue(ctx context.Context, messages []EyrieMessage, opts ChatOptions, ...) (*StreamResult, error)
- type EyrieConfig
- type EyrieError
- type EyrieMessage
- type EyrieResponse
- type EyrieStreamEvent
- type EyrieTool
- type EyrieUsage
- type FallbackProvider
- func (fp *FallbackProvider) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
- func (fp *FallbackProvider) Name() string
- func (fp *FallbackProvider) Ping(ctx context.Context) error
- func (fp *FallbackProvider) SetLogger(l *slog.Logger)
- func (fp *FallbackProvider) Stats() map[string]int64
- func (fp *FallbackProvider) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
- type MetricsCollector
- type MockCall
- type MockMode
- type MockProvider
- func (m *MockProvider) CallCount() int
- func (m *MockProvider) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
- func (m *MockProvider) LastCall() *MockCall
- func (m *MockProvider) MarshalCalls() string
- func (m *MockProvider) Name() string
- func (m *MockProvider) Ping(_ context.Context) error
- func (m *MockProvider) Reset()
- func (m *MockProvider) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
- type OpenAIClient
- func (c *OpenAIClient) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
- func (c *OpenAIClient) Name() string
- func (c *OpenAIClient) Ping(ctx context.Context) error
- func (c *OpenAIClient) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
- type OpenAICompatConfig
- type Provider
- type ProviderRegistryConfig
- type ProviderType
- type RateLimitConfig
- type RateLimitedProvider
- func (r *RateLimitedProvider) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
- func (r *RateLimitedProvider) Name() string
- func (r *RateLimitedProvider) Ping(ctx context.Context) error
- func (r *RateLimitedProvider) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
- type RateLimiter
- type ResponseFormat
- type RetryConfig
- type SSEEvent
- type StreamResult
- type ToolCall
- type ToolResult
- type WeightedProvider
- func (wp *WeightedProvider) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
- func (wp *WeightedProvider) Name() string
- func (wp *WeightedProvider) Ping(ctx context.Context) error
- func (wp *WeightedProvider) Stats() map[string]int64
- func (wp *WeightedProvider) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
- type WeightedProviderConfig
Constants ¶
This section is empty.
Variables ¶
var ( OpenAICompat = OpenAICompatConfig{ SupportsStore: true, SupportsDeveloperRole: true, SupportsReasoningEffort: true, SupportsUsageInStreaming: true, MaxTokensField: "max_completion_tokens", } GrokCompat = OpenAICompatConfig{ MaxTokensField: "max_tokens", } OpenRouterCompat = OpenAICompatConfig{ ThinkingFormat: "openrouter", MaxTokensField: "max_tokens", SupportsUsageInStreaming: true, } GeminiCompat = OpenAICompatConfig{ MaxTokensField: "max_tokens", SupportsUsageInStreaming: true, } CanopyWaveCompat = OpenAICompatConfig{ MaxTokensField: "max_tokens", } OllamaCompat = OpenAICompatConfig{ MaxTokensField: "max_tokens", } OpenCodeGoCompat = OpenAICompatConfig{ MaxTokensField: "max_tokens", } )
Per-provider compat configs.
var CoreProviders = map[string]ProviderRegistryConfig{ "anthropic": {Name: "anthropic", Type: ProviderTypeAnthropic, EnvKey: "ANTHROPIC_API_KEY", SupportsStreaming: true, SupportsTools: true, SupportsReasoning: true}, "openai": {Name: "openai", Type: ProviderTypeOpenAI, BaseURL: "https://api.openai.com/v1", EnvKey: "OPENAI_API_KEY", SupportsStreaming: true, SupportsTools: true, SupportsReasoning: true}, }
CoreProviders are providers with dedicated SDKs.
var OpenAICompatibleProviders = map[string]ProviderRegistryConfig{ "grok": {Name: "grok", Type: ProviderTypeOpenAICompatible, BaseURL: "https://api.x.ai/v1", EnvKey: "XAI_API_KEY", SupportsStreaming: true, SupportsTools: true, SupportsReasoning: true}, "openrouter": {Name: "openrouter", Type: ProviderTypeOpenAICompatible, BaseURL: "https://openrouter.ai/api/v1", EnvKey: "OPENROUTER_API_KEY", SupportsStreaming: true, SupportsTools: true, SupportsReasoning: true}, "canopywave": {Name: "canopywave", Type: ProviderTypeOpenAICompatible, BaseURL: "https://inference.canopywave.io/v1", EnvKey: "CANOPYWAVE_API_KEY", SupportsStreaming: true, SupportsTools: true, SupportsReasoning: true}, "gemini": {Name: "gemini", Type: ProviderTypeOpenAICompatible, BaseURL: "https://api.gemini.google.com/v1/forward", EnvKey: "GEMINI_API_KEY", SupportsStreaming: true, SupportsTools: true, SupportsReasoning: true}, "ollama": {Name: "ollama", Type: ProviderTypeOpenAICompatible, BaseURL: "http://localhost:11434/v1", EnvKey: "OLLAMA_API_KEY", SupportsStreaming: true, SupportsTools: true, SupportsReasoning: false}, "opencodego": {Name: "opencodego", Type: ProviderTypeOpenAICompatible, BaseURL: config.DefaultOpenCodeGoBaseURL, EnvKey: "OPENCODEGO_API_KEY", SupportsStreaming: true, SupportsTools: true, SupportsReasoning: true}, }
OpenAICompatibleProviders use the OpenAI SDK with custom baseUrl.
var Version = "0.5.0"
Version is set by the root package and used in User-Agent headers.
Functions ¶
func AddCacheBreakpoints ¶
func AddCacheBreakpoints(messages []EyrieMessage) []anthropicCachedMessage
AddCacheBreakpoints returns a copy of messages with Anthropic cache_control breakpoints applied following the recommended pattern:
- Breakpoint on the second-to-last message (caches conversation prefix)
- The last user message is left uncached (always new)
Only applies to messages with role "user" or "assistant". No-op if fewer than 2 messages.
func DetectProvider ¶
func DetectProvider() string
DetectProvider detects the active provider from env vars.
func FreezeRegistry ¶
func FreezeRegistry()
FreezeRegistry prevents further provider registrations. Called automatically after first provider lookup.
func ParseCustomHeaders ¶
func ParseCustomHeaders() map[string]string
ParseCustomHeaders parses the GRAYCODE_CUSTOM_HEADERS env var into a map.
func RegisterDynamicProvider ¶
func RegisterDynamicProvider(name, baseURL, envKey string) error
RegisterDynamicProvider adds a user-defined OpenAI-compatible provider at runtime. name is the provider key (e.g. "my-local-llm"), baseURL is the API base (e.g. "http://localhost:8080/v1"), and envKey is the environment variable that holds the API key (e.g. "MY_LLM_API_KEY"). If envKey is empty, the provider is treated like ollama (no key required). Returns an error if the registry is frozen (after the first provider lookup).
func ResolveDefaultModel ¶
func ResolveDefaultModel(provider string) string
ResolveDefaultModel resolves the default model for a provider from the catalog.
func ResolveProviderModelEnvOverride ¶
func ResolveProviderModelEnvOverride(provider string) string
ResolveProviderModelEnvOverride resolves the model env override for a provider.
Types ¶
type AnthropicClient ¶
type AnthropicClient struct {
// contains filtered or unexported fields
}
AnthropicClient implements Provider for the Anthropic Messages API.
func NewAnthropicClient ¶
func NewAnthropicClient(apiKey, baseURL string, opts ...ClientOption) *AnthropicClient
NewAnthropicClient creates a configured Anthropic client.
func (*AnthropicClient) Chat ¶
func (c *AnthropicClient) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
Chat sends a non-streaming message to Anthropic. NOTE: Anthropic does not support a native JSON mode (response_format). Structured output with Anthropic is achieved via the tool-use pattern (defining a tool whose input_schema is your desired output schema). This is not implemented here; opts.ResponseFormat is ignored for Anthropic. Future work: implement tool-use-based structured output for Anthropic.
func (*AnthropicClient) Name ¶
func (c *AnthropicClient) Name() string
Name returns the provider name.
func (*AnthropicClient) Ping ¶
func (c *AnthropicClient) Ping(ctx context.Context) error
Ping checks connectivity to the Anthropic API.
func (*AnthropicClient) StreamChat ¶
func (c *AnthropicClient) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
StreamChat sends a streaming message to Anthropic.
type AnthropicClientConfig ¶
type AnthropicClientConfig struct {
APIKey string `json:"api_key,omitempty"`
DefaultHeaders map[string]string `json:"default_headers,omitempty"`
Timeout int `json:"timeout,omitempty"`
MaxRetries int `json:"max_retries,omitempty"`
Provider string `json:"provider,omitempty"`
BaseURL string `json:"base_url,omitempty"`
}
AnthropicClientConfig holds config for creating an Anthropic client.
type BatchClient ¶
type BatchClient struct {
// contains filtered or unexported fields
}
BatchClient handles Anthropic Message Batches API (50% cost discount).
func NewBatchClient ¶
func NewBatchClient(apiKey, baseURL string) *BatchClient
NewBatchClient creates a batch client for Anthropic's batch API.
func (*BatchClient) Poll ¶
func (bc *BatchClient) Poll(ctx context.Context, batchID string) (*BatchResult, error)
Poll checks the status of a batch. Returns the result when complete.
func (*BatchClient) Submit ¶
func (bc *BatchClient) Submit(ctx context.Context, requests []BatchRequest) (string, error)
Submit sends a batch of requests. Returns the batch ID for polling.
type BatchRequest ¶
type BatchRequest struct {
CustomID string `json:"custom_id"`
Messages []EyrieMessage `json:"messages"`
Options ChatOptions `json:"options"`
}
BatchRequest represents a single request in a batch.
type BatchResponse ¶
type BatchResponse struct {
CustomID string `json:"custom_id"`
Response *EyrieResponse `json:"response,omitempty"`
Error string `json:"error,omitempty"`
}
BatchResponse represents a single response from a batch.
type BatchResult ¶
type BatchResult struct {
ID string `json:"id"`
Status string `json:"status"` // "in_progress", "ended", "failed"
Responses []BatchResponse `json:"responses,omitempty"`
CreatedAt time.Time `json:"created_at"`
}
BatchResult holds the overall batch operation result.
type CacheConfig ¶
type CacheConfig struct {
// MaxAge is how long cache entries remain valid. Default: 5 minutes.
MaxAge time.Duration
// MaxSize is the maximum number of cached responses. Default: 100.
// When exceeded, the least-recently-used entry is evicted.
MaxSize int
// Enabled toggles caching. Default: true.
// When false, the CachedProvider passes all requests through unchanged.
Enabled bool
// TemperatureThreshold is the temperature above which responses are not cached.
// Default: 0.5. Responses with temperature > threshold are expected to vary,
// so caching them would defeat the purpose.
TemperatureThreshold float64
}
CacheConfig controls the behavior of CachedProvider.
func DefaultCacheConfig ¶
func DefaultCacheConfig() CacheConfig
DefaultCacheConfig returns a CacheConfig with sensible defaults.
type CacheControlType ¶
type CacheControlType string
CacheControlType is the type of cache control.
const ( // CacheControlEphemeral caches content for up to 5 minutes. CacheControlEphemeral CacheControlType = "ephemeral" )
type CacheStatsResult ¶
type CacheStatsResult struct {
Size int `json:"size"`
MaxSize int `json:"max_size"`
Enabled bool `json:"enabled"`
}
CacheStatsResult holds cache statistics.
type CachedContent ¶
type CachedContent struct {
Type string `json:"type"`
Text string `json:"text"`
CacheControl interface{} `json:"cache_control,omitempty"`
}
CachedContent wraps a content string with cache control metadata.
type CachedProvider ¶
type CachedProvider struct {
// contains filtered or unexported fields
}
CachedProvider wraps a Provider and caches non-streaming responses based on a hash of the input parameters. Inspired by maximhq/bifrost's caching layer.
CachedProvider is safe for concurrent use.
func NewCachedProvider ¶
func NewCachedProvider(inner Provider, cfg CacheConfig) *CachedProvider
NewCachedProvider wraps inner with a response cache configured by cfg. Zero-value fields in cfg are replaced with defaults.
func (*CachedProvider) CacheStats ¶
func (cp *CachedProvider) CacheStats() CacheStatsResult
CacheStats returns current cache statistics: the number of cached entries, the configured maximum size, and whether caching is enabled.
func (*CachedProvider) Chat ¶
func (cp *CachedProvider) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
Chat checks the cache first. On a miss, it calls the inner provider and caches the response (if the temperature is not too high).
func (*CachedProvider) ClearCache ¶
func (cp *CachedProvider) ClearCache()
ClearCache removes all cached entries.
func (*CachedProvider) Name ¶
func (cp *CachedProvider) Name() string
Name returns the inner provider's name.
func (*CachedProvider) Ping ¶
func (cp *CachedProvider) Ping(ctx context.Context) error
Ping delegates to the inner provider (no caching).
func (*CachedProvider) SetEnabled ¶
func (cp *CachedProvider) SetEnabled(enabled bool)
SetEnabled toggles caching at runtime.
func (*CachedProvider) StreamChat ¶
func (cp *CachedProvider) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
StreamChat delegates to the inner provider without caching. Streaming responses are inherently incremental and not suitable for simple response caching.
type CallMetrics ¶
type CallMetrics struct {
Model string `json:"model"`
Provider string `json:"provider"`
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
CacheReadTokens int `json:"cache_read_tokens"`
CacheCreationTokens int `json:"cache_creation_tokens"`
LatencyMs int64 `json:"latency_ms"`
Timestamp time.Time `json:"timestamp"`
}
CallMetrics records telemetry for a single LLM API call.
type ChatOptions ¶
type ChatOptions struct {
Provider string `json:"provider,omitempty"`
Model string `json:"model,omitempty"`
Temperature *float64 `json:"temperature,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
Stream bool `json:"stream,omitempty"`
Tools []EyrieTool `json:"tools,omitempty"`
System string `json:"system,omitempty"`
EnableCaching bool `json:"enable_caching,omitempty"`
ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
}
ChatOptions holds options for a chat request.
type ClientOption ¶
type ClientOption struct {
// contains filtered or unexported fields
}
ClientOption configures clients.
func WithHTTPClient ¶
func WithHTTPClient(hc *http.Client) ClientOption
WithHTTPClient sets a custom HTTP client.
func WithTimeout ¶
func WithTimeout(d time.Duration) ClientOption
WithTimeout sets the HTTP client timeout.
type ContinuationConfig ¶
type ContinuationConfig struct {
// MaxContinuations is the maximum number of continuation calls (default 3).
MaxContinuations int
// MaxTotalTokens caps the total output tokens across all continuations (0 = unlimited).
MaxTotalTokens int
}
ContinuationConfig controls output continuation behavior.
func DefaultContinuationConfig ¶
func DefaultContinuationConfig() ContinuationConfig
DefaultContinuationConfig returns sensible defaults.
type EmbeddingParams ¶
type EmbeddingParams struct {
Indexing map[string]string `json:"indexing,omitempty"`
Query map[string]string `json:"query,omitempty"`
}
EmbeddingParams holds asymmetric params for indexing vs query.
func DefaultEmbeddingParams ¶
func DefaultEmbeddingParams(model string) EmbeddingParams
DefaultEmbeddingParams returns known-good asymmetric params for common embedding models.
type EmbeddingRequest ¶
type EmbeddingRequest struct {
Model string `json:"model"`
Input []string `json:"input"`
Params map[string]string `json:"params,omitempty"` // indexing or query params
}
EmbeddingRequest represents an embedding API call.
type EmbeddingResponse ¶
type EmbeddingResponse struct {
Embeddings [][]float32 `json:"embeddings"`
Model string `json:"model"`
Usage *EyrieUsage `json:"usage,omitempty"`
}
EmbeddingResponse holds embedding results.
type EyrieClient ¶
type EyrieClient struct {
// contains filtered or unexported fields
}
EyrieClient is the universal LLM client. It is safe for concurrent use.
func NewEyrieClient ¶
func NewEyrieClient(cfg *EyrieConfig) *EyrieClient
NewEyrieClient creates a new EyrieClient.
func (*EyrieClient) Chat ¶
func (c *EyrieClient) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
Chat sends a chat request to the specified (or default) provider.
func (*EyrieClient) GetProviderInfo ¶
func (c *EyrieClient) GetProviderInfo(provider string) *ProviderRegistryConfig
GetProviderInfo returns config for a provider.
func (*EyrieClient) GetProviders ¶
func (c *EyrieClient) GetProviders() []string
GetProviders lists all available providers.
func (*EyrieClient) Ping ¶
func (c *EyrieClient) Ping(ctx context.Context, provider string) error
Ping checks connectivity to the specified (or default) provider.
func (*EyrieClient) SetAPIKey ¶
func (c *EyrieClient) SetAPIKey(provider, apiKey string)
SetAPIKey sets an API key for a provider.
func (*EyrieClient) StreamChat ¶
func (c *EyrieClient) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
StreamChat sends a streaming chat request.
func (*EyrieClient) StreamChatContinue ¶
func (c *EyrieClient) StreamChatContinue(ctx context.Context, messages []EyrieMessage, opts ChatOptions, cfg ContinuationConfig) (*StreamResult, error)
StreamChatContinue is like StreamChat but automatically continues if the response hits max_tokens with text-only content. Continuations are transparent to the caller.
type EyrieConfig ¶
type EyrieConfig struct {
Provider string `json:"provider,omitempty"`
APIKey string `json:"api_key,omitempty"`
BaseURL string `json:"base_url,omitempty"`
Model string `json:"model,omitempty"`
MaxRetries int `json:"max_retries,omitempty"`
}
EyrieConfig holds client configuration.
type EyrieError ¶
type EyrieError struct {
Provider string
Op string // operation that failed (e.g. "chat", "stream", "ping")
StatusCode int
RequestID string
Message string
Err error
}
EyrieError is a structured error that preserves provider context, HTTP metadata, and request identification for debugging.
func (*EyrieError) Error ¶
func (e *EyrieError) Error() string
func (*EyrieError) IsAuthError ¶
func (e *EyrieError) IsAuthError() bool
IsAuthError returns true if the error indicates an authentication/authorization problem.
func (*EyrieError) IsRateLimited ¶
func (e *EyrieError) IsRateLimited() bool
IsRateLimited returns true if the error indicates rate limiting.
func (*EyrieError) IsRetriable ¶
func (e *EyrieError) IsRetriable() bool
IsRetriable returns true if the error is likely transient and retrying may help.
func (*EyrieError) Unwrap ¶
func (e *EyrieError) Unwrap() error
type EyrieMessage ¶
type EyrieMessage struct {
Role string `json:"role"`
Content string `json:"content,omitempty"`
Images []string `json:"images,omitempty"`
ToolUse []ToolCall `json:"tool_use,omitempty"` // assistant message with tool calls
ToolResult *ToolResult `json:"tool_result,omitempty"` // user message with tool result
}
EyrieMessage represents a chat message.
func MergeConsecutiveRoles ¶
func MergeConsecutiveRoles(messages []EyrieMessage) []EyrieMessage
MergeConsecutiveRoles merges adjacent messages that share the same role by concatenating their content with a newline separator.
Messages with ToolUse or ToolResult are never merged, since those have special provider semantics and must remain separate.
func SanitizeMessages ¶
func SanitizeMessages(messages []EyrieMessage) []EyrieMessage
SanitizeMessages inspects messages for orphaned tool_use blocks (assistant messages with tool calls that lack matching tool_result blocks) and injects synthetic error results to prevent 400 errors from providers. This is critical for session resume and compaction scenarios.
type EyrieResponse ¶
type EyrieResponse struct {
Content string `json:"content"`
Usage *EyrieUsage `json:"usage,omitempty"`
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
FinishReason string `json:"finish_reason"`
RequestID string `json:"request_id,omitempty"`
}
EyrieResponse is the response from a chat call.
func ChatWithContinuation ¶
func ChatWithContinuation(ctx context.Context, p Provider, messages []EyrieMessage, opts ChatOptions, cfg ContinuationConfig) (*EyrieResponse, error)
ChatWithContinuation calls Chat and automatically continues if stop_reason is "max_tokens". It appends the partial response as an assistant message and retries, accumulating content. Returns the fully assembled response.
type EyrieStreamEvent ¶
type EyrieStreamEvent struct {
Type string `json:"type"` // content, tool_call, tool_input_delta, thinking, done, error
Content string `json:"content,omitempty"`
ToolCall *ToolCall `json:"tool_call,omitempty"`
Thinking string `json:"thinking,omitempty"`
Error string `json:"error,omitempty"`
RequestID string `json:"request_id,omitempty"`
Usage *EyrieUsage `json:"usage,omitempty"`
StopReason string `json:"stop_reason,omitempty"`
}
EyrieStreamEvent is a streaming event.
type EyrieTool ¶
type EyrieTool struct {
Name string `json:"name"`
Description string `json:"description"`
Parameters map[string]interface{} `json:"parameters"`
}
EyrieTool represents a tool definition.
type EyrieUsage ¶
type EyrieUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
CacheCreationTokens int `json:"cache_creation_tokens,omitempty"`
CacheReadTokens int `json:"cache_read_tokens,omitempty"`
}
EyrieUsage tracks token usage.
type FallbackProvider ¶
type FallbackProvider struct {
// contains filtered or unexported fields
}
FallbackProvider wraps multiple Providers and automatically falls back to the next one when the current provider returns a retriable error (429, 500, 502, 503, timeout). It does NOT fall back on client errors (400, 401, 403) because those indicate a problem with the request itself, not the provider.
Inspired by BerriAI/litellm's fallback chain feature.
FallbackProvider is safe for concurrent use.
func NewFallbackProvider ¶
func NewFallbackProvider(providers ...Provider) *FallbackProvider
NewFallbackProvider creates a FallbackProvider that tries providers in order. At least one provider must be supplied.
func (*FallbackProvider) Chat ¶
func (fp *FallbackProvider) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
Chat sends a non-streaming chat request, falling back through the provider chain on retriable errors. Returns the first successful response.
func (*FallbackProvider) Name ¶
func (fp *FallbackProvider) Name() string
Name returns a composite name listing all providers in the chain.
func (*FallbackProvider) Ping ¶
func (fp *FallbackProvider) Ping(ctx context.Context) error
Ping tries to ping each provider in order, returning nil on the first success.
func (*FallbackProvider) SetLogger ¶
func (fp *FallbackProvider) SetLogger(l *slog.Logger)
SetLogger sets a custom logger for the FallbackProvider.
func (*FallbackProvider) Stats ¶
func (fp *FallbackProvider) Stats() map[string]int64
Stats returns a snapshot of how many times each provider served a request.
func (*FallbackProvider) StreamChat ¶
func (fp *FallbackProvider) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
StreamChat sends a streaming chat request, falling back through the provider chain on retriable errors. Returns the first successful stream.
type MetricsCollector ¶
type MetricsCollector struct {
// contains filtered or unexported fields
}
MetricsCollector stores recent call metrics in a ring buffer.
func NewMetricsCollector ¶
func NewMetricsCollector() *MetricsCollector
NewMetricsCollector creates a new MetricsCollector.
func (*MetricsCollector) Recent ¶
func (mc *MetricsCollector) Recent(n int) []CallMetrics
Recent returns the last n call metrics, most recent first. If fewer than n entries exist, all available entries are returned.
func (*MetricsCollector) Record ¶
func (mc *MetricsCollector) Record(m CallMetrics)
Record adds a new CallMetrics entry to the ring buffer.
func (*MetricsCollector) TotalCost ¶
func (mc *MetricsCollector) TotalCost() float64
TotalCost estimates the total cost across all recorded metrics using a simplified pricing model (per 1M tokens):
- Input tokens: $3.00 / 1M
- Output tokens: $15.00 / 1M
- Cache read tokens: $0.30 / 1M
- Cache creation tokens: $3.75 / 1M
type MockCall ¶
type MockCall struct {
Messages []EyrieMessage
Options ChatOptions
}
MockCall records a single call to the mock provider.
type MockMode ¶
type MockMode string
MockMode controls how the mock provider responds.
const ( // MockModeEcho echoes the last user message back. MockModeEcho MockMode = "echo" // MockModeFixed returns a fixed response set via MockProvider.Response. MockModeFixed MockMode = "fixed" // MockModeToolUse returns a tool call response. MockModeToolUse MockMode = "tool_use" // MockModeError always returns an error. MockModeError MockMode = "error" // MockModeMaxTokens returns a response with stop_reason=max_tokens (for testing continuation). MockModeMaxTokens MockMode = "max_tokens" )
type MockProvider ¶
type MockProvider struct {
Mode MockMode
Response string // used in MockModeFixed
ToolName string // used in MockModeToolUse
ToolArgs map[string]interface{}
Delay time.Duration // simulate latency
Calls []MockCall // recorded calls for assertions
// contains filtered or unexported fields
}
MockProvider is a Provider implementation for testing. It never makes real HTTP requests.
func NewMockProvider ¶
func NewMockProvider(mode MockMode) *MockProvider
NewMockProvider creates a mock provider with the given mode.
func (*MockProvider) CallCount ¶
func (m *MockProvider) CallCount() int
CallCount returns the number of recorded calls.
func (*MockProvider) Chat ¶
func (m *MockProvider) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
Chat returns a mock response based on Mode.
func (*MockProvider) LastCall ¶
func (m *MockProvider) LastCall() *MockCall
LastCall returns the most recent recorded call, or nil.
func (*MockProvider) MarshalCalls ¶
func (m *MockProvider) MarshalCalls() string
MarshalCalls returns recorded calls as JSON for debugging.
func (*MockProvider) Ping ¶
func (m *MockProvider) Ping(_ context.Context) error
Ping always succeeds.
func (*MockProvider) StreamChat ¶
func (m *MockProvider) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
StreamChat streams a mock response word by word.
type OpenAIClient ¶
type OpenAIClient struct {
// contains filtered or unexported fields
}
OpenAIClient implements Provider for OpenAI and OpenAI-compatible APIs.
func NewOpenAIClient ¶
func NewOpenAIClient(apiKey, baseURL string, compat *OpenAICompatConfig, opts ...ClientOption) *OpenAIClient
NewOpenAIClient creates a configured OpenAI/compatible client.
func (*OpenAIClient) Chat ¶
func (c *OpenAIClient) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
Chat sends a non-streaming request.
func (*OpenAIClient) Ping ¶
func (c *OpenAIClient) Ping(ctx context.Context) error
Ping checks connectivity.
func (*OpenAIClient) StreamChat ¶
func (c *OpenAIClient) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
StreamChat sends a streaming request.
type OpenAICompatConfig ¶
type OpenAICompatConfig struct {
SupportsStore bool `json:"supports_store,omitempty"`
SupportsDeveloperRole bool `json:"supports_developer_role,omitempty"`
SupportsReasoningEffort bool `json:"supports_reasoning_effort,omitempty"`
SupportsUsageInStreaming bool `json:"supports_usage_in_streaming,omitempty"`
SupportsStrictMode bool `json:"supports_strict_mode,omitempty"`
MaxTokensField string `json:"max_tokens_field,omitempty"` // "max_tokens" or "max_completion_tokens"
RequiresToolResultName bool `json:"requires_tool_result_name,omitempty"`
RequiresAssistantAfterToolResult bool `json:"requires_assistant_after_tool_result,omitempty"`
RequiresThinkingAsText bool `json:"requires_thinking_as_text,omitempty"`
ThinkingFormat string `json:"thinking_format,omitempty"` // "openai", "zai", "qwen", "openrouter"
}
OpenAICompatConfig holds provider-specific compatibility flags that control how API requests are constructed for each provider.
type Provider ¶
type Provider interface {
// Chat sends a non-streaming chat request.
Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
// StreamChat sends a streaming chat request.
// The caller must call Close() on the returned StreamResult when done.
StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
// Ping checks connectivity and authentication.
Ping(ctx context.Context) error
// Name returns the provider name (e.g. "anthropic", "openai").
Name() string
}
Provider is the core interface for LLM providers. Implementations must be safe for concurrent use.
func WithRateLimit ¶
func WithRateLimit(p Provider, limiter *RateLimiter) Provider
WithRateLimit wraps a provider with a rate limiter.
type ProviderRegistryConfig ¶
type ProviderRegistryConfig struct {
Name string `json:"name"`
Type ProviderType `json:"type"`
BaseURL string `json:"base_url,omitempty"`
EnvKey string `json:"env_key"`
SupportsStreaming bool `json:"supports_streaming"`
SupportsTools bool `json:"supports_tools"`
SupportsReasoning bool `json:"supports_reasoning"`
Compat *OpenAICompatConfig `json:"compat,omitempty"`
}
ProviderRegistryConfig holds provider registry info.
type ProviderType ¶
type ProviderType string
ProviderType classifies providers.
const (
	// ProviderTypeAnthropic uses the Anthropic Messages API.
	ProviderTypeAnthropic ProviderType = "anthropic"
	// ProviderTypeOpenAI uses the OpenAI Chat Completions API.
	ProviderTypeOpenAI ProviderType = "openai"
	// ProviderTypeOpenAICompatible uses OpenAI-compatible APIs with custom base URLs.
	ProviderTypeOpenAICompatible ProviderType = "openai-compatible"
)
type RateLimitConfig ¶
type RateLimitConfig struct {
// RequestsPerMinute is the maximum requests per minute (0 = unlimited).
RequestsPerMinute int
// BurstSize is the maximum burst above the steady rate (default = RequestsPerMinute/10, min 1).
BurstSize int
// MinInterval is the minimum time between requests (e.g., 5ms for cloud, 0 for local).
MinInterval time.Duration
}
RateLimitConfig holds rate limit settings for a provider.
type RateLimitedProvider ¶
type RateLimitedProvider struct {
// contains filtered or unexported fields
}
RateLimitedProvider wraps a Provider with rate limiting.
func (*RateLimitedProvider) Chat ¶
func (r *RateLimitedProvider) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
func (*RateLimitedProvider) Name ¶
func (r *RateLimitedProvider) Name() string
func (*RateLimitedProvider) StreamChat ¶
func (r *RateLimitedProvider) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
type RateLimiter ¶
type RateLimiter struct {
// contains filtered or unexported fields
}
RateLimiter implements a token bucket rate limiter per provider. It limits the rate of outgoing requests (configured via RateLimitConfig, e.g. RequestsPerMinute) to avoid hitting provider rate limits.
func NewRateLimiter ¶
func NewRateLimiter(defaults RateLimitConfig) *RateLimiter
NewRateLimiter creates a rate limiter with default config applied to all providers.
func (*RateLimiter) SetProviderLimit ¶
func (rl *RateLimiter) SetProviderLimit(provider string, cfg RateLimitConfig)
SetProviderLimit sets a custom rate limit for a specific provider.
type ResponseFormat ¶
type ResponseFormat struct {
Type string `json:"type"` // "json_object" or "json_schema"
Schema string `json:"schema,omitempty"` // optional JSON schema for structured output
}
ResponseFormat specifies the desired output format for the model response.
type RetryConfig ¶
type RetryConfig struct {
MaxRetries int
BaseDelay time.Duration
MaxDelay time.Duration
RetryOn []int // HTTP status codes to retry on
}
RetryConfig controls retry behavior.
func DefaultRetryConfig ¶
func DefaultRetryConfig() RetryConfig
DefaultRetryConfig returns sensible defaults.
type StreamResult ¶
type StreamResult struct {
Events <-chan EyrieStreamEvent
RequestID string
// contains filtered or unexported fields
}
StreamResult wraps a streaming response with cleanup. Callers must call Close() when done reading events, or cancel the context.
func StreamChatWithContinuation ¶
func StreamChatWithContinuation(ctx context.Context, p Provider, messages []EyrieMessage, opts ChatOptions, cfg ContinuationConfig) (*StreamResult, error)
StreamChatWithContinuation wraps StreamChat with automatic continuation when the response stops with "max_tokens" and contains only text (no tool calls). It returns a StreamResult whose Events channel transparently continues across multiple LLM calls, emitting a "continuation" event at each boundary.
func (*StreamResult) Close ¶
func (sr *StreamResult) Close()
Close stops the stream and releases resources.
type ToolCall ¶
type ToolCall struct {
ID string `json:"id,omitempty"`
Name string `json:"name"`
Arguments map[string]interface{} `json:"arguments"`
}
ToolCall represents a tool invocation.
func ParseInlineToolCalls ¶
ParseInlineToolCalls detects and extracts tool calls embedded in text content. Some providers (e.g., canopywave/kimi) return tool calls in a text format:

<|tool_calls_section_begin|>
<|tool_call_begin|> functions.ToolName:0 <|tool_call_argument_begin|> {"arg":"val"} <|tool_call_end|>
<|tool_calls_section_end|>
type ToolResult ¶
type ToolResult struct {
ToolUseID string `json:"tool_use_id"`
Content string `json:"content"`
IsError bool `json:"is_error,omitempty"`
}
ToolResult represents the result of a tool execution.
type WeightedProvider ¶
type WeightedProvider struct {
// contains filtered or unexported fields
}
WeightedProvider selects a provider based on configured weights, with automatic failover to remaining providers on retriable errors.
WeightedProvider is safe for concurrent use.
func NewWeightedProvider ¶
func NewWeightedProvider(configs []WeightedProviderConfig) *WeightedProvider
NewWeightedProvider creates a WeightedProvider that selects providers based on the configured weights. At least one provider must be supplied. Weights are normalized to sum to 1.0.
func (*WeightedProvider) Chat ¶
func (wp *WeightedProvider) Chat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*EyrieResponse, error)
Chat sends a non-streaming chat request using weighted random selection with failover on retriable errors.
func (*WeightedProvider) Name ¶
func (wp *WeightedProvider) Name() string
Name returns a composite name showing providers and their weights.
func (*WeightedProvider) Ping ¶
func (wp *WeightedProvider) Ping(ctx context.Context) error
Ping tries to ping each provider, returning nil on the first success.
func (*WeightedProvider) Stats ¶
func (wp *WeightedProvider) Stats() map[string]int64
Stats returns a snapshot of how many times each provider served a request.
func (*WeightedProvider) StreamChat ¶
func (wp *WeightedProvider) StreamChat(ctx context.Context, messages []EyrieMessage, opts ChatOptions) (*StreamResult, error)
StreamChat sends a streaming chat request using weighted random selection with failover on retriable errors.
type WeightedProviderConfig ¶
type WeightedProviderConfig struct {
Provider Provider
Weight float64 // relative weight (e.g., 0.8 for 80%)
}
WeightedProviderConfig associates a Provider with a selection weight.