Documentation
¶
Index ¶
- Constants
- Variables
- func ApplyAnthropicToolOverhead(tc *TokenCount, numTools int)
- func CountMessage(model string, msg Message) (int, error)
- func CountMessagesAndTools(tc *TokenCount, req TokenCountRequest, opts CountOpts) error
- func CountMessagesAndToolsAnthropic(tc *TokenCount, req TokenCountRequest) error
- func CountText(model, text string) (int, error)
- func DefaultHttpClient() *http.Client
- func NewHttpClient(opts HttpClientOpts) *http.Client
- type AssistantMessage
- type CacheHint
- type CompletedEvent
- type CountOpts
- type DebugEvent
- type DeltaEvent
- type DeltaKind
- type Envelope
- type ErrorEvent
- type Event
- type EventHandler
- type EventHandlerFunc
- type EventMeta
- type EventType
- type HttpClientOpts
- type Message
- type MessageOpt
- type Messages
- func (m *Messages) Add(all ...Message) *Messages
- func (m *Messages) Assistant(content string, toolCalls ...tool.Call) *Messages
- func (m *Messages) System(content string) *Messages
- func (m *Messages) Tool(toolCallID, output string) *Messages
- func (m *Messages) ToolErr(toolCallID, output string) *Messages
- func (m *Messages) UnmarshalJSON(data []byte) error
- func (m *Messages) User(content string) *Messages
- type Model
- type ModelFetcher
- type Option
- type Options
- type OutputFormat
- type Provider
- type ProviderError
- func AsProviderError(provider string, err error) *ProviderError
- func NewErrAPIError(provider string, statusCode int, body string) *ProviderError
- func NewErrBuildRequest(provider string, cause error) *ProviderError
- func NewErrContextCancelled(provider string, cause error) *ProviderError
- func NewErrMissingAPIKey(provider string) *ProviderError
- func NewErrNoProviders(provider string) *ProviderError
- func NewErrProviderMsg(provider string, msg string) *ProviderError
- func NewErrRequestFailed(provider string, cause error) *ProviderError
- func NewErrStreamDecode(provider string, cause error) *ProviderError
- func NewErrStreamRead(provider string, cause error) *ProviderError
- func NewErrUnknownModel(provider string, modelID string) *ProviderError
- type Publisher
- type ReasoningEffort
- type Request
- type Resolver
- type Response
- type Result
- type Role
- type RouteInfo
- type RouteInfoEvent
- type StopReason
- type Stream
- type StreamClosedEvent
- type StreamCreatedEvent
- type StreamProcessor
- func (r *StreamProcessor) HandleTool(handlers ...tool.NamedHandler) *StreamProcessor
- func (r *StreamProcessor) OnDelta(fn TypedEventHandler[*DeltaEvent]) *StreamProcessor
- func (r *StreamProcessor) OnEvent(fn EventHandler) *StreamProcessor
- func (r *StreamProcessor) OnReasoningDelta(fn func(delta string)) *StreamProcessor
- func (r *StreamProcessor) OnStart(fn TypedEventHandler[*StreamStartedEvent]) *StreamProcessor
- func (r *StreamProcessor) OnTextDelta(fn func(delta string)) *StreamProcessor
- func (r *StreamProcessor) OnToolDelta(fn func(d ToolDeltaPart)) *StreamProcessor
- func (r *StreamProcessor) Result() Result
- func (r *StreamProcessor) WithAsyncToolDispatch() *StreamProcessor
- func (r *StreamProcessor) WithToolDispatcher(d tool.DispatcherType) *StreamProcessor
- type StreamRequest
- type StreamStartedEvent
- type Streamer
- type SystemMessage
- type TextMessage
- type TokenCount
- type TokenCountRequest
- type TokenCounter
- type ToolCallEvent
- type ToolChoice
- type ToolChoiceAuto
- type ToolChoiceNone
- type ToolChoiceRequired
- type ToolChoiceTool
- type ToolDeltaPart
- type ToolMessage
- type TypedEventHandler
- type Usage
- type UsageUpdatedEvent
- type UserMessage
Constants ¶
const ( ProviderNameAnthropic = "anthropic" ProviderNameClaude = "claude" ProviderNameBedrock = "bedrock" ProviderNameOllama = "ollama" ProviderNameOpenAI = "openai" ProviderNameOpenRouter = "openrouter" ProviderNameRouter = "router" )
Provider name constants used in ProviderError.Provider.
Variables ¶
var ( // ErrContextCancelled is returned when the caller's context is cancelled // while a stream is in progress. ErrContextCancelled = errors.New("context cancelled") // ErrRequestFailed is returned when the HTTP transport fails before a // response is received (e.g. network error, DNS failure). ErrRequestFailed = errors.New("request failed") // ErrAPIError is returned when the provider API responds with a non-2xx // HTTP status. The ProviderError carries StatusCode and Body. ErrAPIError = errors.New("API error") // ErrStreamRead is returned when reading or scanning the response stream // fails at the I/O level (e.g. scanner error, connection reset). ErrStreamRead = errors.New("eventPub read/decode error") // ErrStreamDecode is returned when a stream chunk cannot be decoded // (e.g. malformed JSON in an SSE data line). ErrStreamDecode = errors.New("eventPub read/decode error") // ErrProviderError is returned when the provider sends an explicit // error inside the stream (e.g. Anthropic error event, OpenRouter // chunk-level error). ErrProviderError = errors.New("provider error") // ErrMissingAPIKey is returned when a provider requires an API key // but none has been configured. ErrMissingAPIKey = errors.New("missing API key") // ErrBuildRequest is returned when serialising the outgoing request // fails before it is sent. ErrBuildRequest = errors.New("build request error") // ErrUnknownModel is returned when a model ID or alias cannot be resolved. ErrUnknownModel = errors.New("unknown model") // ErrNoProviders is returned when no providers are configured or all // failover targets have been exhausted. ErrNoProviders = errors.New("no providers configured") // ErrUnknown is used to wrap any error that is not already a ProviderError. // Callers can test for it with errors.Is(err, llm.ErrUnknown). ErrUnknown = errors.New("unknown error") )
Sentinel errors for use with errors.Is. Each ProviderError wraps one of these so callers can inspect the error kind without string matching.
Functions ¶
func ApplyAnthropicToolOverhead ¶ added in v0.26.0
func ApplyAnthropicToolOverhead(tc *TokenCount, numTools int)
ApplyAnthropicToolOverhead adds the Anthropic tool-use preamble and per-tool serialisation framing to tc.OverheadTokens and tc.InputTokens.
This is exported so that providers using the Anthropic API format (e.g. MiniMax) can apply the same overhead after calling CountMessagesAndTools with their own encoding.
func CountMessage ¶ added in v0.24.0
CountMessage returns the number of tokens for a single Message for the given model. The message is converted to its text representation using the same logic as CountTokens (role content + tool call names/args for AssistantMessage, output for ToolResult, etc.).
This is a convenience function for callers that count messages individually rather than as a batch — for example, per-entry token estimates in a conversation history manager.
func CountMessagesAndTools ¶ added in v0.24.0
func CountMessagesAndTools(tc *TokenCount, req TokenCountRequest, opts CountOpts) error
CountMessagesAndTools is a low-level helper for provider TokenCounter implementations. Library consumers should use the TokenCounter interface directly rather than calling this function.
It fills tc.PerMessage, tc.ToolsTokens, tc.PerTool, and tc.InputTokens using the given BPE encoding, then calls applyRoleBreakdown to populate the role breakdown fields.
Returns an error if req.Model is empty.
func CountMessagesAndToolsAnthropic ¶ added in v0.24.0
func CountMessagesAndToolsAnthropic(tc *TokenCount, req TokenCountRequest) error
CountMessagesAndToolsAnthropic is like CountMessagesAndTools but applies Anthropic-specific tool overhead constants: the hidden tool-use system preamble (~330 tokens, paid once) plus per-tool serialisation framing (~126 tokens first tool, ~85 tokens each additional). In total, a request with N tools adds 330+126+(N-1)×85 tokens on top of the raw JSON counts.
Use this for anthropic, bedrock, and claude providers.
func CountText ¶ added in v0.24.0
CountText returns the number of tokens in text for the given model. The encoding is selected automatically based on the model ID: o200k_base for GPT-4o/o-series, cl100k_base for everything else.
This is a convenience function for callers that need to count raw text without constructing a full TokenCountRequest — for example, context-budget managers that count individual history entries.
func DefaultHttpClient ¶ added in v0.23.0
DefaultHttpClient returns the shared default HTTP client. It is safe for concurrent use and is reused across all providers that do not supply their own client.
func NewHttpClient ¶ added in v0.23.0
func NewHttpClient(opts HttpClientOpts) *http.Client
NewHttpClient creates a new *http.Client with sensible defaults for LLM provider use. The client has no top-level Timeout — LLM streams can be arbitrarily long and are cancelled via context. Transport-level timeouts guard against stalled connections at the TCP/TLS layer.
When opts.Logger is non-nil, every request and response is logged at Debug level. Set opts.Debug = true to also include headers and bodies. Response bodies are tee-logged as they stream — no buffering, no broken SSE.
Types ¶
type AssistantMessage ¶ added in v0.26.0
type AssistantMessage interface {
TextMessage
ToolCalls() []tool.Call
// contains filtered or unexported methods
}
func Assistant ¶ added in v0.26.0
func Assistant(content string, toolCalls ...tool.Call) AssistantMessage
func AssistantWithCacheHint ¶ added in v0.26.0
func AssistantWithCacheHint(content string, cacheHint *CacheHint, toolCalls ...tool.Call) AssistantMessage
func ToolCalls ¶ added in v0.26.0
func ToolCalls(toolCalls ...tool.Call) AssistantMessage
type CacheHint ¶ added in v0.20.0
type CacheHint struct {
// Enabled marks this content as a cache breakpoint candidate.
// For Anthropic/Bedrock: emits cache_control / cachePoint at this position.
// For OpenAI: no-op (caching is automatic).
Enabled bool
// TTL requests a specific cache duration.
// Valid values: "" (provider default, typically 5m), "5m", "1h".
// The "1h" option requires a supporting model (Claude Haiku/Sonnet/Opus 4.5+).
TTL string
}
CacheHint requests provider-side prompt caching for a message or request. It is a provider-neutral instruction: Anthropic and Bedrock translate it to explicit cache breakpoints on content blocks; OpenAI caching is always automatic and ignores per-message hints, but honours TTL on Request.CacheHint.
type CompletedEvent ¶ added in v0.26.0
type CompletedEvent struct {
StopReason StopReason `json:"stop_reason"`
}
func (CompletedEvent) Type ¶ added in v0.26.0
func (e CompletedEvent) Type() EventType
type CountOpts ¶ added in v0.26.0
type CountOpts struct {
// Encoding is the BPE encoding to use for token counting
// (e.g. "cl100k_base", "o200k_base", "minimax_bpe").
Encoding string
// PerMsgOverhead is added to InputTokens once per message. For example,
// OpenAI adds 4 tokens per message for role/framing overhead.
PerMsgOverhead int
// ReplyPriming is a fixed addend for reply-priming tokens. For example,
// OpenAI adds 3 tokens for the "assistant" token prepended by the API.
ReplyPriming int
}
CountOpts configures the shared CountMessagesAndTools helper.
type DebugEvent ¶ added in v0.26.0
type DebugEvent struct {
Message string `json:"message,omitempty"`
Data any `json:"data,omitempty"`
}
func (DebugEvent) Type ¶ added in v0.26.0
func (e DebugEvent) Type() EventType
type DeltaEvent ¶ added in v0.26.0
type DeltaEvent struct {
// Kind identifies which payload field is set.
Kind DeltaKind `json:"kind"`
// Index is the position of this content block in the model's output array.
// nil when the provider does not supply block-level indexing.
//
// Index is meaningful because a single HTTP response can contain multiple
// blocks of the same type. With Anthropic's interleaved-thinking beta, a
// single response may produce: thinking(0) → text(1) → tool(2) → thinking(3) → text(4).
// Without Index a consumer cannot tell which thinking or text block a delta
// belongs to.
//
// Provider semantics:
// Anthropic — content_block index, all block types
// Bedrock — ContentBlockIndex, all block types
// OpenAI Responses — output_index, all output types
// OpenAI Completions — tool_calls[].index, tool calls only; text=nil
// OpenRouter — tool_calls[].index, tool calls only; text=nil
// Ollama — nil (complete tool calls only, no streaming fragments)
Index *uint32 `json:"index,omitempty"`
// Text is populated for DeltaKindText.
Text string `json:"text,omitempty"`
// Reasoning is populated for DeltaKindReasoning.
Reasoning string `json:"reasoning,omitempty"`
ToolDeltaPart
}
DeltaEvent carries one incremental content chunk from the model stream. Exactly one payload field is populated, indicated by Kind.
func ReasoningDelta ¶ added in v0.23.0
func ReasoningDelta(text string) *DeltaEvent
func TextDelta ¶ added in v0.23.0
func TextDelta(text string) *DeltaEvent
func ToolDelta ¶ added in v0.23.0
func ToolDelta(id, name, argsFragment string) *DeltaEvent
func (*DeltaEvent) Type ¶ added in v0.26.0
func (e *DeltaEvent) Type() EventType
func (*DeltaEvent) WithIndex ¶ added in v0.26.0
func (e *DeltaEvent) WithIndex(idx uint32) *DeltaEvent
type DeltaKind ¶ added in v0.26.0
type DeltaKind string
DeltaKind identifies the kind of incremental content carried by a DeltaEvent.
type ErrorEvent ¶ added in v0.26.0
type ErrorEvent struct {
Error error `json:"error"`
}
func (ErrorEvent) Type ¶ added in v0.26.0
func (e ErrorEvent) Type() EventType
type EventHandler ¶ added in v0.26.0
type EventHandler interface {
Handle(e Event)
}
type EventHandlerFunc ¶ added in v0.26.0
type EventHandlerFunc func(e Event)
func (EventHandlerFunc) Handle ¶ added in v0.26.0
func (h EventHandlerFunc) Handle(e Event)
type EventMeta ¶ added in v0.26.0
type EventMeta struct {
RequestID string `json:"request_id,omitempty"`
Seq uint64 `json:"seq,omitempty"`
CreatedAt time.Time `json:"created_at,omitempty"`
After time.Duration `json:"after,omitempty"`
TraceID string `json:"trace_id,omitempty"`
Model string `json:"model,omitempty"`
Attrs map[string]string `json:"attrs,omitempty"`
}
type EventType ¶ added in v0.26.0
type EventType string
EventType identifies the kind of streaming event from a provider.
const ( StreamEventCreated EventType = "created" StreamEventClosed EventType = "closed" StreamEventRouted EventType = "routed" StreamEventStarted EventType = "started" StreamEventUsageUpdated EventType = "usage" StreamEventDelta EventType = "delta" StreamEventToolCall EventType = "tool_call" StreamEventCompleted EventType = "completed" StreamEventError EventType = "error" StreamEventDebug EventType = "debug" )
type HttpClientOpts ¶ added in v0.23.0
type HttpClientOpts struct {
// Logger enables transport-level request/response logging at Debug level.
// When nil, no logging is performed.
Logger *slog.Logger
// Debug extends logging to include request/response headers and bodies.
// Has no effect when Logger is nil.
Debug bool
}
HttpClientOpts configures the HTTP client created by NewHttpClient.
type Message ¶
type Message interface {
Role() Role
Validate() error
CacheHint() *CacheHint
MarshalJSON() ([]byte, error)
UnmarshalJSON([]byte) error
// contains filtered or unexported methods
}
Message is the interface all message types implement.
type MessageOpt ¶ added in v0.26.0
type MessageOpt interface {
// contains filtered or unexported methods
}
type Messages ¶ added in v0.5.0
type Messages []Message
func (*Messages) UnmarshalJSON ¶ added in v0.5.0
type Model ¶
type Model struct {
ID string `json:"id"`
Name string `json:"name"`
Provider string `json:"provider"`
Aliases []string `json:"aliases,omitempty"`
}
Model represents an LLM model.
type ModelFetcher ¶
ModelFetcher is an optional interface providers can implement to list models dynamically from their API instead of returning a static list.
type Option ¶ added in v0.12.0
type Option func(*Options)
Option configures provider options.
func APIKeyFromEnv ¶ added in v0.12.0
APIKeyFromEnv returns an Option that reads the API key from environment variables. It tries each candidate in order, returning the first non-empty value. Returns an error at call time if none of the candidates are set.
func WithAPIKey ¶ added in v0.12.0
WithAPIKey sets a static API key.
func WithAPIKeyFunc ¶ added in v0.12.0
WithAPIKeyFunc sets a dynamic API key resolver. The function is called on each CreateStream() call, enabling:
- Lazy key resolution (fetch from secret manager on first use)
- Key rotation (fetch fresh key each time)
- Context-aware resolution (respect timeouts/cancellation)
func WithBaseURL ¶ added in v0.12.0
WithBaseURL sets a custom base URL for the provider.
func WithHTTPClient ¶ added in v0.23.0
WithHTTPClient sets a custom HTTP client for the provider. When not set, providers use DefaultHttpClient().
func WithLogger ¶ added in v0.23.0
WithLogger sets a logger for providers that emit events outside the HTTP transport layer (e.g. Bedrock's binary eventstream). Events are logged at Debug level using the same format as the HTTP transport, so the same log renderer handles output from all providers.
type Options ¶ added in v0.12.0
type Options struct {
// BaseURL is the base URL for the provider's API.
BaseURL string
// APIKeyFunc returns the API key for authentication.
// It is called on each CreateStream() call, allowing for lazy/dynamic resolution.
APIKeyFunc func(ctx context.Context) (string, error)
// HTTPClient is the HTTP client to use for API requests.
// When nil, providers fall back to DefaultHttpClient().
HTTPClient *http.Client
// Logger is used by providers that cannot log via the HTTP transport
// (e.g. Bedrock's binary eventstream). When set, stream events are logged
// at Debug level using the same message format as the HTTP transport logger
// so the same renderer handles both.
Logger *slog.Logger
}
Options holds configuration shared across providers.
type OutputFormat ¶ added in v0.25.0
type OutputFormat string
OutputFormat specifies the desired output format for the model response.
const ( // OutputFormatText requests plain text output (default for most providers). OutputFormatText OutputFormat = "text" // OutputFormatJSON requests JSON output. The model will be constrained // to output valid JSON. Not all providers support this. OutputFormatJSON OutputFormat = "json" )
type ProviderError ¶ added in v0.23.0
type ProviderError struct {
// Sentinel is one of the Err* vars above. errors.Is matches against it.
Sentinel error `json:"-"`
// Provider is the name of the provider that produced this error.
// Use the ProviderName* constants.
Provider string `json:"provider"`
// Message is a human-readable description of the error.
Message string `json:"message"`
// Cause is the underlying error that triggered this one, if any.
Cause error `json:"-"`
// StatusCode is the HTTP response status code. Only set for ErrAPIError.
StatusCode int `json:"status_code,omitempty"`
// Body is the raw HTTP response body. Only set for ErrAPIError.
Body string `json:"body,omitempty"`
}
ProviderError is a structured error emitted by any provider. It wraps a sentinel so errors.Is works, carries the provider name for identification, and optionally holds an HTTP status code and body for API errors.
func AsProviderError ¶ added in v0.23.0
func AsProviderError(provider string, err error) *ProviderError
AsProviderError ensures err is a *ProviderError. If it already is one, it is returned as-is. Otherwise it is wrapped in a new ProviderError with ErrUnknown as the sentinel. This guarantees that every error surfaced by CreateStream and EventStream.Error() is a *ProviderError.
func NewErrAPIError ¶ added in v0.23.0
func NewErrAPIError(provider string, statusCode int, body string) *ProviderError
NewErrAPIError wraps a non-2xx HTTP response from a provider API.
func NewErrBuildRequest ¶ added in v0.23.0
func NewErrBuildRequest(provider string, cause error) *ProviderError
NewErrBuildRequest wraps a failure that occurred while building the outgoing request (e.g. JSON serialisation error).
func NewErrContextCancelled ¶ added in v0.23.0
func NewErrContextCancelled(provider string, cause error) *ProviderError
NewErrContextCancelled wraps a context cancellation for a provider stream.
func NewErrMissingAPIKey ¶ added in v0.23.0
func NewErrMissingAPIKey(provider string) *ProviderError
NewErrMissingAPIKey returns an error for a provider that has no API key configured.
func NewErrNoProviders ¶ added in v0.23.0
func NewErrNoProviders(provider string) *ProviderError
NewErrNoProviders returns an error when no providers are available or all failover targets have been exhausted.
func NewErrProviderMsg ¶ added in v0.23.0
func NewErrProviderMsg(provider string, msg string) *ProviderError
NewErrProviderMsg wraps an explicit error message sent by the provider inside the stream (e.g. an Anthropic error event or OpenRouter chunk error).
func NewErrRequestFailed ¶ added in v0.23.0
func NewErrRequestFailed(provider string, cause error) *ProviderError
NewErrRequestFailed wraps an HTTP transport-level failure.
func NewErrStreamDecode ¶ added in v0.23.0
func NewErrStreamDecode(provider string, cause error) *ProviderError
NewErrStreamDecode wraps a JSON or protocol decode failure mid-stream.
func NewErrStreamRead ¶ added in v0.23.0
func NewErrStreamRead(provider string, cause error) *ProviderError
NewErrStreamRead wraps an I/O or scanner error that occurred while reading the response stream.
func NewErrUnknownModel ¶ added in v0.23.0
func NewErrUnknownModel(provider string, modelID string) *ProviderError
NewErrUnknownModel returns an error for a model ID or alias that cannot be resolved by the provider.
func (*ProviderError) Error ¶ added in v0.23.0
func (e *ProviderError) Error() string
Error returns a human-readable error string in the form: "<provider>: <sentinel>: <message>" or "<provider>: <sentinel>: <message>: <cause>"
func (*ProviderError) Is ¶ added in v0.23.0
func (e *ProviderError) Is(target error) bool
Is reports whether this error matches target. It matches if target is the same sentinel, enabling errors.Is(err, ErrAPIError) etc.
func (*ProviderError) MarshalJSON ¶ added in v0.23.0
func (e *ProviderError) MarshalJSON() ([]byte, error)
MarshalJSON serialises ProviderError to JSON. Sentinel and Cause are rendered as strings so the full error is machine-readable.
func (*ProviderError) Unwrap ¶ added in v0.23.0
func (e *ProviderError) Unwrap() error
Unwrap returns Cause when set, allowing errors.As/Is to traverse the chain. When Cause is nil, Unwrap returns Sentinel so errors.Is(err, ErrAPIError) still works even with no underlying cause.
type Publisher ¶ added in v0.26.0
type Publisher interface {
Publish(payload Event)
Started(started StreamStartedEvent)
Routed(routed RouteInfo)
Delta(d *DeltaEvent)
ToolCall(tc tool.Call)
Usage(usage Usage)
Completed(completed CompletedEvent)
Error(err error)
Debug(msg string, data any)
Close()
}
func NewEventPublisher ¶ added in v0.26.0
type ReasoningEffort ¶ added in v0.7.0
type ReasoningEffort string
ReasoningEffort controls the amount of reasoning for reasoning models. Lower values result in faster responses with fewer reasoning tokens.
const ( // ReasoningEffortNone disables reasoning (GPT-5.1+ only). ReasoningEffortNone ReasoningEffort = "none" // ReasoningEffortMinimal uses minimal reasoning effort. ReasoningEffortMinimal ReasoningEffort = "minimal" // ReasoningEffortLow uses low reasoning effort. ReasoningEffortLow ReasoningEffort = "low" // ReasoningEffortMedium uses medium reasoning effort (default for most models before GPT-5.1). ReasoningEffortMedium ReasoningEffort = "medium" // ReasoningEffortHigh uses high reasoning effort. ReasoningEffortHigh ReasoningEffort = "high" // ReasoningEffortXHigh uses extra high reasoning effort (codex-max+ only). ReasoningEffortXHigh ReasoningEffort = "xhigh" )
func (ReasoningEffort) Valid ¶ added in v0.8.0
func (r ReasoningEffort) Valid() bool
Valid returns true if the ReasoningEffort is a known valid value or empty.
type Request ¶ added in v0.25.0
type Request struct {
// Model is the model identifier or alias to use, e.g. "fast", "anthropic/claude-sonnet-4-5".
Model string `json:"model"`
// Messages is the conversation history to send to the model.
Messages Messages `json:"messages"`
// MaxTokens limits the maximum number of tokens in the response.
// When 0, the provider's default is used.
MaxTokens int `json:"max_tokens,omitempty"`
// Temperature controls randomness in sampling. Higher values produce
// more diverse outputs (0.0-2.0 for most providers). Not supported by
// Anthropic.
Temperature float64 `json:"temperature,omitempty"`
// TopP is the nucleus sampling threshold. The model considers only tokens
// comprising the top P probability mass. Not supported by Anthropic.
TopP float64 `json:"top_p,omitempty"`
// TopK restricts token selection to the K most likely tokens. Higher values
// increase diversity. Not supported by Anthropic.
TopK int `json:"top_k,omitempty"`
// OutputFormat specifies the desired output format.
// Supported by OpenAI and Anthropic. When set to JSON, the model will
// be constrained to output valid JSON.
OutputFormat OutputFormat `json:"output_format,omitempty"`
// Tools is the set of tools the model may call during the response.
Tools []llmtool.Definition `json:"tools,omitempty"`
// ToolChoice controls how the model selects tools. Defaults to Auto when Tools are provided.
ToolChoice ToolChoice `json:"tool_choice,omitempty"`
// ReasoningEffort controls the depth of reasoning for models that support it (e.g. OpenAI o-series).
ReasoningEffort ReasoningEffort `json:"reasoning_effort,omitempty"`
// CacheHint is a top-level prompt caching hint. Behaviour is provider-specific:
// Anthropic auto mode, Bedrock trailing cachePoint, OpenAI extended retention.
CacheHint *CacheHint `json:"cache_hint,omitempty"`
}
Request configures a provider CreateStream call.
type Resolver ¶ added in v0.16.0
type Resolver interface {
// Resolve returns the Model for the given model ID or alias.
// Returns an error if the model is not recognized.
Resolve(modelID string) (Model, error)
}
Resolver resolves a model alias or ID to its full Model representation.
type Response ¶ added in v0.26.0
type Response interface {
Message() AssistantMessage
Text() string
Reasoning() string
StopReason() StopReason
Usage() *Usage
Error() error
ToolCalls() []tool.Call
}
type RouteInfoEvent ¶ added in v0.26.0
type RouteInfoEvent struct {
RouteInfo RouteInfo `json:"route_info"`
}
func (RouteInfoEvent) Type ¶ added in v0.26.0
func (e RouteInfoEvent) Type() EventType
type StopReason ¶ added in v0.23.0
type StopReason string
StopReason describes why the model stopped generating.
const ( // StopReasonEndTurn is natural completion — the model finished its response. StopReasonEndTurn StopReason = "end_turn" // StopReasonToolUse means the model emitted one or more tool calls. StopReasonToolUse StopReason = "tool_use" // StopReasonMaxTokens means the output length limit was reached. StopReasonMaxTokens StopReason = "max_tokens" // StopReasonContentFilter means output was blocked by the provider. StopReasonContentFilter StopReason = "content_filter" // StopReasonCancelled means the context was cancelled before the stream ended. StopReasonCancelled StopReason = "cancelled" // StopReasonError means the stream ended with a StreamEventError. StopReasonError StopReason = "error" StopReasonUnknown StopReason = "" )
type StreamClosedEvent ¶ added in v0.26.0
type StreamClosedEvent struct{}
func (StreamClosedEvent) Type ¶ added in v0.26.0
func (e StreamClosedEvent) Type() EventType
type StreamCreatedEvent ¶ added in v0.26.0
type StreamCreatedEvent struct{}
func (StreamCreatedEvent) Type ¶ added in v0.26.0
func (e StreamCreatedEvent) Type() EventType
type StreamProcessor ¶ added in v0.26.0
type StreamProcessor struct {
// contains filtered or unexported fields
}
func NewEventProcessor ¶ added in v0.26.0
func NewEventProcessor(ctx context.Context, ch <-chan Envelope) *StreamProcessor
func (*StreamProcessor) HandleTool ¶ added in v0.26.0
func (r *StreamProcessor) HandleTool(handlers ...tool.NamedHandler) *StreamProcessor
HandleTool registers a Handler that is invoked when the model emits a completed tool call matching h.ToolName(). The handler's output is stored in StreamResult.ToolResults and included in the messages returned by Next/Apply.
Pass a *BoundToolSpec (from llm.Handle) for typed, spec-aware handlers:
proc.HandleTool(llm.Handle(weatherSpec, func(ctx context.Context, in GetWeatherParams) (*GetWeatherResult, error) {
return &GetWeatherResult{Temp: 22}, nil
}))
func (*StreamProcessor) OnDelta ¶ added in v0.26.0
func (r *StreamProcessor) OnDelta(fn TypedEventHandler[*DeltaEvent]) *StreamProcessor
func (*StreamProcessor) OnEvent ¶ added in v0.26.0
func (r *StreamProcessor) OnEvent(fn EventHandler) *StreamProcessor
func (*StreamProcessor) OnReasoningDelta ¶ added in v0.26.0
func (r *StreamProcessor) OnReasoningDelta(fn func(delta string)) *StreamProcessor
OnReasoningDelta registers a callback that is called for each incremental reasoning/thinking token.
func (*StreamProcessor) OnStart ¶ added in v0.26.0
func (r *StreamProcessor) OnStart(fn TypedEventHandler[*StreamStartedEvent]) *StreamProcessor
OnStart registers a callback that is called when the StreamEventStarted event arrives, carrying provider metadata (request ID, model, time-to-first-token).
func (*StreamProcessor) OnTextDelta ¶ added in v0.26.0
func (r *StreamProcessor) OnTextDelta(fn func(delta string)) *StreamProcessor
OnTextDelta registers a callback that is called for each incremental text token. Panics in the callback are recovered and recorded on the StreamResult error.
func (*StreamProcessor) OnToolDelta ¶ added in v0.26.0
func (r *StreamProcessor) OnToolDelta(fn func(d ToolDeltaPart)) *StreamProcessor
OnToolDelta registers a callback that is called for each partial tool-call argument fragment (DeltaTypeTool deltas).
func (*StreamProcessor) Result ¶ added in v0.26.0
func (r *StreamProcessor) Result() Result
Result starts consuming the stream (at most once) and returns a channel that yields exactly one *StreamResult when the stream is fully processed. The channel is closed after the result is sent.
Calling Result() multiple times is safe — the stream is only consumed once and the same channel is returned on subsequent calls.
func (*StreamProcessor) WithAsyncToolDispatch ¶ added in v0.26.0
func (r *StreamProcessor) WithAsyncToolDispatch() *StreamProcessor
WithAsyncToolDispatch switches tool handler dispatch to concurrent mode: all tool calls emitted in a single response are executed in parallel, one goroutine per call. Results are collected in emission order before the stream is considered complete.
func (*StreamProcessor) WithToolDispatcher ¶ added in v0.26.0
func (r *StreamProcessor) WithToolDispatcher(d tool.DispatcherType) *StreamProcessor
WithToolDispatcher sets the tool dispatcher explicitly.
type StreamRequest ¶ added in v0.23.0
type StreamRequest = Request
type StreamStartedEvent ¶ added in v0.26.0
type StreamStartedEvent struct {
RequestID string `json:"request_id,omitempty"`
// Model is the model identifier returned by the upstream API in its response.
// e.g., "claude-haiku-4-5-20251001". May be empty if the API doesn't echo the model back.
Model string `json:"model,omitempty"`
}
func (StreamStartedEvent) Type ¶ added in v0.26.0
func (e StreamStartedEvent) Type() EventType
type SystemMessage ¶ added in v0.26.0
type SystemMessage interface {
TextMessage
// contains filtered or unexported methods
}
func System ¶ added in v0.26.0
func System(content string, opts ...MessageOpt) SystemMessage
type TextMessage ¶ added in v0.26.0
type TokenCount ¶ added in v0.24.0
type TokenCount struct {
// InputTokens is the total estimated input token count:
// all messages + all tool definitions + any provider-specific overhead.
InputTokens int
// PerMessage contains the token count for each entry in TokenCountRequest.Messages,
// in the same index order. Does not include tool definitions or overhead.
// len(PerMessage) == len(TokenCountRequest.Messages) is guaranteed.
PerMessage []int
// Role breakdowns — derived from PerMessage, provided for convenience.
// SystemTokens + UserTokens + AssistantTokens + ToolResultTokens == sum(PerMessage).
SystemTokens int // sum of PerMessage for all RoleSystem messages
UserTokens int // sum of PerMessage for all RoleUser messages
AssistantTokens int // sum of PerMessage for all RoleAssistant messages
ToolResultTokens int // sum of PerMessage for all RoleTool (ToolResult) messages
// ToolsTokens is the total raw token count for all tool definitions combined,
// derived purely from the JSON-serialised tool schemas.
// sum(values(PerTool)) == ToolsTokens.
ToolsTokens int
// PerTool maps each tool definition's ToolName to its individual raw token count.
// sum(values(PerTool)) == ToolsTokens.
PerTool map[string]int
// OverheadTokens is the number of tokens the provider adds on top of the
// caller-supplied content — tokens the caller did not write and cannot
// control. Examples:
// - Anthropic: hidden tool-use system preamble + per-tool framing (~330+126+85×n)
// - Claude OAuth: injected billing/identity system blocks (~45 tokens)
//
// Zero for providers that add no hidden content (OpenAI, OpenRouter, Ollama).
//
// The invariant: InputTokens == sum(PerMessage) + ToolsTokens + OverheadTokens
// (plus any per-message overhead, e.g. +4/msg for OpenAI).
OverheadTokens int
}
TokenCount holds the result of a CountTokens call.
Invariants:
- len(PerMessage) == len(TokenCountRequest.Messages)
- SystemTokens + UserTokens + AssistantTokens + ToolResultTokens == sum(PerMessage)
- sum(values(PerTool)) == ToolsTokens (raw tool JSON counts only, no overhead)
- InputTokens == sum(PerMessage) + ToolsTokens + OverheadTokens + provider-specific per-message overhead
type TokenCountRequest ¶ added in v0.24.0
type TokenCountRequest struct {
// Model is the model ID to count tokens for (e.g. "gpt-4o", "claude-sonnet-4-5").
// Required — returns an error if empty.
Model string
Messages Messages
Tools []tool.Definition
}
TokenCountRequest is the input to TokenCounter.CountTokens. Model is required — providers use it to select the correct BPE encoding.
type TokenCounter ¶ added in v0.24.0
type TokenCounter interface {
CountTokens(ctx context.Context, req TokenCountRequest) (*TokenCount, error)
}
TokenCounter is an optional interface providers may implement to estimate token usage before sending a request.
All implementations in this codebase are local/offline — no network call is made. Counts should be treated as estimates; accuracy varies by provider:
- OpenAI: exact (tiktoken matches the API tokenizer)
- OpenRouter: approximate (tiktoken, best-effort model prefix matching)
- Anthropic: approximate (cl100k_base, ±5-10% for English; tokenizer not public)
- Bedrock: approximate (same as Anthropic)
- Ollama: approximate (cl100k_base; no public tokenize endpoint)
Usage:
if tc, ok := provider.(llm.TokenCounter); ok {
count, err := tc.CountTokens(ctx, llm.TokenCountRequest{
Model: "gpt-4o",
Messages: messages,
Tools: tools,
})
if err == nil && count.InputTokens > maxTokens {
return fmt.Errorf("request too large: %d tokens (limit %d)", count.InputTokens, maxTokens)
}
}
type ToolCallEvent ¶ added in v0.26.0
func (ToolCallEvent) Type ¶ added in v0.26.0
func (e ToolCallEvent) Type() EventType
type ToolChoice ¶ added in v0.6.0
type ToolChoice interface {
// contains filtered or unexported methods
}
ToolChoice controls whether and which tools the model should call.
type ToolChoiceAuto ¶ added in v0.6.0
type ToolChoiceAuto struct{}
ToolChoiceAuto lets the model decide whether to call tools. This is the default behavior when ToolChoice is nil.
type ToolChoiceNone ¶ added in v0.6.0
type ToolChoiceNone struct{}
ToolChoiceNone prevents the model from calling any tools.
type ToolChoiceRequired ¶ added in v0.6.0
type ToolChoiceRequired struct{}
ToolChoiceRequired forces the model to call at least one tool.
type ToolChoiceTool ¶ added in v0.6.0
type ToolChoiceTool struct {
Name string
}
ToolChoiceTool forces the model to call a specific tool by name.
type ToolDeltaPart ¶ added in v0.26.0
type ToolMessage ¶ added in v0.26.0
type ToolMessage interface {
Message
ToolCallID() string
ToolOutput() string
IsError() bool
// contains filtered or unexported methods
}
func Tool ¶ added in v0.26.0
func Tool(toolCallID, output string) ToolMessage
func ToolErr ¶ added in v0.26.0
func ToolErr(toolCallID, output string) ToolMessage
func ToolResult ¶ added in v0.26.0
func ToolResult(tr tool.Result) ToolMessage
type TypedEventHandler ¶ added in v0.26.0
type TypedEventHandler[T any] func(e T)
func (TypedEventHandler[T]) Handle ¶ added in v0.26.0
func (h TypedEventHandler[T]) Handle(e Event)
type Usage ¶
type Usage struct {
// InputTokens is the total number of input tokens processed, including
// tokens served from cache (CacheReadTokens) and tokens written to cache
// (CacheWriteTokens). Callers can use this as the single "how many input
// tokens did this request consume" figure.
InputTokens int `json:"input_tokens"`
// OutputTokens is the number of tokens generated in the response.
OutputTokens int `json:"output_tokens"`
// TotalTokens is InputTokens + OutputTokens.
TotalTokens int `json:"total_tokens"`
// Cost is the total request cost in USD.
// For Anthropic, Bedrock, and OpenAI this is locally calculated from
// provider pricing tables and equals the sum of the breakdown fields below.
// For OpenRouter this is API-reported by the proxy (already includes cache pricing).
Cost float64 `json:"cost"`
// Detailed token breakdown (provider-specific, may be zero).
CacheReadTokens int `json:"cache_read_tokens,omitempty"` // Input tokens served from an existing cache entry (all providers).
CacheWriteTokens int `json:"cache_write_tokens,omitempty"` // Input tokens written to a new cache entry (Anthropic, Bedrock).
ReasoningTokens int `json:"reasoning_tokens,omitempty"` // Output tokens consumed by model reasoning (e.g. extended thinking).
// Granular cost breakdown in USD (zero if provider/model pricing is unknown).
// Sum of InputCost + CacheReadCost + CacheWriteCost + OutputCost == Cost.
// Not populated for OpenRouter (API-reported cost is used instead).
//
// InputCost covers only the non-cached, non-write portion:
// InputTokens - CacheReadTokens - CacheWriteTokens tokens at the regular input rate.
InputCost float64 `json:"input_cost,omitempty"` // Cost of non-cached, non-write input tokens.
CacheReadCost float64 `json:"cache_read_cost,omitempty"` // Cost of cache-read tokens.
CacheWriteCost float64 `json:"cache_write_cost,omitempty"` // Cost of cache-write tokens.
OutputCost float64 `json:"output_cost,omitempty"` // Cost of output tokens.
}
Usage holds token counts and cost from a provider response.
type UsageUpdatedEvent ¶ added in v0.26.0
type UsageUpdatedEvent struct {
Usage Usage `json:"usage"`
}
func (UsageUpdatedEvent) Type ¶ added in v0.26.0
func (e UsageUpdatedEvent) Type() EventType
type UserMessage ¶ added in v0.26.0
type UserMessage interface {
TextMessage
// contains filtered or unexported methods
}
func User ¶ added in v0.26.0
func User(content string, opts ...MessageOpt) UserMessage
Source Files
¶
Directories
¶
| Path | Synopsis |
|---|---|
|
cmd
|
|
|
llmcli
command
llmcli is a command-line tool for testing LLM providers.
|
llmcli is a command-line tool for testing LLM providers. |
|
llmcli/cmds
Package cmds provides CLI commands for llmcli.
|
Package cmds provides CLI commands for llmcli. |
|
llmcli/store
Package store provides token storage implementations.
|
Package store provides token storage implementations. |
|
Package llmtest provides helpers for testing code that consumes llm.Stream channels, following the convention of packages like net/http/httptest.
|
Package llmtest provides helpers for testing code that consumes llm.Stream channels, following the convention of packages like net/http/httptest. |
|
Package modeldb provides access to the models.dev model database.
|
Package modeldb provides access to the models.dev model database. |
|
provider
|
|
|
anthropic/claude
Package claude provides an Anthropic provider using Claude OAuth tokens.
|
Package claude provides an Anthropic provider using Claude OAuth tokens. |
|
auto
Package auto provides zero-config multi-provider setup for LLM providers.
|
Package auto provides zero-config multi-provider setup for LLM providers. |
|
minimax
Package minimax provides a MiniMax LLM provider using the Anthropic-compatible API.
|
Package minimax provides a MiniMax LLM provider using the Anthropic-compatible API. |
|
Package tokencount provides a shared offline tiktoken wrapper for LLM token estimation.
|
Package tokencount provides a shared offline tiktoken wrapper for LLM token estimation. |