llm

package

v0.0.0-...-75ffc13 Latest Latest Go to latest Published: May 29, 2026 License: MIT Imports: 21 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/nijaru/ion

Links

Open Source Insights

Documentation ¶

Overview ¶

Package llm defines Canto's provider-agnostic model interface.

Request, Response, Message, Call, and Spec are the normalized types shared by the rest of the framework. Provider is the core backend contract for text generation, streaming, token counting, pricing, and capability reporting.

Registry, SmartResolver, and FailoverProvider help compose multiple providers, while concrete implementations live under llm/providers.

Index ¶

Variables
func ClearRegistry()
func IsRateLimit(err error) bool
func IsTransientTransportError(err error) bool
func ParsePartialJSON(input string) []byte
func RecordUsage(ctx context.Context, providerID, modelID string, usage Usage)
func RegisterModel(def ModelDef)
func TransformRequestForCapabilities(req *Request, caps Capabilities)
func ValidateRequest(req *Request) error
func WithRetryObserver(ctx context.Context, observer func(RetryEvent)) context.Context
type CacheControl
type Call
- func (c *Call) PartialArguments() (map[string]any, error)
type Capabilities
- func DefaultCapabilities() Capabilities
- func ResolveCapabilities(model string) Capabilities
- func (c Capabilities) ReasoningCaps() ReasoningCapabilities
- func (c Capabilities) SupportsReasoningControl(value string) bool
- func (c Capabilities) SupportsReasoningEffort(effort string) bool
- func (c Capabilities) SupportsReasoningToggle(value string) bool
- func (c Capabilities) SupportsThinking() bool
- func (c Capabilities) SupportsThinkingBudget(tokens int) bool
type Chunk
type Classification
type Classifier
type ContentPart
- func ImagePart(mimeType, data string) ContentPart
- func ImageURLPart(mimeType, url string) ContentPart
- func TextPart(text string) ContentPart
type ContentPartType
type Embedder
type FailoverProvider
- func NewFailoverProvider(providers ...Provider) *FailoverProvider
- func (p *FailoverProvider) Capabilities(model string) Capabilities
- func (p *FailoverProvider) Cost(ctx context.Context, model string, usage Usage) float64
- func (p *FailoverProvider) CountTokens(ctx context.Context, model string, messages []Message) (int, error)
- func (p *FailoverProvider) Generate(ctx context.Context, req *Request) (*Response, error)
- func (p *FailoverProvider) ID() string
- func (p *FailoverProvider) IsContextOverflow(err error) bool
- func (p *FailoverProvider) IsTransient(err error) bool
- func (p *FailoverProvider) Models(ctx context.Context) ([]Model, error)
- func (p *FailoverProvider) Stream(ctx context.Context, req *Request) (Stream, error)
type FauxProvider
- func NewFauxProvider(id string, steps ...FauxStep) *FauxProvider
- func (p *FauxProvider) Calls() []*Request
- func (p *FauxProvider) Capabilities(_ string) Capabilities
- func (p *FauxProvider) Cost(_ context.Context, _ string, usage Usage) float64
- func (p *FauxProvider) CountTokens(_ context.Context, _ string, messages []Message) (int, error)
- func (p *FauxProvider) Generate(_ context.Context, req *Request) (*Response, error)
- func (p *FauxProvider) ID() string
- func (p *FauxProvider) IsContextOverflow(err error) bool
- func (p *FauxProvider) IsTransient(err error) bool
- func (p *FauxProvider) Models(_ context.Context) ([]Model, error)
- func (p *FauxProvider) Remaining() int
- func (p *FauxProvider) Stream(_ context.Context, req *Request) (Stream, error)
type FauxStep
type FauxStream
- func NewFauxStream(chunks ...Chunk) *FauxStream
- func (s *FauxStream) Close() error
- func (s *FauxStream) Err() error
- func (s *FauxStream) Next() (*Chunk, bool)
type Message
- func TextMessage(role Role, text string) Message
- func (m Message) HasAssistantPayload() bool
- func (m Message) HasTextContent() bool
- func (m Message) TextContent() string
type Model
type ModelDef
type ModelPreset
type Prompt
- func NewPrompt(messages ...Message) Prompt
- func TextPrompt(text string) Prompt
- func (p Prompt) Clone() Prompt
type Provider
type ProviderCompat
- func DefaultProviderCompat() ProviderCompat
- func DetectCompat(provider, baseURL string) ProviderCompat
- func MergeCompat(detected, override ProviderCompat) ProviderCompat
type ProviderConfig
type ReasoningCapabilities
type ReasoningKind
type Registry
- func NewRegistry() *Registry
- func (r *Registry) Clear()
- func (r *Registry) Register(def ModelDef)
- func (r *Registry) Resolve(modelID string) Capabilities
type Request
- func PrepareRequestForCapabilities(req *Request, caps Capabilities) (*Request, error)
- func (r *Request) AppendMessage(msg Message)
- func (r *Request) Clone() *Request
- func (r *Request) InsertAfterCachePrefix(msg Message)
- func (r *Request) InsertMessage(index int, msg Message)
- func (r *Request) InsertPrefixMessage(index int, msg Message)
- func (r *Request) PrependMessage(msg Message)
type Response
- func GenerateFromStream(s Stream) (*Response, error)
type ResponseFormat
type ResponseFormatType
type RetryConfig
- func DefaultRetryConfig() RetryConfig
type RetryEvent
type RetryExhaustedError
- func (e *RetryExhaustedError) Error() string
- func (e *RetryExhaustedError) Unwrap() error
type RetryProvider
- func NewRetryProvider(p Provider) *RetryProvider
- func (r *RetryProvider) Generate(ctx context.Context, req *Request) (*Response, error)
- func (r *RetryProvider) IsTransient(err error) bool
- func (r *RetryProvider) Stream(ctx context.Context, req *Request) (Stream, error)
type Role
type SmartResolver
- func NewSmartResolver(strategy Strategy, providers ...Provider) *SmartResolver
- func (r *SmartResolver) Capabilities(model string) Capabilities
- func (r *SmartResolver) Cost(ctx context.Context, model string, usage Usage) float64
- func (r *SmartResolver) CountTokens(ctx context.Context, model string, messages []Message) (int, error)
- func (r *SmartResolver) Generate(ctx context.Context, req *Request) (*Response, error)
- func (r *SmartResolver) ID() string
- func (r *SmartResolver) IsContextOverflow(err error) bool
- func (r *SmartResolver) IsTransient(err error) bool
- func (r *SmartResolver) Models(ctx context.Context) ([]Model, error)
- func (r *SmartResolver) Stream(ctx context.Context, req *Request) (Stream, error)
type Spec
type StandardClassifier
- func NewStandardClassifier(p Provider, model string, systemPrompt string) *StandardClassifier
- func (c *StandardClassifier) Classify(ctx context.Context, input string, labels []string) (*Classification, error)
type Strategy
type Stream
type StreamAccumulator
- func (a *StreamAccumulator) Add(chunk *Chunk)
- func (a *StreamAccumulator) Response() Response
type ThinkingBlock
type ThinkingFormat
type Usage

Constants ¶

This section is empty.

Variables ¶

View Source

var DefaultRegistry = NewRegistry()

DefaultRegistry is the framework-wide capability registry.

Functions ¶

func ClearRegistry ¶

func ClearRegistry()

ClearRegistry clears all definitions from the global registry.

func IsRateLimit ¶

func IsRateLimit(err error) bool

IsRateLimit returns true if the error is a rate limit error (429).

func IsTransientTransportError ¶

func IsTransientTransportError(err error) bool

IsTransientTransportError reports whether err looks like a retryable network/transport failure rather than a provider-declared terminal error.

func ParsePartialJSON ¶

func ParsePartialJSON(input string) []byte

ParsePartialJSON attempts to repair a truncated JSON string by closing strings, brackets, and braces in the correct order. It also appends missing colons and null values for incomplete key-value pairs.

func RecordUsage ¶

func RecordUsage(ctx context.Context, providerID, modelID string, usage Usage)

RecordUsage emits OpenTelemetry metrics for a completed LLM call.

func RegisterModel ¶

func RegisterModel(def ModelDef)

RegisterModel registers a model capability definition globally.

func TransformRequestForCapabilities ¶

func TransformRequestForCapabilities(req *Request, caps Capabilities)

TransformRequestForCapabilities adapts a unified request to a model's capability constraints while preserving transcript continuity when sessions move across providers.

func ValidateRequest ¶

func ValidateRequest(req *Request) error

ValidateRequest checks provider-facing invariants for unified LLM requests.

func WithRetryObserver ¶

func WithRetryObserver(ctx context.Context, observer func(RetryEvent)) context.Context

WithRetryObserver returns a context that receives retry notifications from RetryProvider calls made with that context.

Types ¶

type CacheControl ¶

type CacheControl struct {
	Type string `json:"type"` // e.g. "ephemeral"
}

CacheControl defines the caching behavior for a block of content.

type Call ¶

// Call represents a request from the LLM to call a tool.
//
// Function.Name names the tool and Function.Arguments carries its arguments
// as a JSON-encoded string. While a response is still streaming, Arguments
// may hold truncated JSON; PartialArguments repairs and parses it.
type Call struct {
	ID       string `json:"id"`
	Type     string `json:"type"` // e.g., "function"
	Function struct {
		Name      string `json:"name"`
		Arguments string `json:"arguments"` // JSON string
	} `json:"function"`
}

Call represents a request from the LLM to call a tool.

func (*Call) PartialArguments ¶

func (c *Call) PartialArguments() (map[string]any, error)

PartialArguments parses the incrementally streaming arguments string into a map, repairing any truncated JSON structure. This is useful for rendering tool calls in a UI before they are fully received.

type Capabilities ¶

type Capabilities struct {
	// Streaming indicates the model supports token-by-token streaming.
	Streaming bool
	// Tools indicates the model supports tool/function calling.
	Tools bool
	// Temperature indicates the model accepts a temperature parameter.
	// Models with internal fixed-temperature reasoning should set this to false.
	Temperature bool
	// SystemRole is the role to use when passing system-level instructions.
	// RoleSystem (default) passes them through unchanged.
	// RoleUser means the model has no system role; Capabilities injects
	// system content as user messages with an "Instructions:" prefix.
	// RoleDeveloper means the model accepts a privileged instruction channel
	// distinct from the assistant conversation.
	SystemRole Role
	// Reasoning describes typed reasoning controls accepted by the model.
	Reasoning ReasoningCapabilities
}

Capabilities describes what features a model supports. The pipeline uses these to adapt requests before they reach the provider.

func DefaultCapabilities ¶

func DefaultCapabilities() Capabilities

DefaultCapabilities returns full capabilities — suitable for most chat models.

func ResolveCapabilities ¶

func ResolveCapabilities(model string) Capabilities

ResolveCapabilities resolves model capabilities globally.

func (Capabilities) ReasoningCaps ¶

func (c Capabilities) ReasoningCaps() ReasoningCapabilities

func (Capabilities) SupportsReasoningControl ¶

func (c Capabilities) SupportsReasoningControl(value string) bool

func (Capabilities) SupportsReasoningEffort ¶

func (c Capabilities) SupportsReasoningEffort(effort string) bool

func (Capabilities) SupportsReasoningToggle ¶

func (c Capabilities) SupportsReasoningToggle(value string) bool

func (Capabilities) SupportsThinking ¶

func (c Capabilities) SupportsThinking() bool

func (Capabilities) SupportsThinkingBudget ¶

func (c Capabilities) SupportsThinkingBudget(tokens int) bool

type Chunk ¶

type Chunk struct {
	Content        string          `json:"content"`
	Reasoning      string          `json:"reasoning,omitempty"`
	ThinkingBlocks []ThinkingBlock `json:"thinking_blocks,omitempty"`
	Calls          []Call          `json:"tool_calls,omitempty"`
	// Usage is cumulative when present. Providers may emit multiple usage chunks;
	// consumers should keep the latest value rather than summing chunks.
	Usage *Usage `json:"usage,omitempty"`
}

Chunk represents a single piece of a streaming response.

type Classification ¶

type Classification struct {
	// Label is the predicted class (e.g. "allow", "deny", "escalate").
	Label string
	// Reason is the model's justification for the label.
	Reason string
	// Usage is the token consumption for this judgment.
	Usage Usage
	// Metadata contains provider-specific or prompt-defined extra fields.
	Metadata map[string]any
}

Classification describes the result of a discrete model judgment.

type Classifier ¶

type Classifier interface {
	// Classify executes a judgment against the given input.
	// labels is the set of valid output categories.
	Classify(ctx context.Context, input string, labels []string) (*Classification, error)
}

Classifier describes the interface for an LLM judgment task. It is intended for small, fast models acting as evaluators or routers.

type ContentPart ¶

// ContentPart represents structured model-visible message content.
//
// Type selects the kind of part (see ContentPartType). The constructors
// document the payloads: TextPart builds a text part, ImagePart an image
// backed by base64-encoded data, and ImageURLPart an image backed by a
// provider-readable URL.
// NOTE(review): presumably those payloads land in Text, Data, and URL
// respectively, with MIMEType set for both image forms — confirm.
type ContentPart struct {
	Type     ContentPartType `json:"type"`
	Text     string          `json:"text,omitzero"`
	MIMEType string          `json:"mime_type,omitzero"`
	Data     string          `json:"data,omitzero"`
	URL      string          `json:"url,omitzero"`
}

ContentPart represents structured model-visible message content.

func ImagePart ¶

func ImagePart(mimeType, data string) ContentPart

ImagePart creates an image content part backed by base64-encoded data.

func ImageURLPart ¶

func ImageURLPart(mimeType, url string) ContentPart

ImageURLPart creates an image content part backed by a provider-readable URL.

func TextPart ¶

func TextPart(text string) ContentPart

TextPart creates a text content part.

type ContentPartType ¶

type ContentPartType string

ContentPartType identifies one typed part of a model-visible message.

const (
	ContentPartText ContentPartType = "text"
	// ContentPartImage represents image input encoded as base64 data or a
	// provider-readable URL. Providers that do not support image parts should
	// fall back to the surrounding text content.
	ContentPartImage ContentPartType = "image"
)

type Embedder ¶

type Embedder interface {
	// EmbedContent converts a text string into a high-dimensional vector representation.
	EmbedContent(ctx context.Context, text string) ([]float32, error)
}

Embedder defines the interface for creating vector embeddings from text content.

type FailoverProvider ¶

type FailoverProvider struct {
	// contains filtered or unexported fields
}

FailoverProvider tries a list of providers in sequence until one succeeds.

func NewFailoverProvider ¶

func NewFailoverProvider(providers ...Provider) *FailoverProvider

NewFailoverProvider creates a new failover provider.

func (*FailoverProvider) Capabilities ¶

func (p *FailoverProvider) Capabilities(model string) Capabilities

func (*FailoverProvider) Cost ¶

func (p *FailoverProvider) Cost(ctx context.Context, model string, usage Usage) float64

func (*FailoverProvider) CountTokens ¶

func (p *FailoverProvider) CountTokens(
	ctx context.Context,
	model string,
	messages []Message,
) (int, error)

func (*FailoverProvider) Generate ¶

func (p *FailoverProvider) Generate(ctx context.Context, req *Request) (*Response, error)

func (*FailoverProvider) ID ¶

func (p *FailoverProvider) ID() string

func (*FailoverProvider) IsContextOverflow ¶

func (p *FailoverProvider) IsContextOverflow(err error) bool

IsContextOverflow returns true if any underlying provider reports a context overflow error.

func (*FailoverProvider) IsTransient ¶

func (p *FailoverProvider) IsTransient(err error) bool

IsTransient returns true if any underlying provider reports a transient error.

func (*FailoverProvider) Models ¶

func (p *FailoverProvider) Models(ctx context.Context) ([]Model, error)

func (*FailoverProvider) Stream ¶

func (p *FailoverProvider) Stream(ctx context.Context, req *Request) (Stream, error)

type FauxProvider ¶

// FauxProvider is a deterministic in-memory Provider for examples and tests.
// It consumes scripted steps in order and never performs network I/O.
//
// NOTE(review): IsContextOverflowFn and IsTransientFn look like optional
// hooks consulted by the IsContextOverflow and IsTransient methods — confirm,
// including what those methods return when a hook is nil.
type FauxProvider struct {
	IsContextOverflowFn func(error) bool
	IsTransientFn       func(error) bool
	// contains filtered or unexported fields
}

FauxProvider is a deterministic in-memory Provider for examples and tests. It consumes scripted steps in order and never performs network I/O.

func NewFauxProvider ¶

func NewFauxProvider(id string, steps ...FauxStep) *FauxProvider

NewFauxProvider creates a deterministic provider with scripted responses.

func (*FauxProvider) Calls ¶

func (p *FauxProvider) Calls() []*Request

Calls returns requests processed by the provider.

func (*FauxProvider) Capabilities ¶

func (p *FauxProvider) Capabilities(_ string) Capabilities

func (*FauxProvider) Cost ¶

func (p *FauxProvider) Cost(_ context.Context, _ string, usage Usage) float64

func (*FauxProvider) CountTokens ¶

func (p *FauxProvider) CountTokens(_ context.Context, _ string, messages []Message) (int, error)

func (*FauxProvider) Generate ¶

func (p *FauxProvider) Generate(_ context.Context, req *Request) (*Response, error)

func (*FauxProvider) ID ¶

func (p *FauxProvider) ID() string

func (*FauxProvider) IsContextOverflow ¶

func (p *FauxProvider) IsContextOverflow(err error) bool

func (*FauxProvider) IsTransient ¶

func (p *FauxProvider) IsTransient(err error) bool

func (*FauxProvider) Models ¶

func (p *FauxProvider) Models(_ context.Context) ([]Model, error)

func (*FauxProvider) Remaining ¶

func (p *FauxProvider) Remaining() int

Remaining returns the number of unconsumed scripted steps.

func (*FauxProvider) Stream ¶

func (p *FauxProvider) Stream(_ context.Context, req *Request) (Stream, error)

type FauxStep ¶

// FauxStep is one scripted response returned by FauxProvider.
//
// Content, Reasoning, ThinkingBlocks, Calls, and Usage share their names and
// types with the fields of Response.
// NOTE(review): Err is presumably returned in place of a response when it is
// non-nil — confirm against FauxProvider.Generate and FauxProvider.Stream.
type FauxStep struct {
	Content        string
	Reasoning      string
	ThinkingBlocks []ThinkingBlock
	Calls          []Call
	Usage          Usage
	Err            error
	// Chunks, if set, causes Stream to return these chunks instead of a single
	// synthesized chunk from Content and Calls.
	Chunks []Chunk
}

FauxStep is one scripted response returned by FauxProvider.

type FauxStream ¶

type FauxStream struct {
	// contains filtered or unexported fields
}

FauxStream is a deterministic Stream over scripted chunks.

func NewFauxStream ¶

func NewFauxStream(chunks ...Chunk) *FauxStream

func (*FauxStream) Close ¶

func (s *FauxStream) Close() error

func (*FauxStream) Err ¶

func (s *FauxStream) Err() error

func (*FauxStream) Next ¶

func (s *FauxStream) Next() (*Chunk, bool)

type Message ¶

// Message represents a single message in the LLM conversation.
//
// Text can appear both as the plain Content string and as structured Parts:
// TextMessage builds a message whose text is also represented as a content
// part, and TextContent yields provider-visible text for adapters without
// native content-part support.
// NOTE(review): ToolID presumably ties a tool-result message back to the ID
// of the Call it answers — confirm against the provider adapters.
type Message struct {
	Role           Role            `json:"role"`
	Content        string          `json:"content"`
	Parts          []ContentPart   `json:"parts,omitzero"`
	Reasoning      string          `json:"reasoning,omitzero"`
	ThinkingBlocks []ThinkingBlock `json:"thinking_blocks,omitzero"`
	Name           string          `json:"name,omitzero"` // For tool output or identifying the assistant
	ToolID         string          `json:"tool_id,omitzero"`
	Calls          []Call          `json:"tool_calls,omitzero"`
	CacheControl   *CacheControl   `json:"cache_control,omitzero"`
}

Message represents a single message in the LLM conversation.

func TextMessage ¶

func TextMessage(role Role, text string) Message

TextMessage creates a message whose text is also represented as a structured content part.

func (Message) HasAssistantPayload ¶

func (m Message) HasAssistantPayload() bool

HasAssistantPayload reports whether an assistant message carries useful model-visible payload.

func (Message) HasTextContent ¶

func (m Message) HasTextContent() bool

HasTextContent reports whether the message has non-empty visible text.

func (Message) TextContent ¶

func (m Message) TextContent() string

TextContent returns provider-visible text for adapters that do not yet expose native content-part support.

type Model ¶

// Model describes an LLM model exposed by a provider.
//
// NOTE(review): CostPer1MIn and CostPer1MOut are presumably prices per one
// million input and output tokens, in USD to match Provider.Cost — confirm.
// ContextWindow is presumably measured in tokens — confirm.
// Capabilities is a pointer, so it may be nil; how a nil value is resolved
// is not visible here.
type Model struct {
	ID            string        `json:"id"                       toml:"id"`
	ContextWindow int           `json:"context_window,omitzero"  toml:"context_window,omitzero"`
	CostPer1MIn   float64       `json:"cost_per_1m_in,omitzero"  toml:"cost_per_1m_in,omitzero"`
	CostPer1MOut  float64       `json:"cost_per_1m_out,omitzero" toml:"cost_per_1m_out,omitzero"`
	Capabilities  *Capabilities `json:"capabilities,omitzero"    toml:"capabilities,omitzero"`
}

Model describes an LLM model exposed by a provider.

type ModelDef ¶

type ModelDef struct {
	Pattern      string        `json:"pattern"                toml:"pattern"` // glob pattern (e.g. "deepseek-*") or exact name
	Preset       ModelPreset   `json:"preset,omitempty"       toml:"preset,omitempty"`
	Capabilities *Capabilities `json:"capabilities,omitempty" toml:"capabilities,omitempty"`
}

ModelDef represents a model capability mapping definition.

type ModelPreset ¶

type ModelPreset string

ModelPreset defines standard capability profiles.

const (
	PresetChat            ModelPreset = "chat"
	PresetReasoning       ModelPreset = "reasoning"
	PresetOpenAIReasoning ModelPreset = "openai-reasoning"
)

type Prompt ¶

type Prompt struct {
	Messages []Message `json:"messages"`
}

Prompt is typed host input for one model turn.

func NewPrompt ¶

func NewPrompt(messages ...Message) Prompt

NewPrompt creates typed turn input from one or more messages.

func TextPrompt ¶

func TextPrompt(text string) Prompt

TextPrompt creates a one-message user prompt.

func (Prompt) Clone ¶

func (p Prompt) Clone() Prompt

Clone returns a deep copy of the prompt.

type Provider ¶

type Provider interface {
	// ID returns the unique identifier for this provider.
	ID() string

	// Generate executes a non-streaming completion request. Providers receive a
	// neutral request draft and should prepare a provider-specific copy with
	// PrepareRequestForCapabilities before converting it to wire format.
	Generate(ctx context.Context, req *Request) (*Response, error)

	// Stream executes a streaming completion request. Providers receive a neutral
	// request draft and should prepare a provider-specific copy with
	// PrepareRequestForCapabilities before converting it to wire format.
	Stream(ctx context.Context, req *Request) (Stream, error)

	// Models returns the list of models supported by this provider.
	Models(ctx context.Context) ([]Model, error)

	// CountTokens returns the number of tokens in the given messages for a specific model.
	CountTokens(ctx context.Context, model string, messages []Message) (int, error)

	// Cost calculates the cost in USD for the given usage on a specific model.
	Cost(ctx context.Context, model string, usage Usage) float64

	// Capabilities returns the feature set supported by the given model.
	Capabilities(model string) Capabilities

	// IsTransient returns true if the given error is retryable (e.g. 429, 503).
	IsTransient(err error) bool

	// IsContextOverflow returns true if the error indicates the model's context
	// window was exceeded (e.g. context_length_exceeded, 400 bad request with
	// overflow message).
	IsContextOverflow(err error) bool
}

Provider defines the interface for an LLM backend.

type ProviderCompat ¶

type ProviderCompat struct {
	// ThinkingFormat controls how reasoning parameters are sent.
	ThinkingFormat ThinkingFormat

	// SupportsReasoningEffort indicates whether the provider supports reasoning_effort.
	SupportsReasoningEffort bool

	// MaxTokensField is the JSON field name for max tokens.
	// "max_tokens" or "max_completion_tokens".
	MaxTokensField string

	// SupportsStore indicates whether the provider supports the store field.
	SupportsStore bool

	// SupportsDeveloperRole indicates whether the provider supports the developer role.
	SupportsDeveloperRole bool

	// SupportsStrictMode indicates whether the provider supports strict mode in tool definitions.
	SupportsStrictMode bool

	// RequiresToolResultName indicates whether tool results need the name field.
	RequiresToolResultName bool

	// RequiresAssistantAfterToolResult indicates whether an assistant message is needed after tool results.
	RequiresAssistantAfterToolResult bool

	// RequiresThinkingAsText indicates whether thinking blocks must be converted to text blocks.
	RequiresThinkingAsText bool

	// RequiresReasoningContentOnAssistantMessages indicates whether assistant messages need reasoning_content field.
	RequiresReasoningContentOnAssistantMessages bool
}

ProviderCompat describes provider-specific compatibility settings. These control how requests are formatted for different providers.

func DefaultProviderCompat ¶

func DefaultProviderCompat() ProviderCompat

DefaultProviderCompat returns the default compatibility settings for OpenAI-compatible providers.

func DetectCompat ¶

func DetectCompat(provider, baseURL string) ProviderCompat

DetectCompat auto-detects compatibility settings from provider name and base URL. This matches Pi's detectCompat() logic.

func MergeCompat ¶

func MergeCompat(detected, override ProviderCompat) ProviderCompat

MergeCompat merges explicit model-level overrides with detected compat settings. Model-level overrides take precedence over detected settings.

type ProviderConfig ¶

type ProviderConfig struct {
	ID             string
	APIKey         string
	APIEndpoint    string
	DefaultHeaders map[string]string
	Models         []Model
}

ProviderConfig captures the shared endpoint/auth/model metadata used by Canto's built-in provider adapters.

type ReasoningCapabilities ¶

// ReasoningCapabilities describes the typed reasoning controls a model
// accepts; it is the type of Capabilities.Reasoning.
//
// Kind holds one of the ReasoningKind values.
// NOTE(review): the remaining fields appear to be kind-specific — Efforts for
// ReasoningKindEffort, the Budget*Tokens bounds and default for
// ReasoningKindBudget, and CanDisable for whether reasoning can be switched
// off — confirm against the Supports* methods on Capabilities.
type ReasoningCapabilities struct {
	Kind                ReasoningKind
	Efforts             []string
	CanDisable          bool
	BudgetMinTokens     int
	BudgetMaxTokens     int
	BudgetDefaultTokens int
}

type ReasoningKind ¶

type ReasoningKind string

// Values of ReasoningKind. ReasoningKindNone is the empty string, so it is
// the zero value of the type.
// NOTE(review): presumably "effort" means named effort levels, "budget" a
// thinking-token budget, and "boolean" an on/off toggle — confirm.
const (
	ReasoningKindNone    ReasoningKind = ""
	ReasoningKindEffort  ReasoningKind = "effort"
	ReasoningKindBudget  ReasoningKind = "budget"
	ReasoningKindBoolean ReasoningKind = "boolean"
)

type Registry ¶

type Registry struct {
	// contains filtered or unexported fields
}

Registry manages thread-safe resolution of model capabilities.

func NewRegistry ¶

func NewRegistry() *Registry

NewRegistry creates a new, empty model capability registry.

func (*Registry) Clear ¶

func (r *Registry) Clear()

Clear clears all registered model definitions.

func (*Registry) Register ¶

func (r *Registry) Register(def ModelDef)

Register registers a new model capability definition.

func (*Registry) Resolve ¶

func (r *Registry) Resolve(modelID string) Capabilities

Resolve resolves capabilities for a given model ID.

type Request ¶

type Request struct {
	Model          string          `json:"model"`
	Messages       []Message       `json:"messages"`
	Tools          []*Spec         `json:"tools,omitzero"`
	Temperature    float64         `json:"temperature"`
	MaxTokens      int             `json:"max_tokens,omitzero"`
	ResponseFormat *ResponseFormat `json:"response_format,omitzero"`
	// CachePrefixMessages is the number of leading messages Canto expects to
	// stay stable across ordinary turn growth. Use Request's message insertion
	// methods when changing Messages so this boundary stays aligned. Provider
	// adapters ignore it; prompt cache helpers use it to place provider-neutral
	// cache markers.
	CachePrefixMessages int `json:"-"`
	// ReasoningEffort controls the depth of internal reasoning for OpenAI o-series
	// models. Accepted values: "low", "medium", "high". Empty means provider default.
	ReasoningEffort string `json:"reasoning_effort,omitzero"`
	// ThinkingBudget, when > 0, enables Anthropic extended thinking with the given
	// token budget (minimum 1024, must be less than MaxTokens).
	ThinkingBudget int `json:"thinking_budget,omitzero"`
}

Request is the unified request sent to any provider.

func PrepareRequestForCapabilities ¶

func PrepareRequestForCapabilities(req *Request, caps Capabilities) (*Request, error)

PrepareRequestForCapabilities returns a provider-ready copy of req adapted to caps. The original request remains neutral and can be prepared again for a different provider or model.

func (*Request) AppendMessage ¶

func (r *Request) AppendMessage(msg Message)

AppendMessage appends msg to the end of the request's messages.

func (*Request) Clone ¶

func (r *Request) Clone() *Request

Clone returns a deep copy of r's mutable request fields.

func (*Request) InsertAfterCachePrefix ¶

func (r *Request) InsertAfterCachePrefix(msg Message)

InsertAfterCachePrefix inserts msg immediately after the stable cache prefix.

func (*Request) InsertMessage ¶

func (r *Request) InsertMessage(index int, msg Message)

InsertMessage inserts msg at index and keeps CachePrefixMessages aligned. If the insertion is inside the current cache prefix, the prefix grows with it.

func (*Request) InsertPrefixMessage ¶

func (r *Request) InsertPrefixMessage(index int, msg Message)

InsertPrefixMessage inserts msg as part of the stable cache prefix.

func (*Request) PrependMessage ¶

func (r *Request) PrependMessage(msg Message)

PrependMessage inserts msg at the start of the request. If the request already has a stable cache prefix, the new message becomes part of that prefix.

type Response ¶

type Response struct {
	Content        string          `json:"content"`
	Reasoning      string          `json:"reasoning,omitzero"`
	ThinkingBlocks []ThinkingBlock `json:"thinking_blocks,omitzero"`
	Calls          []Call          `json:"tool_calls,omitzero"`
	Usage          Usage           `json:"usage"`
}

Response is the unified response from any provider.

func GenerateFromStream ¶

func GenerateFromStream(s Stream) (*Response, error)

GenerateFromStream collects chunks from a stream and assembles a Response. It is intended for use by Provider implementations to avoid duplicating the complex logic of assembling streaming chunks.

type ResponseFormat ¶

type ResponseFormat struct {
	Type ResponseFormatType `json:"type"`
	// Schema is the JSON Schema definition used when Type is ResponseFormatJSONSchema.
	Schema map[string]any `json:"schema,omitzero"`
	// Name identifies the schema for providers that require a name.
	Name   string `json:"name,omitzero"`
	Strict bool   `json:"strict,omitzero"`
}

ResponseFormat constrains LLM output to structured JSON. Providers that do not support structured outputs ignore this field.

type ResponseFormatType ¶

type ResponseFormatType string

ResponseFormatType controls how the model formats its output.

const (
	// ResponseFormatText is the default unstructured text output.
	ResponseFormatText ResponseFormatType = "text"
	// ResponseFormatJSON constrains output to valid JSON (no schema enforced).
	ResponseFormatJSON ResponseFormatType = "json_object"
	// ResponseFormatJSONSchema constrains output to JSON matching a schema.
	ResponseFormatJSONSchema ResponseFormatType = "json_schema"
)

type RetryConfig ¶

// RetryConfig controls the backoff behavior for a RetryProvider.
//
// NOTE(review): field semantics are not visible here. Presumably MaxAttempts
// caps the number of tries, MinInterval and MaxInterval bound the delay
// between them, Multiplier is the exponential growth factor, and OnRetry is
// called with a RetryEvent for each retry. How RetryForever and
// RetryForeverTransportOnly interact with MaxAttempts needs confirming.
type RetryConfig struct {
	MaxAttempts               int
	MinInterval               time.Duration
	MaxInterval               time.Duration
	Multiplier                float64
	RetryForever              bool
	RetryForeverTransportOnly bool
	OnRetry                   func(RetryEvent)
}

RetryConfig controls the backoff behavior for a RetryProvider.

func DefaultRetryConfig ¶

func DefaultRetryConfig() RetryConfig

DefaultRetryConfig returns a safe default for production LLM usage.

type RetryEvent ¶

// RetryEvent describes a transient provider failure that will be retried.
//
// NOTE(review): presumably Err is the failure being retried, Delay the wait
// before the next try, and Attempt the attempt number — confirm, including
// whether Attempt is 0- or 1-based.
type RetryEvent struct {
	Attempt int
	Delay   time.Duration
	Err     error
}

RetryEvent describes a transient provider failure that will be retried.

type RetryExhaustedError ¶

// RetryExhaustedError marks a transient provider error as terminal after the
// configured retry policy has already handled it.
//
// NOTE(review): Err is presumably the underlying error that Unwrap returns,
// and Attempts the number of attempts made before giving up — confirm.
type RetryExhaustedError struct {
	Attempts int
	Err      error
}

RetryExhaustedError marks a transient provider error as terminal after the configured retry policy has already handled it.

func (*RetryExhaustedError) Error ¶

func (e *RetryExhaustedError) Error() string

func (*RetryExhaustedError) Unwrap ¶

func (e *RetryExhaustedError) Unwrap() error

type RetryProvider ¶

// RetryProvider wraps an LLM provider and automatically retries transient
// errors with exponential backoff.
//
// Provider is embedded, so any Provider method that RetryProvider does not
// define itself (it defines Generate, Stream, and IsTransient) is promoted
// from the wrapped provider. Config holds the retry policy.
type RetryProvider struct {
	Provider
	Config RetryConfig
}

RetryProvider wraps an LLM provider and automatically retries transient errors with exponential backoff.

func NewRetryProvider ¶

func NewRetryProvider(p Provider) *RetryProvider

NewRetryProvider creates a new provider with the default retry policy.

func (*RetryProvider) Generate ¶

func (r *RetryProvider) Generate(ctx context.Context, req *Request) (*Response, error)

func (*RetryProvider) IsTransient ¶

func (r *RetryProvider) IsTransient(err error) bool

func (*RetryProvider) Stream ¶

func (r *RetryProvider) Stream(ctx context.Context, req *Request) (Stream, error)

type Role ¶

// Role defines the role of a message in the conversation.
type Role string

Role defines the role of a message in the conversation.

// Known Role values.
const (
	RoleSystem    Role = "system"
	RoleUser      Role = "user"
	RoleAssistant Role = "assistant"
	RoleTool      Role = "tool"
	// RoleDeveloper is a privileged instruction channel accepted by some models.
	// System messages are converted to this role when
	// Capabilities.SystemRole is RoleDeveloper.
	// NOTE(review): the conversion is presumably done by
	// TransformRequestForCapabilities; confirm and name it here.
	RoleDeveloper Role = "developer"
)

type SmartResolver ¶

// SmartResolver tracks provider health and rotates/fails over among providers.
//
// All fields are unexported; construct one with NewSmartResolver, whose
// Strategy argument defines how providers are picked.
type SmartResolver struct {
	// contains filtered or unexported fields
}

SmartResolver tracks provider health and rotates/fails over among providers.

func NewSmartResolver ¶

func NewSmartResolver(strategy Strategy, providers ...Provider) *SmartResolver

NewSmartResolver creates a new smart resolver.

func (*SmartResolver) Capabilities ¶

func (r *SmartResolver) Capabilities(model string) Capabilities

Capabilities returns the given model's capabilities as reported by the first healthy provider.

func (*SmartResolver) Cost ¶

func (r *SmartResolver) Cost(ctx context.Context, model string, usage Usage) float64

func (*SmartResolver) CountTokens ¶

func (r *SmartResolver) CountTokens(
	ctx context.Context,
	model string,
	messages []Message,
) (int, error)

func (*SmartResolver) Generate ¶

func (r *SmartResolver) Generate(ctx context.Context, req *Request) (*Response, error)

func (*SmartResolver) ID ¶

func (r *SmartResolver) ID() string

func (*SmartResolver) IsContextOverflow ¶

func (r *SmartResolver) IsContextOverflow(err error) bool

IsContextOverflow returns true if any underlying provider reports a context overflow error.

func (*SmartResolver) IsTransient ¶

func (r *SmartResolver) IsTransient(err error) bool

IsTransient returns true if any underlying provider reports a transient error.

func (*SmartResolver) Models ¶

func (r *SmartResolver) Models(ctx context.Context) ([]Model, error)

func (*SmartResolver) Stream ¶

func (r *SmartResolver) Stream(ctx context.Context, req *Request) (Stream, error)

type Spec ¶

// Spec represents a tool that can be called by the LLM.
//
// Name, Description and Parameters are always serialized. CacheControl is
// optional: its omitzero tag drops the "cache_control" key from the JSON
// output when the pointer is nil.
type Spec struct {
	Name         string        `json:"name"`
	Description  string        `json:"description"`
	Parameters   any           `json:"parameters"` // JSON Schema
	CacheControl *CacheControl `json:"cache_control,omitzero"`
}

Spec represents a tool that can be called by the LLM.

type StandardClassifier ¶

// StandardClassifier implements Classifier by wrapping a standard LLM
// Provider. It uses structured outputs (JSON schema) to ensure deterministic
// labels.
//
// Provider is the backend used for classification and Model names the model
// to use on it; NewStandardClassifier takes the same three values.
type StandardClassifier struct {
	Provider Provider
	Model    string
	Prompt   string // System prompt describing the classification task
}

StandardClassifier implements Classifier by wrapping a standard LLM Provider. It uses structured outputs (JSON schema) to ensure deterministic labels.

func NewStandardClassifier ¶

func NewStandardClassifier(p Provider, model string, systemPrompt string) *StandardClassifier

NewStandardClassifier creates a classifier backed by a chat model.

func (*StandardClassifier) Classify ¶

func (c *StandardClassifier) Classify(
	ctx context.Context,
	input string,
	labels []string,
) (*Classification, error)

type Strategy ¶

// Strategy defines how a SmartResolver picks providers.
type Strategy string

Strategy defines how a SmartResolver picks providers.

// Strategy values accepted by NewSmartResolver.
// NOTE(review): semantics are not documented here. Presumably
// StrategyPriority prefers providers in the order they were passed and
// StrategyRoundRobin rotates among them; confirm and document.
const (
	StrategyPriority   Strategy = "priority"
	StrategyRoundRobin Strategy = "round-robin"
)

type Stream ¶

// Stream defines the interface for a streaming LLM response.
//
// Callers read chunks with Next until it reports false, then consult Err.
// NOTE(review): presumably Next also reports false when the stream fails, so
// Err is what distinguishes clean exhaustion from an error; confirm.
type Stream interface {
	// Next returns the next chunk of the response.
	// It returns (nil, false) when the stream is exhausted.
	Next() (*Chunk, bool)
	// Err returns the first error encountered during streaming.
	Err() error
	// Close closes the stream.
	Close() error
}

Stream defines the interface for a streaming LLM response.

type StreamAccumulator ¶

// StreamAccumulator assembles normalized stream chunks into a provider
// response.
//
// Chunks are fed in with Add and the assembled result is read with Response.
// Chunks are expected to be cumulative: a tool call with the same ID replaces
// the previous call state, and Usage keeps the latest cumulative value.
type StreamAccumulator struct {
	// contains filtered or unexported fields
}

StreamAccumulator assembles normalized stream chunks into a provider response.

Provider adapters are responsible for turning provider-specific deltas into cumulative chunks. Tool calls with the same ID replace the previous call state, and Usage keeps the latest cumulative value.

func (*StreamAccumulator) Add ¶

func (a *StreamAccumulator) Add(chunk *Chunk)

func (*StreamAccumulator) Response ¶

func (a *StreamAccumulator) Response() Response

type ThinkingBlock ¶

// ThinkingBlock represents a reasoning block from a provider like Anthropic.
//
// Type is always serialized. Thinking and Signature carry omitzero tags, so
// their JSON keys are dropped when the strings are empty.
type ThinkingBlock struct {
	Type      string `json:"type"` // "thinking" or "redacted_thinking"
	Thinking  string `json:"thinking,omitzero"`
	Signature string `json:"signature,omitzero"`
}

ThinkingBlock represents a reasoning block from a provider like Anthropic.

type ThinkingFormat ¶

// ThinkingFormat controls how reasoning/thinking parameters are sent to
// providers. Each provider has a different format for controlling reasoning
// behavior.
type ThinkingFormat string

ThinkingFormat controls how reasoning/thinking parameters are sent to providers. Each provider has a different format for controlling reasoning behavior.

// Known ThinkingFormat values. ThinkingFormatNone is the empty string and
// therefore the zero value of ThinkingFormat.
const (
	// ThinkingFormatOpenAI uses top-level reasoning_effort field.
	// Used by: OpenAI, Groq, Cerebras, Fireworks, Mistral, xAI
	ThinkingFormatOpenAI ThinkingFormat = "openai"

	// ThinkingFormatOpenRouter uses nested reasoning: { effort: "..." } object.
	// Used by: OpenRouter
	ThinkingFormatOpenRouter ThinkingFormat = "openrouter"

	// ThinkingFormatDeepSeek uses thinking: { type: "enabled"/"disabled" } plus reasoning_effort.
	// Used by: DeepSeek
	ThinkingFormatDeepSeek ThinkingFormat = "deepseek"

	// ThinkingFormatTogether uses reasoning: { enabled: bool } plus reasoning_effort.
	// Used by: Together AI
	ThinkingFormatTogether ThinkingFormat = "together"

	// ThinkingFormatZai uses top-level enable_thinking boolean.
	// Used by: Z.ai
	// NOTE(review): this description is identical to ThinkingFormatQwen's;
	// confirm how the two wire formats actually differ.
	ThinkingFormatZai ThinkingFormat = "zai"

	// ThinkingFormatQwen uses top-level enable_thinking boolean.
	// Used by: Qwen models
	ThinkingFormatQwen ThinkingFormat = "qwen"

	// ThinkingFormatQwenChatTemplate uses chat_template_kwargs.enable_thinking.
	// Used by: Qwen models via vLLM
	ThinkingFormatQwenChatTemplate ThinkingFormat = "qwen-chat-template"

	// ThinkingFormatNone means no thinking/reasoning support.
	ThinkingFormatNone ThinkingFormat = ""
)

type Usage ¶

// Usage tracks token consumption and cost.
//
// InputTokens, OutputTokens and TotalTokens are always serialized. The two
// cache counters (omitempty) and Cost (omitzero) are dropped from the JSON
// output when zero.
// NOTE(review): whether TotalTokens includes the cache counters is not
// visible here; confirm and document.
type Usage struct {
	InputTokens         int     `json:"input_tokens"`
	OutputTokens        int     `json:"output_tokens"`
	CacheReadTokens     int     `json:"cache_read_tokens,omitempty"`
	CacheCreationTokens int     `json:"cache_creation_tokens,omitempty"`
	TotalTokens         int     `json:"total_tokens"`
	Cost                float64 `json:"cost,omitzero"` // USD
}

Usage tracks token consumption and cost.

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL