provider

package

Version: v1.3.0 (latest) · Published: Jun 23, 2026 · License: MIT · Imports: 18 · Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/eshanized/M31A

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
func ApplyReasoningParams(modelID string, body map[string]any) map[string]any
func BuildChatBody(req ChatRequest) map[string]any
func CachedModels(cache *ModelCache) []types.ModelInfo
func EnrichModelInfo(models []types.ModelInfo, providerName string) []types.ModelInfo
func EstimateCost(modelID string, usage types.Usage, cache *ModelCache) float64
func FetchOpenRouterMetadata(ctx context.Context) map[string]ModelMetadata
func GetModel(id string, cache *ModelCache) (*types.ModelInfo, error)
func GetRetryAfter(resp *http.Response) string
func IsContextExceeded(statusCode int, body string) bool
func IsLikelyBrokenOnNvidia(modelID string) bool
func IsNonChatModel(modelID string) bool
func IsRateLimited(resp *http.Response) bool
func LocalMetadataFallback() map[string]ModelMetadata
func ParseModelCapabilities(modelID string, extraReasoningPatterns ...string) types.CapFlags
func ParseSSEChunk(data string, modelID string) (*types.StreamChunk, error)
func ReadBodyLimited(resp *http.Response, maxBytes int64) ([]byte, error)
func SanitizeProviderError(statusCode int, body string, providerName string) string
func SetCommonHeaders(req *http.Request, apiKey string, version string)
func StaleFallback(cache *ModelCache) ([]types.ModelInfo, error)
func UserAgent(version string) string
type BaseClient
- func NewBaseClient(apiKey, baseURL, version string, cacheTTL, cacheStaleTTL time.Duration, ...) BaseClient
- func (b *BaseClient) APIKey() string
- func (b *BaseClient) CachedModels() []types.ModelInfo
- func (b *BaseClient) EstimateCost(modelID string, usage types.Usage) float64
- func (b *BaseClient) EvictModel(id string)
- func (b *BaseClient) GetModel(id string) (*types.ModelInfo, error)
- func (b *BaseClient) MakeIterator(sse *SSEParser, modelID string) *types.StreamIterator
type ChatRequest
type FallbackAfterWait
- func FindFallbackWithRetryAfter(registry *Registry, currentProvider string, retryAfterHeader string) FallbackAfterWait
type FallbackEvent
- func FindFallbackProvider(registry *Registry, currentProvider string) (string, *FallbackEvent, error)
type HTTPStatusError
- func (e *HTTPStatusError) Error() string
- func (e *HTTPStatusError) IsRetryable() bool
type LLMProvider
type ModelCache
- func NewModelCache(ttl time.Duration) *ModelCache
- func NewModelCacheWithStale(ttl time.Duration, staleTTL time.Duration) *ModelCache
- func (c *ModelCache) FetchTime() time.Time
- func (c *ModelCache) Get(id string) (*types.ModelInfo, bool)
- func (c *ModelCache) IsExpired() bool
- func (c *ModelCache) IsRefreshing() bool
- func (c *ModelCache) IsStale() bool
- func (c *ModelCache) Len() int
- func (c *ModelCache) Models() map[string]*types.ModelInfo
- func (c *ModelCache) Refresh(ctx context.Context, ...) ([]types.ModelInfo, error)
- func (c *ModelCache) Remove(id string)
- func (c *ModelCache) Set(models []types.ModelInfo)
type ModelMetadata
type ReasoningConfig
- func GetReasoningConfig(modelID string) (ReasoningConfig, bool)
type Registry
- func NewRegistry() *Registry
- func (r *Registry) Active() string
- func (r *Registry) ActiveProvider() LLMProvider
- func (r *Registry) Get(name string) (LLMProvider, error)
- func (r *Registry) List() []string
- func (r *Registry) ListAll() []string
- func (r *Registry) Register(name string, p LLMProvider) error
- func (r *Registry) RollbackActive(fromName, toName string) bool
- func (r *Registry) SetActive(name string) error
- func (r *Registry) TrySetActive(name string) (LLMProvider, error)
type RetryInfo
- func InspectResponse(resp *http.Response) RetryInfo
type SSEParser
- func NewSSEParser(resp *http.Response) *SSEParser
- func NewSSEParserWithContext(resp *http.Response, ctx context.Context) *SSEParser
- func (p *SSEParser) Close() error
- func (p *SSEParser) Next() (eventType string, data string, err error)
type ToolDefinition

Constants ¶

View Source

const DefaultCacheRefreshInterval = 5 * time.Minute

DefaultCacheRefreshInterval is the default interval for automatic model cache refreshes (5 minutes, matching ModelCacheTTL).

View Source

const DefaultStreamTimeout = 30 * time.Second

DefaultStreamTimeout is the maximum time to wait for a single SSE event. Reduced from 5 minutes to 30 seconds to prevent hanging on dead connections.

Variables ¶

This section is empty.

Functions ¶

func ApplyReasoningParams ¶

func ApplyReasoningParams(modelID string, body map[string]any) map[string]any

func BuildChatBody ¶

func BuildChatBody(req ChatRequest) map[string]any

BuildChatBody constructs the standard chat completion request body. Tool definitions are sent as native provider tools in OpenAI function-calling format when req.Tools is populated. The engine uses native-first tool dispatch with text-based JSON extraction as a fallback for models that do not support native function calling.

func CachedModels ¶

func CachedModels(cache *ModelCache) []types.ModelInfo

CachedModels returns all models from the cache as a slice.

func EnrichModelInfo ¶ added in v1.1.0

func EnrichModelInfo(models []types.ModelInfo, providerName string) []types.ModelInfo

EnrichModelInfo enriches a slice of ModelInfo with context_length and pricing data from OpenRouter (preferred) or the local fallback database.

This is designed for providers like Zen that don't return pricing or context length in their API response. The enrichment only modifies fields that are currently zero/default — it never overwrites non-zero values from the provider.

The providerName parameter controls enrichment behavior:

- "zen": enriches both context_length and pricing.
- Other providers: enriches only context_length (pricing comes from their API).

func EstimateCost ¶

func EstimateCost(modelID string, usage types.Usage, cache *ModelCache) float64

EstimateCost calculates the cost of a usage sample against the model cache.

func FetchOpenRouterMetadata ¶ added in v1.1.0

func FetchOpenRouterMetadata(ctx context.Context) map[string]ModelMetadata

FetchOpenRouterMetadata fetches model metadata from OpenRouter's public API. Results are cached for openRouterMetadataTTL. Uses a dedicated HTTP client with a short timeout to avoid blocking provider operations. Unlike sync.Once, this retries on failure after a cooldown period.

func GetModel ¶

func GetModel(id string, cache *ModelCache) (*types.ModelInfo, error)

GetModel retrieves a model from the cache by ID.

func GetRetryAfter ¶

func GetRetryAfter(resp *http.Response) string

GetRetryAfter extracts the Retry-After header value from an HTTP response.

func IsContextExceeded ¶

func IsContextExceeded(statusCode int, body string) bool

IsContextExceeded checks if an HTTP error indicates context window overflow. Matches HTTP 400 and 413 with context-related patterns to avoid false positives. Uses containsFold for case-insensitive matching without allocating a lowered copy.

func IsLikelyBrokenOnNvidia ¶ added in v1.3.0

func IsLikelyBrokenOnNvidia(modelID string) bool

IsLikelyBrokenOnNvidia reports whether a model ID is known to fail at runtime on NVIDIA NIM despite appearing in the /models endpoint. These models return 404 or empty responses when used with /chat/completions.

func IsNonChatModel ¶ added in v1.2.0

func IsNonChatModel(modelID string) bool

IsNonChatModel reports whether a model ID belongs to a model that does not support chat completions (embeddings, vision-only, safety classifiers, etc.) or is a known deprecated model on NVIDIA NIM.

func IsRateLimited ¶

func IsRateLimited(resp *http.Response) bool

IsRateLimited checks if an HTTP response indicates rate limiting (429).

func LocalMetadataFallback ¶ added in v1.1.0

func LocalMetadataFallback() map[string]ModelMetadata

LocalMetadataFallback returns a hardcoded database of known model metadata. Used as a fallback when OpenRouter API is unreachable. Context windows and pricing are derived from public provider documentation.

func ParseModelCapabilities ¶

func ParseModelCapabilities(modelID string, extraReasoningPatterns ...string) types.CapFlags

ParseModelCapabilities infers capability flags from the model ID using heuristics. extraReasoningPatterns are additional patterns to check for reasoning detection (e.g., Zen uses "-r1" which OpenRouter does not). Tools capability defaults to false and is only set true for known tool-capable model families.

func ParseSSEChunk ¶

func ParseSSEChunk(data string, modelID string) (*types.StreamChunk, error)

func ReadBodyLimited ¶

func ReadBodyLimited(resp *http.Response, maxBytes int64) ([]byte, error)

ReadBodyLimited reads an HTTP response body up to maxBytes, preventing OOM from unbounded responses. Returns the body bytes and any error.

func SanitizeProviderError ¶

func SanitizeProviderError(statusCode int, body string, providerName string) string

SanitizeProviderError maps HTTP status codes to friendly messages and truncates/strips the response body to prevent raw HTML/JSON leaking to users. providerName controls minor behavioral differences: Zen appends body text for 401/502 errors while OpenRouter does not.

func SetCommonHeaders ¶

func SetCommonHeaders(req *http.Request, apiKey string, version string)

SetCommonHeaders sets Authorization and User-Agent headers on an HTTP request.

func StaleFallback ¶

func StaleFallback(cache *ModelCache) ([]types.ModelInfo, error)

StaleFallback returns cached models if the cache is not yet stale, otherwise returns ErrProviderUnreachable.

func UserAgent ¶

func UserAgent(version string) string

UserAgent returns the standard M31A User-Agent string.

Types ¶

type BaseClient ¶

// BaseClient holds the fields and methods shared by all provider
// implementations. Provider-specific clients embed it and add the
// provider-specific methods themselves.
type BaseClient struct {
	// APIKeyField is the provider API key. The APIKey method returns a
	// masked version of the key for display.
	// NOTE(review): presumably set from NewBaseClient's apiKey argument — confirm.
	APIKeyField   string
	// BaseURLField is the provider base URL.
	// NOTE(review): presumably set from NewBaseClient's baseURL argument — confirm.
	BaseURLField  string
	// HTTPClient has no hard Timeout; it is used for long-running SSE
	// streaming requests where the connection stays open for the whole
	// generation.
	HTTPClient    *http.Client // streaming — no hard Timeout
	// CatalogClient has a hard Timeout; it is used for short catalog and
	// health-check requests that must never block indefinitely.
	CatalogClient *http.Client // catalog/health — hard Timeout
	// Cache is the model cache consulted by CachedModels, GetModel,
	// EstimateCost and EvictModel.
	Cache         *ModelCache
	// HealthLiveMs and HealthSlowMs are taken from NewBaseClient.
	// NOTE(review): presumably latency thresholds in milliseconds used to
	// classify health-check results — confirm against HealthCheck.
	HealthLiveMs  int64
	HealthSlowMs  int64
	// Version is the client version string.
	// NOTE(review): presumably feeds UserAgent/SetCommonHeaders — confirm.
	Version       string
}

BaseClient holds fields and methods shared by all provider implementations. Provider-specific clients embed BaseClient and implement only Name(), FetchModels(), ChatCompletionStream(), and HealthCheck() themselves; the remaining LLMProvider methods are promoted from BaseClient.

Two HTTP clients are maintained:

- HTTPClient: no Timeout; used for long-running SSE streaming requests where the connection stays open for the entire generation.
- CatalogClient: hard Timeout (FetchModelsTimeout); used for short catalog and health-check requests that must never block indefinitely.

func NewBaseClient ¶

func NewBaseClient(apiKey, baseURL, version string, cacheTTL, cacheStaleTTL time.Duration, healthLiveMs, healthSlowMs int64) BaseClient

NewBaseClient creates a BaseClient with the given settings.

func (*BaseClient) APIKey ¶

func (b *BaseClient) APIKey() string

APIKey returns a masked version of the API key for display.

func (*BaseClient) CachedModels ¶

func (b *BaseClient) CachedModels() []types.ModelInfo

CachedModels returns all models from the cache without a network call.

func (*BaseClient) EstimateCost ¶

func (b *BaseClient) EstimateCost(modelID string, usage types.Usage) float64

EstimateCost calculates cost for a usage sample against the model cache.

func (*BaseClient) EvictModel ¶ added in v1.2.0

func (b *BaseClient) EvictModel(id string)

EvictModel removes a model from the cache by ID. Used for self-healing when a chat completion request fails because the model is unavailable, deprecated, or incompatible — preventing it from appearing in the selector.

func (*BaseClient) GetModel ¶

func (b *BaseClient) GetModel(id string) (*types.ModelInfo, error)

GetModel retrieves a model from the cache by ID.

func (*BaseClient) MakeIterator ¶

func (b *BaseClient) MakeIterator(sse *SSEParser, modelID string) *types.StreamIterator

MakeIterator wraps an SSEParser into a StreamIterator.

type ChatRequest ¶

// ChatRequest is the chat completion request accepted by BuildChatBody and
// by LLMProvider.ChatCompletionStream.
type ChatRequest struct {
	// Model is encoded under the "model" JSON key.
	// NOTE(review): presumably the provider model ID — confirm.
	Model            string           `json:"model"`
	// Messages is the conversation sent to the model.
	Messages         []types.Message  `json:"messages"`
	// MaxTokens is dropped from the struct's JSON encoding when zero
	// (omitempty).
	MaxTokens        int              `json:"max_tokens,omitempty"`
	// Tools, when populated, is sent by BuildChatBody as native provider
	// tools in OpenAI function-calling format.
	Tools            []ToolDefinition `json:"tools,omitempty"`
	// ReasoningEnabled is dropped from the struct's JSON encoding when false.
	// NOTE(review): presumably gates ApplyReasoningParams — confirm.
	ReasoningEnabled bool             `json:"reasoning_enabled,omitempty"`
}

type FallbackAfterWait ¶

// FallbackAfterWait describes a pending provider fallback. It is returned by
// FindFallbackWithRetryAfter so the caller can schedule the wait
// asynchronously instead of blocking.
type FallbackAfterWait struct {
	// Event describes the provider transition (its To field names the
	// target provider).
	Event *FallbackEvent
	// Wait is the delay the caller must apply before committing the
	// provider switch. When derived from a Retry-After header on a 429 it
	// is capped at maxRetryAfter (120s).
	Wait  time.Duration
	// Err reports a failure of the fallback attempt.
	// NOTE(review): presumably the error from FindFallbackProvider, with
	// Event nil in that case — confirm.
	Err   error
}

FallbackAfterWait describes a pending fallback: the event describing the transition (including the provider to switch to), the delay to apply before committing the switch, and any error from the fallback attempt (Err). Returned by FindFallbackWithRetryAfter so the caller (typically the TUI layer) can schedule the wait asynchronously via a tea.Cmd instead of blocking the event loop.

func FindFallbackWithRetryAfter ¶

func FindFallbackWithRetryAfter(registry *Registry, currentProvider string, retryAfterHeader string) FallbackAfterWait

FindFallbackWithRetryAfter attempts fallback with Retry-After awareness. Instead of blocking the caller (which would stall the Bubble Tea event loop), it returns a FallbackAfterWait describing the delay the caller should schedule asynchronously. The caller is responsible for waiting the returned Wait duration before applying the provider switch.

When the current provider returned a 429 with a Retry-After header, Wait is capped at maxRetryAfter (120s).

type FallbackEvent ¶

// FallbackEvent describes a transition from one provider to another. It is
// returned by FindFallbackProvider and carried in FallbackAfterWait.Event.
type FallbackEvent struct {
	// From is encoded under the "from" JSON key.
	// NOTE(review): presumably the name of the provider being left — confirm.
	From   string `json:"from"`
	// To is encoded under the "to" JSON key.
	// NOTE(review): presumably the name of the provider being switched to — confirm.
	To     string `json:"to"`
	// Reason is encoded under the "reason" JSON key.
	// NOTE(review): presumably a human-readable cause for the switch — confirm.
	Reason string `json:"reason"`
}

func FindFallbackProvider ¶

func FindFallbackProvider(registry *Registry, currentProvider string) (string, *FallbackEvent, error)

type HTTPStatusError ¶ added in v1.1.0

// HTTPStatusError carries the HTTP status code from a provider response so
// callers can classify retries with errors.As rather than by matching on
// error message strings.
type HTTPStatusError struct {
	// StatusCode is the HTTP status code of the provider response.
	// IsRetryable treats 500, 502 and 503 as transient.
	StatusCode int
	// Message is the accompanying error text.
	// NOTE(review): presumably what Error() returns — confirm.
	Message    string
}

HTTPStatusError carries the HTTP status code from a provider response, enabling typed retry classification via errors.As instead of fragile string matching on error messages.

func (*HTTPStatusError) Error ¶ added in v1.1.0

func (e *HTTPStatusError) Error() string

func (*HTTPStatusError) IsRetryable ¶ added in v1.1.0

func (e *HTTPStatusError) IsRetryable() bool

IsRetryable reports whether the HTTP status code indicates a transient server error that should be retried (500, 502, 503).

type LLMProvider ¶

// LLMProvider is the interface stored in and returned by Registry.
// Provider-specific clients satisfy it by embedding BaseClient, which
// supplies APIKey, CachedModels, EstimateCost and GetModel, and by
// implementing Name, FetchModels, ChatCompletionStream and HealthCheck
// themselves.
type LLMProvider interface {
	// Name is implemented by each provider-specific client.
	// NOTE(review): presumably the name used as the Registry key — confirm.
	Name() string
	// APIKey, as implemented by BaseClient, returns a masked version of
	// the API key for display.
	APIKey() string
	// FetchModels is implemented by each provider-specific client.
	FetchModels(ctx context.Context) ([]types.ModelInfo, error)
	// CachedModels, as implemented by BaseClient, returns all models from
	// the cache without a network call.
	CachedModels() []types.ModelInfo
	// ChatCompletionStream is implemented by each provider-specific client
	// and yields the response as a stream iterator.
	ChatCompletionStream(ctx context.Context, req ChatRequest) (*types.StreamIterator, error)
	// EstimateCost, as implemented by BaseClient, calculates the cost of a
	// usage sample against the model cache.
	EstimateCost(modelID string, usage types.Usage) float64
	// HealthCheck is implemented by each provider-specific client.
	HealthCheck(ctx context.Context) types.HealthStatus
	// GetModel, as implemented by BaseClient, retrieves a model from the
	// cache by ID.
	GetModel(id string) (*types.ModelInfo, error)
}

type ModelCache ¶

type ModelCache struct {
	// contains filtered or unexported fields
}

func NewModelCache ¶

func NewModelCache(ttl time.Duration) *ModelCache

func NewModelCacheWithStale ¶

func NewModelCacheWithStale(ttl time.Duration, staleTTL time.Duration) *ModelCache

NewModelCacheWithStale creates a ModelCache with explicit TTL and stale TTL.

func (*ModelCache) FetchTime ¶

func (c *ModelCache) FetchTime() time.Time

func (*ModelCache) Get ¶

func (c *ModelCache) Get(id string) (*types.ModelInfo, bool)

func (*ModelCache) IsExpired ¶

func (c *ModelCache) IsExpired() bool

func (*ModelCache) IsRefreshing ¶

func (c *ModelCache) IsRefreshing() bool

func (*ModelCache) IsStale ¶

func (c *ModelCache) IsStale() bool

func (*ModelCache) Len ¶

func (c *ModelCache) Len() int

func (*ModelCache) Models ¶

func (c *ModelCache) Models() map[string]*types.ModelInfo

func (*ModelCache) Refresh ¶

func (c *ModelCache) Refresh(ctx context.Context, fetchFn func(ctx context.Context) ([]types.ModelInfo, error)) ([]types.ModelInfo, error)

Refresh deduplicates concurrent calls via singleflight — only one HTTP request is made even if multiple goroutines call Refresh simultaneously. The "refreshing" flag is set/cleared only by the goroutine that actually runs fetchFn; waiters never touch it (BUG-17).

func (*ModelCache) Remove ¶ added in v1.2.0

func (c *ModelCache) Remove(id string)

Remove evicts a single model from the cache by ID. Used for self-healing when a model is discovered to be non-functional (e.g., NVIDIA NIM returns 404 for deprecated models still listed by /models).

func (*ModelCache) Set ¶

func (c *ModelCache) Set(models []types.ModelInfo)

type ModelMetadata ¶ added in v1.1.0

// ModelMetadata holds enrichment data for a model from an external source.
// FetchOpenRouterMetadata and LocalMetadataFallback return maps of these.
type ModelMetadata struct {
	// ContextLength is the context_length value used by EnrichModelInfo.
	// NOTE(review): presumably measured in tokens — confirm.
	ContextLength int64
	// Pricing is the pricing data used by EnrichModelInfo.
	Pricing       types.Pricing
	// Source names where the metadata came from.
	Source        string // "openrouter", "local"
}

ModelMetadata holds enrichment data for a model from an external source.

type ReasoningConfig ¶

// ReasoningConfig is the reasoning configuration that GetReasoningConfig
// looks up for a model ID.
type ReasoningConfig struct {
	// ModelFamily is encoded under the "model_family" JSON key.
	// NOTE(review): presumably the family a model ID is matched against — confirm.
	ModelFamily   string         `json:"model_family"`
	// RequestParams is encoded under the "request_params" JSON key.
	// NOTE(review): presumably merged at the top level of the request body,
	// in contrast to ExtraBodyParams — confirm against ApplyReasoningParams.
	RequestParams map[string]any `json:"request_params"`
	// ExtraBodyParams are nested under "extra_body" in the request body.
	// Used by NVIDIA NIM which requires chat_template_kwargs and
	// reasoning_budget inside extra_body rather than at the top level.
	ExtraBodyParams map[string]any `json:"extra_body_params,omitempty"`
	// SSEField is encoded under the "sse_field" JSON key.
	// NOTE(review): presumably the path of the reasoning field inside each
	// SSE chunk, from which SSEFieldParts is derived — confirm.
	SSEField        string         `json:"sse_field"`
	// Pre-computed field path parts to avoid per-chunk strings.Split
	// Excluded from JSON encoding (tag "-").
	SSEFieldParts []string `json:"-"`
}

func GetReasoningConfig ¶

func GetReasoningConfig(modelID string) (ReasoningConfig, bool)

type Registry ¶

type Registry struct {
	// contains filtered or unexported fields
}

func NewRegistry ¶

func NewRegistry() *Registry

func (*Registry) Active ¶

func (r *Registry) Active() string

func (*Registry) ActiveProvider ¶

func (r *Registry) ActiveProvider() LLMProvider

func (*Registry) Get ¶

func (r *Registry) Get(name string) (LLMProvider, error)

func (*Registry) List ¶

func (r *Registry) List() []string

func (*Registry) ListAll ¶

func (r *Registry) ListAll() []string

ListAll returns all registered provider names without any decoration. It delegates to List() to avoid code duplication.

func (*Registry) Register ¶

func (r *Registry) Register(name string, p LLMProvider) error

func (*Registry) RollbackActive ¶

func (r *Registry) RollbackActive(fromName, toName string) bool

RollbackActive reverts the active provider to the given name if the current active provider matches fromName. Used when a health check fails after TrySetActive, preventing the system from being left with an unhealthy active provider.

func (*Registry) SetActive ¶

func (r *Registry) SetActive(name string) error

func (*Registry) TrySetActive ¶

func (r *Registry) TrySetActive(name string) (LLMProvider, error)

TrySetActive atomically sets the provider as active if it exists. Returns the provider and nil on success. This prevents TOCTOU races where another goroutine could SetActive between Get and SetActive.

type RetryInfo ¶

// RetryInfo holds the rate-limit and Retry-After metadata that
// InspectResponse extracts from an HTTP response. InspectResponse returns
// the zero value for a nil response.
type RetryInfo struct {
	// RateLimited is the IsRateLimited result for the response (HTTP 429).
	RateLimited bool
	// RetryAfter is the Retry-After header value as returned by
	// GetRetryAfter.
	RetryAfter  string
	// Wait is zero when no retry header is present.
	// NOTE(review): presumably the parsed form of RetryAfter — confirm.
	Wait        time.Duration
}

RetryInfo holds Retry-After metadata extracted from an HTTP response. Wait is zero when no retry header is present.

func InspectResponse ¶

func InspectResponse(resp *http.Response) RetryInfo

InspectResponse extracts rate-limit and Retry-After metadata from an HTTP response using IsRateLimited and GetRetryAfter. Returns a zero RetryInfo for nil responses. Callers use this to decide whether to wait before falling back to another provider.

type SSEParser ¶

type SSEParser struct {
	// contains filtered or unexported fields
}

func NewSSEParser ¶

func NewSSEParser(resp *http.Response) *SSEParser

func NewSSEParserWithContext ¶

func NewSSEParserWithContext(resp *http.Response, ctx context.Context) *SSEParser

func (*SSEParser) Close ¶

func (p *SSEParser) Close() error

Close releases the underlying response body. Idempotent — safe to call multiple times.

func (*SSEParser) Next ¶

func (p *SSEParser) Next() (eventType string, data string, err error)

type ToolDefinition ¶

// ToolDefinition describes one tool carried in ChatRequest.Tools.
// BuildChatBody sends these as native provider tools in OpenAI
// function-calling format.
type ToolDefinition struct {
	// Name is encoded under the "name" JSON key.
	Name             string `json:"name"`
	// Description is encoded under the "description" JSON key.
	Description      string `json:"description"`
	// Parameters is encoded under the "parameters" JSON key.
	// NOTE(review): presumably the JSON text whose parsed form is cached
	// in ParametersParsed — confirm.
	Parameters       string `json:"parameters"`
	// ParametersParsed is excluded from JSON encoding (tag "-").
	ParametersParsed any    `json:"-"` // cached json.Unmarshal result, populated by buildToolDefinitions
}

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
nvidia
openrouter
zen

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL