Documentation
¶
Index ¶
- Constants
- Variables
- func ApplyPromptCaching(req *AnthropicRequest)
- func BackgroundTaskFromContext(ctx context.Context) bool
- func DisableModelRoutingFromContext(ctx context.Context) bool
- func DisableResponsesContinuationFromContext(ctx context.Context) bool
- func GetExcludedProviders(ctx context.Context) []string
- func GetPinnedProvider(ctx context.Context) string
- func InjectPromptCaching(body []byte) ([]byte, error)
- func IsContextWindowExceededMessage(msg string) bool
- func LocaleFromContext(ctx context.Context) string
- func LogTokenChurn(provider, model, promptCacheKey string, ...)
- func ReadPortFromFile(portFile string) (int, error)
- func SanitizeError(err error) string
- func SessionIDFromContext(ctx context.Context) string
- func ValidateRoutingConfig(cfg *RoutingConfig) []error
- func WithBackgroundTask(ctx context.Context) context.Context
- func WithDisableModelRouting(ctx context.Context) context.Context
- func WithDisableResponsesContinuation(ctx context.Context) context.Context
- func WithExcludedProviders(ctx context.Context, providerIDs ...string) context.Context
- func WithLocale(ctx context.Context, locale string) context.Context
- func WithPinnedProvider(ctx context.Context, providerID string) context.Context
- func WithResolvedRoute(ctx context.Context, rr *ResolvedRoute) context.Context
- func WithSessionID(ctx context.Context, sessionID string) context.Context
- type APIErrorClassifier
- type AnthropicCacheControl
- type AnthropicContentBlock
- type AnthropicMessage
- type AnthropicRequest
- type AnthropicResponse
- type AnthropicStreamContentBlock
- type AnthropicStreamDelta
- type AnthropicStreamEvent
- type AnthropicStreamUsage
- type AnthropicSystemBlock
- type AnthropicTool
- type AuthConfig
- type AuthExhaustedError
- type AuthProber
- func (ap *AuthProber) Apply(req *http.Request, strategy AuthStrategy, apiKey *providerpool.APIKey, ...)
- func (ap *AuthProber) Forget(providerID, memoryKey string)
- func (ap *AuthProber) ProbeAndForward(provider *providerpool.Provider, apiKey *providerpool.APIKey, ...) (*http.Response, error)
- func (ap *AuthProber) Recall(providerID, memoryKey string) (AuthStrategy, bool)
- func (ap *AuthProber) Remember(providerID, memoryKey string, strategy AuthStrategy)
- func (ap *AuthProber) Strategies(provider *providerpool.Provider, apiKey *providerpool.APIKey, ...) []AuthStrategy
- type AuthStrategy
- type Authenticator
- func (a *Authenticator) AddAPIKey(key string)
- func (a *Authenticator) AddAllowedIP(ip string)
- func (a *Authenticator) Authenticate(r *http.Request) (bool, string)
- func (a *Authenticator) CheckRateLimit(r *http.Request) (bool, string)
- func (a *Authenticator) CleanupRateLimits()
- func (a *Authenticator) ListAPIKeys() []map[string]interface{}
- func (a *Authenticator) RemoveAPIKey(key string)
- func (a *Authenticator) Stats() map[string]interface{}
- func (a *Authenticator) ValidateAPIKey(key string) bool
- type BreakerSnapshot
- type BufferPool
- type CacheEntry
- type ChatMessage
- type ChatRequest
- type ConfigReloader
- type ConfigWatcher
- type ConnWarmup
- type ConnectionConfig
- type ConnectionMetrics
- func (cm *ConnectionMetrics) RecordConnectionClosed()
- func (cm *ConnectionMetrics) RecordConnectionError()
- func (cm *ConnectionMetrics) RecordConnectionOpened()
- func (cm *ConnectionMetrics) RecordRequest(provider string)
- func (cm *ConnectionMetrics) RecordRequestComplete(provider string, success bool, latencyNs int64, reused bool)
- func (cm *ConnectionMetrics) Reset()
- func (cm *ConnectionMetrics) Stats() map[string]interface{}
- type ConnectionPool
- func (cp *ConnectionPool) Close()
- func (cp *ConnectionPool) CloseIdleConnectionsForProfile(profile ConnectionProfile)
- func (cp *ConnectionPool) GetClient(provider string, profiles ...ConnectionProfile) *http.Client
- func (cp *ConnectionPool) GetInsecureClient(provider string, profiles ...ConnectionProfile) *http.Client
- func (cp *ConnectionPool) GetTransport() *http.Transport
- func (cp *ConnectionPool) GetTransportForProfile(profile ConnectionProfile) *http.Transport
- func (cp *ConnectionPool) Stats() map[string]interface{}
- type ConnectionProfile
- type DNSCache
- type DataMasker
- func (dm *DataMasker) AddRule(rule *MaskingRule) error
- func (dm *DataMasker) GetRule(id string) (*MaskingRule, bool)
- func (dm *DataMasker) IsEnabled() bool
- func (dm *DataMasker) ListRules() []*MaskingRule
- func (dm *DataMasker) Mask(content string, direction MaskingDirection) string
- func (dm *DataMasker) MaskBytes(content []byte, direction MaskingDirection) []byte
- func (dm *DataMasker) MaskRequest(content string) string
- func (dm *DataMasker) MaskRequestBytes(content []byte) []byte
- func (dm *DataMasker) MaskResponse(content string) string
- func (dm *DataMasker) MaskResponseBytes(content []byte) []byte
- func (dm *DataMasker) RemoveRule(id string) bool
- func (dm *DataMasker) ResetStats()
- func (dm *DataMasker) SetEnabled(enabled bool)
- func (dm *DataMasker) SetLocaleFunc(f func() string)
- func (dm *DataMasker) SetRuleEnabled(id string, enabled bool) bool
- func (dm *DataMasker) Stats() map[string]interface{}
- type ErrorCategory
- type ErrorClassification
- type ErrorClassificationConfig
- type ErrorPattern
- type FailoverAPIHandler
- func (h *FailoverAPIHandler) GetCircuitBreakerStatus(c echo.Context) error
- func (h *FailoverAPIHandler) GetConfig(c echo.Context) error
- func (h *FailoverAPIHandler) GetMetrics(c echo.Context) error
- func (h *FailoverAPIHandler) GetOverview(c echo.Context) error
- func (h *FailoverAPIHandler) RegisterRoutes(g *echo.Group)
- func (h *FailoverAPIHandler) ResetCircuitBreakers(c echo.Context) error
- func (h *FailoverAPIHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
- func (h *FailoverAPIHandler) SetOnConfigSave(fn func(*FailoverConfig) error)
- func (h *FailoverAPIHandler) SetOnProviderRaceChange(fn func(ProviderRaceConfig))
- func (h *FailoverAPIHandler) SetProviderRaceStatsProvider(fn func() ProviderRaceStatsSnapshot)
- func (h *FailoverAPIHandler) UpdateConfig(c echo.Context) error
- type FailoverCircuitBreakerStatusResponse
- type FailoverConfig
- type FailoverHandler
- func (fh *FailoverHandler) Config() *FailoverConfig
- func (fh *FailoverHandler) Execute(ctx context.Context, provider *Provider, ...) (*http.Response, error)
- func (fh *FailoverHandler) GetBreakerState(name string) string
- func (fh *FailoverHandler) GetBreakerStats() map[string]interface{}
- func (fh *FailoverHandler) LoadBreakerState(snap BreakerSnapshot)
- func (fh *FailoverHandler) ResetAllBreakers()
- func (fh *FailoverHandler) ResetBreaker(name string)
- func (fh *FailoverHandler) SnapshotBreakers() []BreakerSnapshot
- type FailoverMetrics
- func (m *FailoverMetrics) GetStats() map[string]interface{}
- func (m *FailoverMetrics) RecordError(provider string, classification *ErrorClassification)
- func (m *FailoverMetrics) RecordFailover(fromProvider, toProvider string, success bool)
- func (m *FailoverMetrics) RecordProviderPoolResult(result *providerpool.FailoverResult)
- func (m *FailoverMetrics) RecordStreamAnomaly()
- type FailoverMetricsResponse
- type FailoverOverviewResponse
- type FailoverSnapshot
- type FormatConverter
- func (fc *FormatConverter) ConvertModelsResponse(body []byte, sourceType ProviderType) ([]byte, error)
- func (fc *FormatConverter) ConvertRequest(body []byte, targetType ProviderType) ([]byte, string, error)
- func (fc *FormatConverter) ConvertRequestWithCaching(body []byte, targetType ProviderType, promptCacheEnabled bool) ([]byte, string, error)
- func (fc *FormatConverter) ConvertResponse(body []byte, sourceType ProviderType) ([]byte, error)
- func (fc *FormatConverter) ConvertStreamingResponse(reader io.Reader, sourceType ProviderType, writer http.ResponseWriter) error
- func (fc *FormatConverter) DetectProviderType(endpoint string) ProviderType
- type GeminiCandidate
- type GeminiContent
- type GeminiFunctionCall
- type GeminiFunctionDeclaration
- type GeminiFunctionResponse
- type GeminiGenerationConfig
- type GeminiInlineData
- type GeminiModel
- type GeminiModelsResponse
- type GeminiPart
- type GeminiRequest
- type GeminiResponse
- type GeminiSafetySetting
- type GeminiStreamChunk
- type GeminiTool
- type GeminiUsageMetadata
- type GuardConfig
- type GuardResult
- type GuardRule
- type HealthCheckConfig
- type HealthChecker
- type HotReloadableConfig
- type MaskingCategory
- type MaskingConfig
- type MaskingDirection
- type MaskingRule
- type MetricsBucket
- type MetricsCollector
- func (mc *MetricsCollector) GetAllProviderMetrics() map[string]*ProviderMetrics
- func (mc *MetricsCollector) GetProviderMetrics(name string) (*ProviderMetrics, bool)
- func (mc *MetricsCollector) GetRecentRequests(limit int) []RequestMetrics
- func (mc *MetricsCollector) GetTimeSeries(start, end time.Time) []*MetricsBucket
- func (mc *MetricsCollector) LatencyStats() map[string]interface{}
- func (mc *MetricsCollector) Record(m RequestMetrics)
- func (mc *MetricsCollector) Reset()
- func (mc *MetricsCollector) Summary() map[string]interface{}
- type MetricsConfig
- type MiddlewareFunc
- type MockConfig
- type MockEndpoint
- type MockHandler
- func (mh *MockHandler) AddEndpoint(ep *MockEndpoint)
- func (mh *MockHandler) GetEndpoint(path, method string) (*MockEndpoint, bool)
- func (mh *MockHandler) Handle(w http.ResponseWriter, r *http.Request) bool
- func (mh *MockHandler) IsEnabled() bool
- func (mh *MockHandler) ListEndpoints() []*MockEndpoint
- func (mh *MockHandler) RemoveEndpoint(path, method string) bool
- func (mh *MockHandler) ResetStats()
- func (mh *MockHandler) SetEnabled(path, method string, enabled bool) bool
- func (mh *MockHandler) SetGlobalEnabled(enabled bool)
- func (mh *MockHandler) Stats() map[string]interface{}
- type ModelCompatConfig
- type ModelCompatLayer
- func (mcl *ModelCompatLayer) AdaptRequest(model string, req *ChatRequest) (*ChatRequest, error)
- func (mcl *ModelCompatLayer) GetAllFeatures() map[string]*ModelFeatures
- func (mcl *ModelCompatLayer) GetFeatures(model string) *ModelFeatures
- func (mcl *ModelCompatLayer) ListModels() []*ModelFeatures
- func (mcl *ModelCompatLayer) SetFeatures(model string, features *ModelFeatures)
- func (mcl *ModelCompatLayer) Stats() map[string]interface{}
- type ModelFamily
- type ModelFeatures
- type ModelOrigin
- type ModelPricing
- type ModelRoute
- type ModelRouter
- func (mr *ModelRouter) AddRule(rule *RegexRule) error
- func (mr *ModelRouter) GetFamilies() []*ModelFamily
- func (mr *ModelRouter) GetRules() []*RegexRule
- func (mr *ModelRouter) IsBackgroundRequest(r *http.Request) bool
- func (mr *ModelRouter) RemoveRule(pattern string) bool
- func (mr *ModelRouter) RouteModel(requestedModel string, isBackground bool) (*ModelRoute, error)
- func (mr *ModelRouter) SetTierResolver(tr *TierResolver)
- func (mr *ModelRouter) Stats() map[string]interface{}
- type ModelRouterConfig
- type ModelTier
- type OAuthTokenProvider
- type OpenAIChatRequest
- type OpenAIChatResponse
- type OpenAIMessage
- type OpenAIStreamChunk
- type OpenAITool
- type OpenAIToolCall
- type OpenAIToolCallFunc
- type OpenAIToolFunction
- type OriginRegistry
- type PerformanceConfig
- type PerformanceManager
- type Pipeline
- func (p *Pipeline) GetAuth() *Authenticator
- func (p *Pipeline) GetGuard() *PromptGuard
- func (p *Pipeline) GetMetrics() *MetricsCollector
- func (p *Pipeline) GetSessionMonitor() *SessionMonitor
- func (p *Pipeline) Stats() map[string]interface{}
- func (p *Pipeline) Use(mw MiddlewareFunc)
- func (p *Pipeline) Wrap(handler http.Handler) http.Handler
- type PipelineConfig
- type PipelineSnapshot
- type PipelineStatsCollector
- func (c *PipelineStatsCollector) Close() error
- func (c *PipelineStatsCollector) LoadBreakerState()
- func (c *PipelineStatsCollector) LoadSmartMetrics()
- func (c *PipelineStatsCollector) OnFailover(result *providerpool.FailoverResult)
- func (c *PipelineStatsCollector) SetFailoverHandler(fh *FailoverHandler)
- func (c *PipelineStatsCollector) SetSmartFailoverMetrics(m *FailoverMetrics)
- func (c *PipelineStatsCollector) Snapshot() PipelineSnapshot
- func (c *PipelineStatsCollector) Start()
- func (c *PipelineStatsCollector) Stop()
- type PortAllocator
- type PortConfig
- type PromptCacheBreakDetector
- type PromptCacheBreakObservation
- type PromptCacheSnapshot
- type PromptCacheStateSnapshot
- type PromptCacheStats
- type PromptGuard
- func (pg *PromptGuard) AddPattern(pattern string) error
- func (pg *PromptGuard) AddRule(rule GuardRule) error
- func (pg *PromptGuard) AddWhitelistPattern(pattern string) error
- func (pg *PromptGuard) Check(prompt string) *GuardResult
- func (pg *PromptGuard) GetRules() []GuardRule
- func (pg *PromptGuard) Stats() map[string]interface{}
- type Provider
- type ProviderConfig
- type ProviderConnectionMetrics
- type ProviderMemory
- func (pm *ProviderMemory) BlacklistModel(providerID, baseURL, model string)
- func (pm *ProviderMemory) ClearModelBlacklist(providerID, baseURL, model string)
- func (pm *ProviderMemory) ClearThrottle(providerID, baseURL string)
- func (pm *ProviderMemory) ForgetFormat(providerID, baseURL string)
- func (pm *ProviderMemory) ForgetModelAlias(providerID, baseURL, requestedModel string)
- func (pm *ProviderMemory) ForgetModelFormat(providerID, baseURL, requestedModel string)
- func (pm *ProviderMemory) ForgetThrottle(providerID, baseURL string)
- func (pm *ProviderMemory) ForgetToolCap(providerID, baseURL string)
- func (pm *ProviderMemory) GetRestrictions(providerID, baseURL string) map[string]interface{}
- func (pm *ProviderMemory) IsModelBlacklisted(providerID, baseURL, model string) bool
- func (pm *ProviderMemory) IsThrottled(providerID, baseURL string) bool
- func (pm *ProviderMemory) RecallFormat(providerID, baseURL string) (string, bool)
- func (pm *ProviderMemory) RecallModelAlias(providerID, baseURL, requestedModel string) (string, bool)
- func (pm *ProviderMemory) RecallModelFormat(providerID, baseURL, requestedModel string) (string, bool)
- func (pm *ProviderMemory) RecallToolCap(providerID, baseURL string) (ToolCapLevel, bool)
- func (pm *ProviderMemory) RememberFormat(providerID, baseURL string, format string)
- func (pm *ProviderMemory) RememberModelAlias(providerID, baseURL, requestedModel, actualModel string)
- func (pm *ProviderMemory) RememberModelFormat(providerID, baseURL, requestedModel, format string)
- func (pm *ProviderMemory) RememberThrottle(providerID, baseURL string, retryAfter time.Duration)
- func (pm *ProviderMemory) RememberToolCap(providerID, baseURL string, level ToolCapLevel)
- func (pm *ProviderMemory) ThrottleRemaining(providerID, baseURL string) time.Duration
- func (pm *ProviderMemory) ThrottleUntil(providerID, baseURL string) (time.Time, bool)
- type ProviderMetrics
- type ProviderRaceConfig
- type ProviderRaceStats
- type ProviderRaceStatsSnapshot
- type ProviderType
- type ProxyAPIHandler
- type ProxyConfig
- type ProxyHandler
- func (ph *ProxyHandler) GetPipelineStats() *PipelineStatsCollector
- func (ph *ProxyHandler) GetPromptCacheStats() PromptCacheSnapshot
- func (ph *ProxyHandler) GetProviderMemory() *ProviderMemory
- func (ph *ProxyHandler) GetProviderRaceStats() ProviderRaceStatsSnapshot
- func (ph *ProxyHandler) GetRoutingRules() []RoutingRule
- func (ph *ProxyHandler) GetRoutingStats() RoutingStatsSnapshot
- func (ph *ProxyHandler) GetRoutingStatsRef() *RoutingStats
- func (ph *ProxyHandler) IsPromptCacheEnabled() bool
- func (ph *ProxyHandler) IsRoutingEnabled() bool
- func (ph *ProxyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
- func (ph *ProxyHandler) SetAPIKeyValidator(validator func(key string) ([]string, error))
- func (ph *ProxyHandler) SetAuthProber(ap *AuthProber)
- func (ph *ProxyHandler) SetDataMasker(dm *DataMasker)
- func (ph *ProxyHandler) SetModelRouter(mr *ModelRouter)
- func (ph *ProxyHandler) SetOAuthManager(m OAuthTokenProvider)
- func (ph *ProxyHandler) SetPipelineStats(ps *PipelineStatsCollector)
- func (ph *ProxyHandler) SetPromptCacheEnabled(enabled bool)
- func (ph *ProxyHandler) SetProviderPool(pool *providerpool.Pool)
- func (ph *ProxyHandler) SetProviderRaceConfig(cfg ProviderRaceConfig)
- func (ph *ProxyHandler) SetPruner(mw *pruner.Middleware)
- func (ph *ProxyHandler) SetPrunerFactory(factory func() *pruner.Middleware)
- func (ph *ProxyHandler) SetResponsesContextCompressor(compressor ResponsesContextCompressor)
- func (ph *ProxyHandler) SetRoutingEnabled(enabled bool)
- func (ph *ProxyHandler) SetRoutingRuleEnabled(name string, enabled bool) bool
- func (ph *ProxyHandler) SetRuleEngine(re *RuleEngine)
- func (ph *ProxyHandler) SetSTTService(service stt.Service)
- func (ph *ProxyHandler) SetSessionMonitor(sm *SessionMonitor)
- func (ph *ProxyHandler) SetTierResolver(tr *TierResolver)
- type ProxyServer
- func (ps *ProxyServer) GetEndpoint() string
- func (ps *ProxyServer) GetModelRouter() *ModelRouter
- func (ps *ProxyServer) GetPort() int
- func (ps *ProxyServer) GetQuotaMonitor() *QuotaMonitor
- func (ps *ProxyServer) Start() error
- func (ps *ProxyServer) Stats() map[string]interface{}
- func (ps *ProxyServer) Stop(ctx context.Context) error
- type QuotaInfo
- type QuotaMonitor
- func (qm *QuotaMonitor) ClearBan(provider string)
- func (qm *QuotaMonitor) ClearRateLimit(provider string)
- func (qm *QuotaMonitor) GetBestProvider() string
- func (qm *QuotaMonitor) GetQuota(provider string) *QuotaInfo
- func (qm *QuotaMonitor) GetQuotaSummary() *QuotaSummary
- func (qm *QuotaMonitor) IsProviderAvailable(provider string) bool
- func (qm *QuotaMonitor) RecordError(provider string, statusCode int)
- func (qm *QuotaMonitor) RecordRequest(provider string)
- func (qm *QuotaMonitor) RecordUsage(provider string, resp *http.Response, tokensUsed int64)
- func (qm *QuotaMonitor) Reset()
- func (qm *QuotaMonitor) ResetProvider(provider string)
- func (qm *QuotaMonitor) Stats() map[string]interface{}
- func (qm *QuotaMonitor) Stop()
- type QuotaMonitorConfig
- type QuotaSummary
- type RateLimitConfig
- type RegexRule
- type RequestMetrics
- type ResolvedRoute
- type ResponseCache
- func (rc *ResponseCache) Clear()
- func (rc *ResponseCache) GenerateCacheKey(provider, model, prompt string) string
- func (rc *ResponseCache) Get(key string) (*CacheEntry, bool)
- func (rc *ResponseCache) Set(key string, value []byte, statusCode int, headers map[string]string)
- func (rc *ResponseCache) Stats() map[string]interface{}
- type ResponsesAssistantCompressionInput
- type ResponsesAssistantCompressionMode
- type ResponsesContextCompressor
- type RestrictionsHandler
- type RetryableErrorType
- type RouteCondition
- type RouteConfig
- type RouteDecision
- type RouteRequest
- type Router
- func (r *Router) AddProvider(config *ProviderConfig)
- func (r *Router) GetAllProviders() []*Provider
- func (r *Router) GetAvailableProviders() []*Provider
- func (r *Router) GetProvider(name string) (*Provider, bool)
- func (r *Router) RemoveProvider(name string) error
- func (r *Router) SelectProvider(req *http.Request) (*Provider, error)
- func (r *Router) SetProviderEnabled(name string, enabled bool) error
- func (r *Router) Stats() map[string]interface{}
- func (r *Router) UpdateHealth(name string, healthy bool, err error)
- func (r *Router) UpdateHealthWithLatency(name string, healthy bool, err error, latency time.Duration)
- type RoutingConfig
- type RoutingRule
- type RoutingStats
- func (s *RoutingStats) Load(routedRequests, tokensRouted, costSavedMicro int64)
- func (s *RoutingStats) Record(originalModel, actualModel string, respBody []byte)
- func (s *RoutingStats) RecordTokens(originalModel, actualModel string, inputTokens, outputTokens int)
- func (s *RoutingStats) Snapshot() RoutingStatsSnapshot
- type RoutingStatsSnapshot
- type RuleEngine
- type Session
- type SessionConfig
- type SessionMonitor
- func (sm *SessionMonitor) CleanupIdleSessions() int
- func (sm *SessionMonitor) CompleteSession(id string, status SessionStatus)
- func (sm *SessionMonitor) GetSession(id string) (*Session, bool)
- func (sm *SessionMonitor) ListActiveSessions() []*Session
- func (sm *SessionMonitor) ListSessions() []*Session
- func (sm *SessionMonitor) RecordError(id string)
- func (sm *SessionMonitor) StartSession(clientIP, userAgent string) *Session
- func (sm *SessionMonitor) Stats() map[string]interface{}
- func (sm *SessionMonitor) UpdateSession(id string, provider, model string, tokensIn, tokensOut int64)
- type SessionStatus
- type SmartFailoverHandler
- func (sfh *SmartFailoverHandler) ExecuteStreamingWithAnomalyDetection(ctx context.Context, provider *Provider, ...) (*http.Response, error)
- func (sfh *SmartFailoverHandler) ExecuteWithSmartFailover(ctx context.Context, provider *Provider, ...) (*http.Response, error)
- func (sfh *SmartFailoverHandler) GetClassifier() *APIErrorClassifier
- func (sfh *SmartFailoverHandler) GetMetrics() *FailoverMetrics
- type StreamAnomaly
- type StreamBuffer
- type StreamChunkChoice
- type StreamChunkDelta
- type StreamChunkToolCall
- type StreamChunkToolCallFunc
- type StreamChunkUsage
- type StreamRecoveryStrategy
- type StreamingAnomalyConfig
- type StreamingAnomalyDetector
- type TierResolver
- type TieredModel
- type ToggleState
- type ToggleStore
- type Tool
- type ToolCapLevel
- type UpstreamRequestBridge
- type UpstreamRequestBridgeContext
- type WatcherConfig
Constants ¶
const BackgroundTaskHeader = "X-Background-Task"
const DisableResponsesContinuationHeader = "X-Zima-Disable-Responses-Continuation"
DisableResponsesContinuationHeader tells the proxy's request-shaping logic to strip previous_response_id before forwarding to Responses endpoints.
const ResponsesContinuationDisabledHeader = "X-Zima-Responses-Continuation-Disabled"
ResponsesContinuationDisabledHeader is set when the proxy detects that the upstream does not honor previous_response_id continuation semantics for the routed provider.
const ResponsesPreviousIDHeader = "X-Zima-Previous-Response-ID"
ResponsesPreviousIDHeader exposes the latest Responses response.id observed by the proxy for this session/request.
const ResponsesUsedHeader = "X-Zima-Responses-Used"
ResponsesUsedHeader tells downstream callers the proxy actually used a Responses endpoint upstream for this request.
Variables ¶
// Sentinel errors returned by the proxy, grouped by area. Each value is a
// distinct errors.New instance, so callers should match them with errors.Is
// (or ==) rather than by comparing message text.
var (
	// Port errors
	ErrPortAllocationFailed = errors.New("failed to allocate port")
	ErrPortRangeInvalid     = errors.New("invalid port range")
	ErrPortInUse            = errors.New("port already in use")

	// Provider errors
	ErrNoAvailableProvider = errors.New("no available provider")
	ErrAllProvidersFailed  = errors.New("all providers failed")
	ErrProviderNotFound    = errors.New("provider not found")
	ErrProviderDisabled    = errors.New("provider is disabled")

	// Request errors
	ErrRequestFailed  = errors.New("request failed")
	ErrRequestTimeout = errors.New("request timeout")
	ErrUpstreamError  = errors.New("upstream error")
	ErrInvalidRequest = errors.New("invalid request")

	// Circuit breaker errors
	ErrCircuitOpen = errors.New("circuit breaker is open")

	// Configuration errors
	ErrConfigInvalid  = errors.New("invalid configuration")
	ErrConfigNotFound = errors.New("configuration not found")

	// Model compatibility errors
	ErrToolCallingNotSupported = errors.New("tool calling not supported by model")
	ErrVisionNotSupported      = errors.New("vision not supported by model")
	ErrContextTooLong          = errors.New("context exceeds model limit")

	// Mock errors
	ErrMockEndpointNotFound = errors.New("mock endpoint not found")
	ErrMockDisabled         = errors.New("mock endpoints disabled")
)
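Proxy errors: sentinel values grouped by area (port, provider, request, circuit breaker, configuration, model compatibility, mock).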
// ModelAliases maps a model name to the alternative names commonly used for
// it, for relay compatibility.
var ModelAliases map[string][]string
ModelAliases maps model names to common alternatives for relay compatibility.
Functions ¶
func ApplyPromptCaching ¶
func ApplyPromptCaching(req *AnthropicRequest)
ApplyPromptCaching adds cache_control breakpoints to an Anthropic request. Breakpoints are placed on:
(1) the system prompt static block(s) — up to 2 blocks for static+config
(2) the last tool definition
(3) a turn-boundary message (4th-from-last) for long conversations
This enables Anthropic's prompt caching, which can save up to 90% on input token costs. Anthropic allows up to 4 cache breakpoints per request.
func BackgroundTaskFromContext ¶
BackgroundTaskFromContext reports whether the request context is marked as a background/internal task.
func DisableModelRoutingFromContext ¶
DisableModelRoutingFromContext reports whether proxy-side model routing should be skipped for this request.
func DisableResponsesContinuationFromContext ¶
DisableResponsesContinuationFromContext reports whether continuation should be disabled.
func GetExcludedProviders ¶
GetExcludedProviders returns the excluded provider IDs from context.
func GetPinnedProvider ¶
GetPinnedProvider returns the pinned provider ID from context, or "".
func InjectPromptCaching ¶
InjectPromptCaching unmarshals an Anthropic request body, applies cache breakpoints, and re-marshals.
func IsContextWindowExceededMessage ¶
IsContextWindowExceededMessage returns true when an error message clearly indicates input/context overflow, including relay-wrapped variants that may be surfaced with a generic 5xx status code.
func LocaleFromContext ¶
LocaleFromContext returns locale from request context.
func LogTokenChurn ¶
func LogTokenChurn(provider, model, promptCacheKey string, inputTokens, cacheRead, cacheCreation int, observation *PromptCacheBreakObservation)
LogTokenChurn logs a per-request summary of token reuse vs churn.
func ReadPortFromFile ¶
ReadPortFromFile reads the proxy port from the given file.
func SanitizeError ¶
SanitizeError converts raw Go network/TLS errors into user-friendly messages, stripping internal URLs and technical details.
func SessionIDFromContext ¶
SessionIDFromContext returns proxy session ID from request context.
func ValidateRoutingConfig ¶
func ValidateRoutingConfig(cfg *RoutingConfig) []error
ValidateRoutingConfig checks for common configuration errors.
func WithBackgroundTask ¶
WithBackgroundTask marks a request context as an internal/background task so the proxy can apply background-specific routing policies.
func WithDisableModelRouting ¶
WithDisableModelRouting marks a request context so proxy-side model routing leaves an explicit model untouched for this request.
func WithDisableResponsesContinuation ¶
WithDisableResponsesContinuation marks a request context so bridge/proxy strips previous_response_id before forwarding to Responses endpoints.
func WithExcludedProviders ¶
WithExcludedProviders returns a context carrying the providers that routing should skip for the current request.
func WithLocale ¶
WithLocale stores the request locale in context (e.g. "en-US", "zh-CN").
func WithPinnedProvider ¶
WithPinnedProvider returns a context carrying a preferred provider ID. The proxy handler reads this to set RouteRequest.PreferredProviderID.
func WithResolvedRoute ¶
func WithResolvedRoute(ctx context.Context, rr *ResolvedRoute) context.Context
WithResolvedRoute returns a context carrying a ResolvedRoute pointer. After the proxy handler completes, the struct will be populated.
Types ¶
type APIErrorClassifier ¶
// APIErrorClassifier classifies API errors from different providers.
// All of its state is unexported; obtain one via NewAPIErrorClassifier.
type APIErrorClassifier struct {
// contains filtered or unexported fields
}
APIErrorClassifier classifies API errors from different providers.
func NewAPIErrorClassifier ¶
func NewAPIErrorClassifier() *APIErrorClassifier
NewAPIErrorClassifier creates a new error classifier with default patterns.
func (*APIErrorClassifier) AddProviderPatterns ¶
func (c *APIErrorClassifier) AddProviderPatterns(provider string, patterns []ErrorPattern)
AddProviderPatterns adds custom patterns for a provider.
func (*APIErrorClassifier) ClassifyError ¶
func (c *APIErrorClassifier) ClassifyError( provider string, statusCode int, responseBody []byte, ) *ErrorClassification
ClassifyError analyzes the response (status code and body) for the given provider and returns its error classification.
type AnthropicCacheControl ¶
// AnthropicCacheControl is the cache_control marker used for Anthropic prompt
// caching. It is referenced by AnthropicSystemBlock, AnthropicTool, and
// AnthropicContentBlock through their CacheControl fields.
type AnthropicCacheControl struct {
// Type is the cache type; "ephemeral" is the only value noted here.
Type string `json:"type"` // "ephemeral"
}
AnthropicCacheControl is the cache_control block for Anthropic prompt caching.
type AnthropicContentBlock ¶
// AnthropicContentBlock is one element of a content array, as used by
// AnthropicResponse.Content.
//
// Type is always serialized; every other field carries omitempty and is
// dropped from the JSON when it holds its zero value.
// NOTE(review): which optional fields are meaningful presumably depends on
// Type (text vs. tool use vs. tool result vs. image) — confirm against the
// format converter.
type AnthropicContentBlock struct {
// Type is the block discriminator.
Type string `json:"type"`
// Text is the text payload, if any.
Text string `json:"text,omitempty"`
// ID, Name and Input presumably describe a tool invocation — TODO confirm.
ID string `json:"id,omitempty"`
Name string `json:"name,omitempty"`
Input interface{} `json:"input,omitempty"`
// ToolUseID and Content presumably carry a tool result that refers back to
// an earlier tool invocation — TODO confirm. Content is a plain string here,
// so structured (non-string) result content cannot be represented.
ToolUseID string `json:"tool_use_id,omitempty"`
Content string `json:"content,omitempty"`
// Source is an optional embedded payload described by a type, a media type
// and its data; nil omits the "source" key entirely.
Source *struct {
Type string `json:"type"`
MediaType string `json:"media_type"`
Data string `json:"data"`
} `json:"source,omitempty"`
// CacheControl optionally marks this block for prompt caching; nil omits it.
CacheControl *AnthropicCacheControl `json:"cache_control,omitempty"`
}
type AnthropicMessage ¶
// AnthropicMessage is a single entry of AnthropicRequest.Messages.
type AnthropicMessage struct {
// Role identifies the author of the message.
Role string `json:"role"`
// Content is untyped because it may be either a plain string or an array of
// content blocks; consumers must type-switch on the decoded value.
Content interface{} `json:"content"` // string or []ContentBlock
}
type AnthropicRequest ¶
// AnthropicRequest is an Anthropic-format request body. It is the type that
// ApplyPromptCaching mutates to add cache_control breakpoints.
//
// NOTE(review): Temperature, TopP and TopK are plain numeric values tagged
// omitempty, so a zero value is dropped when marshaling — an explicit
// temperature of 0 cannot be expressed and is indistinguishable from "unset".
type AnthropicRequest struct {
// Model is the requested model name.
Model string `json:"model"`
// Messages is the conversation, in order.
Messages []AnthropicMessage `json:"messages"`
// System is untyped because it may be either a plain string or an array of
// system blocks (the array form is what allows per-block cache_control).
System interface{} `json:"system,omitempty"` // string or []AnthropicSystemBlock
// MaxTokens has no omitempty and is therefore always serialized, even as 0.
MaxTokens int `json:"max_tokens"`
// Sampling parameters; omitted from the JSON when zero (see note above).
Temperature float64 `json:"temperature,omitempty"`
TopP float64 `json:"top_p,omitempty"`
TopK int `json:"top_k,omitempty"`
// Stream is omitted from the JSON when false.
Stream bool `json:"stream,omitempty"`
StopSequences []string `json:"stop_sequences,omitempty"`
// Tools lists the tool definitions offered to the model.
Tools []AnthropicTool `json:"tools,omitempty"`
// ToolChoice is untyped; its accepted shapes are not visible here.
ToolChoice interface{} `json:"tool_choice,omitempty"`
}
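Anthropic request/response types. AnthropicRequest is the request body; the other Anthropic* types documented alongside it describe its parts and the matching response and stream payloads.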
type AnthropicResponse ¶
// AnthropicResponse is an Anthropic-format response body. It is also embedded
// in stream events through AnthropicStreamEvent.Message.
type AnthropicResponse struct {
ID string `json:"id"`
Type string `json:"type"`
Role string `json:"role"`
// Content holds the response's content blocks.
Content []AnthropicContentBlock `json:"content"`
Model string `json:"model"`
StopReason string `json:"stop_reason"`
// StopSequence is omitted from the JSON when empty.
StopSequence string `json:"stop_sequence,omitempty"`
// Usage reports input and output token counts only. Unlike
// AnthropicStreamUsage it has no cache-read / cache-creation fields, so
// those counters are not captured when decoding into this type.
Usage struct {
InputTokens int `json:"input_tokens"`
OutputTokens int `json:"output_tokens"`
} `json:"usage"`
}
type AnthropicStreamContentBlock ¶
// AnthropicStreamContentBlock is the content_block carried by a
// content_block_start stream event (see AnthropicStreamEvent.ContentBlock).
type AnthropicStreamContentBlock struct {
// Type is the block discriminator and is always serialized.
Type string `json:"type"`
// Text, ID and Name are optional and omitted from the JSON when empty.
Text string `json:"text,omitempty"`
ID string `json:"id,omitempty"`
Name string `json:"name,omitempty"`
}
AnthropicStreamContentBlock is the content_block in a content_block_start event.
type AnthropicStreamDelta ¶
// AnthropicStreamDelta is the delta carried by content_block_delta and
// message_delta stream events (see AnthropicStreamEvent.Delta).
type AnthropicStreamDelta struct {
// Type is the delta discriminator and is always serialized.
Type string `json:"type"`
// Text is an incremental text fragment; omitted when empty.
Text string `json:"text,omitempty"`
// PartialJSON is an incremental fragment of JSON text; omitted when empty.
// NOTE(review): presumably streamed tool-input JSON — confirm.
PartialJSON string `json:"partial_json,omitempty"`
// StopReason is omitted when empty.
StopReason string `json:"stop_reason,omitempty"`
}
AnthropicStreamDelta is the delta in content_block_delta / message_delta events.
type AnthropicStreamEvent ¶
// AnthropicStreamEvent is the envelope for one Anthropic streaming event.
// Type names the event; the pointer fields are optional payloads and are
// omitted from the JSON when nil.
//
// NOTE(review): Index is a plain int tagged omitempty, so an index of 0 is
// dropped when marshaling — an event for the first block serializes without
// an "index" key. Confirm downstream consumers tolerate that.
type AnthropicStreamEvent struct {
Type string `json:"type"`
Index int `json:"index,omitempty"`
// ContentBlock is the payload of content_block_start events.
ContentBlock *AnthropicStreamContentBlock `json:"content_block,omitempty"`
// Delta is the payload of content_block_delta / message_delta events.
Delta *AnthropicStreamDelta `json:"delta,omitempty"`
// Message carries a full response object when present.
Message *AnthropicResponse `json:"message,omitempty"`
// Usage carries token accounting for message_start / message_delta events.
Usage *AnthropicStreamUsage `json:"usage,omitempty"`
}
type AnthropicStreamUsage ¶
// AnthropicStreamUsage is the usage object carried by message_start and
// message_delta stream events. Every counter is tagged omitempty, so zero
// counts are omitted from the JSON rather than written as 0.
type AnthropicStreamUsage struct {
InputTokens int `json:"input_tokens,omitempty"`
OutputTokens int `json:"output_tokens,omitempty"`
// Prompt-cache accounting: tokens read from cache and tokens written to it.
CacheReadInputTokens int `json:"cache_read_input_tokens,omitempty"`
CacheCreationInputTokens int `json:"cache_creation_input_tokens,omitempty"`
}
AnthropicStreamUsage is the usage in message_start / message_delta events.
type AnthropicSystemBlock ¶
// AnthropicSystemBlock is one content block of the array form of the system
// prompt (AnthropicRequest.System), which exists so that individual blocks can
// carry a cache_control marker.
type AnthropicSystemBlock struct {
Type string `json:"type"`
// Text has no omitempty and is always serialized, even when empty.
Text string `json:"text"`
// CacheControl optionally marks this block as a cache breakpoint; nil omits it.
CacheControl *AnthropicCacheControl `json:"cache_control,omitempty"`
}
AnthropicSystemBlock is a content block in the system prompt array (for prompt caching).
type AnthropicTool ¶
// AnthropicTool is one tool definition in AnthropicRequest.Tools.
type AnthropicTool struct {
Name string `json:"name"`
// Description is omitted from the JSON when empty.
Description string `json:"description,omitempty"`
// InputSchema is untyped and has no omitempty, so it is always serialized
// (as null when nil).
InputSchema interface{} `json:"input_schema"`
// CacheControl optionally marks this tool as a cache breakpoint; nil omits it.
CacheControl *AnthropicCacheControl `json:"cache_control,omitempty"`
}
type AuthConfig ¶
// AuthConfig holds authentication configuration. DefaultAuthConfig returns a
// pre-populated instance.
type AuthConfig struct {
// Enabled toggles authentication.
Enabled bool `json:"enabled"`
// Type selects the scheme; the values noted here are "none", "api_key" and
// "bearer".
Type string `json:"type"` // "none", "api_key", "bearer"
// APIKeys lists the configured API keys.
APIKeys []string `json:"api_keys"`
// AllowedIPs lists the configured allowed client IPs.
AllowedIPs []string `json:"allowed_ips"`
// SkipPaths: presumably request paths exempt from authentication — TODO confirm.
SkipPaths []string `json:"skip_paths"`
// HeaderName: presumably the request header the credential is read from —
// TODO confirm.
HeaderName string `json:"header_name"`
}
AuthConfig holds the authentication configuration.
func DefaultAuthConfig ¶
func DefaultAuthConfig() *AuthConfig
DefaultAuthConfig returns the default auth configuration.
type AuthExhaustedError ¶
AuthExhaustedError indicates all auth strategies failed for a provider.
func (*AuthExhaustedError) Error ¶
func (e *AuthExhaustedError) Error() string
type AuthProber ¶
type AuthProber struct {
// contains filtered or unexported fields
}
AuthProber manages auth strategy probing and remembers what works.
func NewAuthProber ¶
func NewAuthProber() *AuthProber
NewAuthProber creates a new auth prober whose strategy memory uses a 1-hour TTL.
func (*AuthProber) Apply ¶
func (ap *AuthProber) Apply(req *http.Request, strategy AuthStrategy, apiKey *providerpool.APIKey, provider *providerpool.Provider)
Apply sets the appropriate auth headers on the request for the strategy.
func (*AuthProber) Forget ¶
func (ap *AuthProber) Forget(providerID, memoryKey string)
Forget evicts the cached strategy (e.g. on provider config change).
func (*AuthProber) ProbeAndForward ¶
func (ap *AuthProber) ProbeAndForward( provider *providerpool.Provider, apiKey *providerpool.APIKey, effectiveFormat providerpool.APIFormat, buildRequest func() (*http.Request, error), doRequest func(*http.Request) (*http.Response, error), ) (*http.Response, error)
ProbeAndForward tries auth strategies in order until one succeeds (a non-401/403 response). It returns the successful response, or an error if all strategies are exhausted. effectiveFormat is the API format being used (openai, anthropic, etc.); it is used to determine the auth strategy.
func (*AuthProber) Recall ¶
func (ap *AuthProber) Recall(providerID, memoryKey string) (AuthStrategy, bool)
Recall returns the cached winning strategy, if any.
func (*AuthProber) Remember ¶
func (ap *AuthProber) Remember(providerID, memoryKey string, strategy AuthStrategy)
Remember caches the winning auth strategy for a provider+endpoint key.
func (*AuthProber) Strategies ¶
func (ap *AuthProber) Strategies(provider *providerpool.Provider, apiKey *providerpool.APIKey, effectiveFormat providerpool.APIFormat) []AuthStrategy
Strategies returns an ordered list of auth strategies to try. The cached winner (if any) is placed first for zero-latency happy path. effectiveFormat is the API format being used for this request (may differ from provider.APIFormat).
type AuthStrategy ¶
type AuthStrategy int
AuthStrategy represents an authentication method to try against an upstream provider.
const ( AuthBearer AuthStrategy = iota // Authorization: Bearer <key> AuthXAPIKey // x-api-key: <key> AuthAnthropic // x-api-key + anthropic-version header AuthNone // No auth header )
func (AuthStrategy) String ¶
func (s AuthStrategy) String() string
type Authenticator ¶
type Authenticator struct {
// contains filtered or unexported fields
}
Authenticator handles authentication and rate limiting
func NewAuthenticator ¶
func NewAuthenticator(authConfig *AuthConfig, rateLimitConfig *RateLimitConfig) *Authenticator
NewAuthenticator creates a new authenticator
func (*Authenticator) AddAPIKey ¶
func (a *Authenticator) AddAPIKey(key string)
AddAPIKey adds an API key
func (*Authenticator) AddAllowedIP ¶
func (a *Authenticator) AddAllowedIP(ip string)
AddAllowedIP adds an allowed IP
func (*Authenticator) Authenticate ¶
func (a *Authenticator) Authenticate(r *http.Request) (bool, string)
Authenticate checks if a request is authenticated
func (*Authenticator) CheckRateLimit ¶
func (a *Authenticator) CheckRateLimit(r *http.Request) (bool, string)
CheckRateLimit checks if a request is within rate limits
func (*Authenticator) CleanupRateLimits ¶
func (a *Authenticator) CleanupRateLimits()
CleanupRateLimits removes stale rate limit entries
func (*Authenticator) ListAPIKeys ¶
func (a *Authenticator) ListAPIKeys() []map[string]interface{}
ListAPIKeys returns all API keys (masked for security)
func (*Authenticator) RemoveAPIKey ¶
func (a *Authenticator) RemoveAPIKey(key string)
RemoveAPIKey removes an API key
func (*Authenticator) Stats ¶
func (a *Authenticator) Stats() map[string]interface{}
Stats returns authenticator statistics
func (*Authenticator) ValidateAPIKey ¶
func (a *Authenticator) ValidateAPIKey(key string) bool
ValidateAPIKey checks if an API key is valid
type BreakerSnapshot ¶
type BreakerSnapshot struct {
Name string
State string
Failures int
Successes int
LastFailureTime time.Time
LastStateChange time.Time
}
BreakerSnapshot holds persisted circuit breaker state for a single provider.
type BufferPool ¶
type BufferPool struct {
// contains filtered or unexported fields
}
BufferPool provides reusable byte buffers to reduce GC pressure
func NewBufferPool ¶
func NewBufferPool(config *PerformanceConfig) *BufferPool
NewBufferPool creates a new buffer pool
func (*BufferPool) Get ¶
func (bp *BufferPool) Get() *bytes.Buffer
Get retrieves a buffer from the pool
func (*BufferPool) Put ¶
func (bp *BufferPool) Put(buf *bytes.Buffer)
Put returns a buffer to the pool
func (*BufferPool) Stats ¶
func (bp *BufferPool) Stats() map[string]interface{}
Stats returns buffer pool statistics
type CacheEntry ¶
type CacheEntry struct {
Key string
Value []byte
StatusCode int
Headers map[string]string
CreatedAt time.Time
ExpiresAt time.Time
HitCount int64
}
CacheEntry represents a cached response
type ChatMessage ¶
type ChatMessage struct {
Role string `json:"role"`
Content interface{} `json:"content"`
}
ChatMessage represents a chat message
type ChatRequest ¶
type ChatRequest struct {
Model string `json:"model"`
Messages []ChatMessage `json:"messages"`
System string `json:"system,omitempty"`
Tools []Tool `json:"tools,omitempty"`
Stream bool `json:"stream,omitempty"`
MaxTokens int `json:"max_tokens,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
}
ChatRequest represents a chat completion request
func (*ChatRequest) HasImages ¶
func (r *ChatRequest) HasImages() bool
HasImages checks if request contains images
func (*ChatRequest) TokenCount ¶
func (r *ChatRequest) TokenCount() int
TokenCount estimates token count (simplified)
type ConfigReloader ¶
type ConfigReloader struct {
// contains filtered or unexported fields
}
ConfigReloader handles applying configuration changes
func NewConfigReloader ¶
func NewConfigReloader( router *Router, failover *FailoverHandler, guard *PromptGuard, auth *Authenticator, mock *MockHandler, compat *ModelCompatLayer, ) *ConfigReloader
NewConfigReloader creates a new config reloader
func (*ConfigReloader) ApplyConfig ¶
func (cr *ConfigReloader) ApplyConfig(config *ProxyConfig) error
ApplyConfig applies a new configuration
type ConfigWatcher ¶
type ConfigWatcher struct {
OnChange func(oldConfig, newConfig *ProxyConfig)
OnError func(error)
// contains filtered or unexported fields
}
ConfigWatcher watches for configuration changes
func NewConfigWatcher ¶
func NewConfigWatcher(configPath string, pollInterval time.Duration) *ConfigWatcher
NewConfigWatcher creates a new config watcher
func (*ConfigWatcher) ForceReload ¶
func (cw *ConfigWatcher) ForceReload() error
ForceReload forces a configuration reload
func (*ConfigWatcher) GetConfigPath ¶
func (cw *ConfigWatcher) GetConfigPath() string
GetConfigPath returns the config file path
func (*ConfigWatcher) SetConfigPath ¶
func (cw *ConfigWatcher) SetConfigPath(path string)
SetConfigPath sets a new config file path
func (*ConfigWatcher) Start ¶
func (cw *ConfigWatcher) Start()
Start starts watching for config changes
func (*ConfigWatcher) Stats ¶
func (cw *ConfigWatcher) Stats() map[string]interface{}
Stats returns watcher statistics
type ConnWarmup ¶
type ConnWarmup struct {
// contains filtered or unexported fields
}
ConnWarmup pre-establishes TCP+TLS connections to provider endpoints so the first real request doesn't pay the handshake cost.
func NewConnWarmup ¶
func NewConnWarmup(pool *ConnectionPool) *ConnWarmup
NewConnWarmup creates a new connection warmup manager.
func (*ConnWarmup) DNSCache ¶
func (cw *ConnWarmup) DNSCache() *DNSCache
DNSCache returns the DNS cache for use by the transport dialer.
func (*ConnWarmup) WarmOne ¶
func (cw *ConnWarmup) WarmOne(baseURL string)
WarmOne pre-establishes a TCP+TLS connection to a single provider base URL. Safe to call from warmup paths to pre-warm a likely provider.
func (*ConnWarmup) WarmProviders ¶
func (cw *ConnWarmup) WarmProviders(baseURLs []string)
WarmProviders pre-connects to all provider base URLs concurrently. Called once at startup after providers are loaded.
type ConnectionConfig ¶
type ConnectionConfig struct {
MaxIdleConns int `json:"max_idle_conns" yaml:"max_idle_conns"`
MaxIdleConnsPerHost int `json:"max_idle_conns_per_host" yaml:"max_idle_conns_per_host"`
MaxConnsPerHost int `json:"max_conns_per_host" yaml:"max_conns_per_host"`
IdleConnTimeout time.Duration `json:"idle_conn_timeout" yaml:"idle_conn_timeout"`
KeepAlive bool `json:"keep_alive" yaml:"keep_alive"`
KeepAliveInterval time.Duration `json:"keep_alive_interval" yaml:"keep_alive_interval"`
DialTimeout time.Duration `json:"dial_timeout" yaml:"dial_timeout"`
TLSHandshakeTimeout time.Duration `json:"tls_handshake_timeout" yaml:"tls_handshake_timeout"`
ResponseHeaderTimeout time.Duration `json:"response_header_timeout" yaml:"response_header_timeout"`
ForceHTTP2 bool `json:"force_http2" yaml:"force_http2"`
}
ConnectionConfig holds the connection pool configuration.
func DefaultConnectionConfig ¶
func DefaultConnectionConfig() *ConnectionConfig
DefaultConnectionConfig returns default connection configuration
type ConnectionMetrics ¶
type ConnectionMetrics struct {
// contains filtered or unexported fields
}
ConnectionMetrics tracks connection pool performance
func NewConnectionMetrics ¶
func NewConnectionMetrics(config *PerformanceConfig) *ConnectionMetrics
NewConnectionMetrics creates a new connection metrics tracker
func (*ConnectionMetrics) RecordConnectionClosed ¶
func (cm *ConnectionMetrics) RecordConnectionClosed()
RecordConnectionClosed records a closed connection
func (*ConnectionMetrics) RecordConnectionError ¶
func (cm *ConnectionMetrics) RecordConnectionError()
RecordConnectionError records a connection error
func (*ConnectionMetrics) RecordConnectionOpened ¶
func (cm *ConnectionMetrics) RecordConnectionOpened()
RecordConnectionOpened records a new connection
func (*ConnectionMetrics) RecordRequest ¶
func (cm *ConnectionMetrics) RecordRequest(provider string)
RecordRequest records a request start
func (*ConnectionMetrics) RecordRequestComplete ¶
func (cm *ConnectionMetrics) RecordRequestComplete(provider string, success bool, latencyNs int64, reused bool)
RecordRequestComplete records a request completion
func (*ConnectionMetrics) Stats ¶
func (cm *ConnectionMetrics) Stats() map[string]interface{}
Stats returns connection metrics
type ConnectionPool ¶
type ConnectionPool struct {
// contains filtered or unexported fields
}
ConnectionPool manages HTTP connections to upstream providers
func NewConnectionPool ¶
func NewConnectionPool(config *ConnectionConfig) *ConnectionPool
NewConnectionPool creates a new connection pool
func (*ConnectionPool) CloseIdleConnectionsForProfile ¶
func (cp *ConnectionPool) CloseIdleConnectionsForProfile(profile ConnectionProfile)
CloseIdleConnectionsForProfile closes idle connections for both secure and insecure transports of a profile.
func (*ConnectionPool) GetClient ¶
func (cp *ConnectionPool) GetClient(provider string, profiles ...ConnectionProfile) *http.Client
GetClient returns an HTTP client for the given provider
func (*ConnectionPool) GetInsecureClient ¶
func (cp *ConnectionPool) GetInsecureClient(provider string, profiles ...ConnectionProfile) *http.Client
GetInsecureClient returns an HTTP client that skips TLS verification
func (*ConnectionPool) GetTransport ¶
func (cp *ConnectionPool) GetTransport() *http.Transport
GetTransport returns the underlying transport
func (*ConnectionPool) GetTransportForProfile ¶
func (cp *ConnectionPool) GetTransportForProfile(profile ConnectionProfile) *http.Transport
GetTransportForProfile returns the transport for the given profile.
func (*ConnectionPool) Stats ¶
func (cp *ConnectionPool) Stats() map[string]interface{}
Stats returns connection pool statistics
type ConnectionProfile ¶
type ConnectionProfile string
const ( ConnectionProfileLong ConnectionProfile = "long" ConnectionProfileProbe ConnectionProfile = "probe" )
type DNSCache ¶
type DNSCache struct {
// contains filtered or unexported fields
}
DNSCache caches DNS lookups to avoid repeated resolution on the hot path.
func NewDNSCache ¶
NewDNSCache creates a DNS cache with the given TTL and max entries.
type DataMasker ¶
type DataMasker struct {
// contains filtered or unexported fields
}
DataMasker handles data masking (stub implementation)
func NewDataMasker ¶
func NewDataMasker(config *MaskingConfig) *DataMasker
NewDataMasker creates a new data masker
func (*DataMasker) AddRule ¶
func (dm *DataMasker) AddRule(rule *MaskingRule) error
AddRule adds a masking rule
func (*DataMasker) GetRule ¶
func (dm *DataMasker) GetRule(id string) (*MaskingRule, bool)
GetRule returns a masking rule by ID
func (*DataMasker) IsEnabled ¶
func (dm *DataMasker) IsEnabled() bool
IsEnabled returns whether masking is enabled
func (*DataMasker) ListRules ¶
func (dm *DataMasker) ListRules() []*MaskingRule
ListRules returns all masking rules
func (*DataMasker) Mask ¶
func (dm *DataMasker) Mask(content string, direction MaskingDirection) string
Mask masks sensitive data based on direction
func (*DataMasker) MaskBytes ¶
func (dm *DataMasker) MaskBytes(content []byte, direction MaskingDirection) []byte
MaskBytes masks sensitive data in byte content, avoiding []byte→string→[]byte round-trips. It uses regexp.ReplaceAll, which operates on []byte directly.
func (*DataMasker) MaskRequest ¶
func (dm *DataMasker) MaskRequest(content string) string
MaskRequest masks sensitive data in request
func (*DataMasker) MaskRequestBytes ¶
func (dm *DataMasker) MaskRequestBytes(content []byte) []byte
MaskRequestBytes masks sensitive data in request body bytes.
func (*DataMasker) MaskResponse ¶
func (dm *DataMasker) MaskResponse(content string) string
MaskResponse masks sensitive data in response
func (*DataMasker) MaskResponseBytes ¶
func (dm *DataMasker) MaskResponseBytes(content []byte) []byte
MaskResponseBytes masks sensitive data in response body bytes.
func (*DataMasker) RemoveRule ¶
func (dm *DataMasker) RemoveRule(id string) bool
RemoveRule removes a masking rule
func (*DataMasker) SetEnabled ¶
func (dm *DataMasker) SetEnabled(enabled bool)
SetEnabled enables or disables masking globally
func (*DataMasker) SetLocaleFunc ¶
func (dm *DataMasker) SetLocaleFunc(f func() string)
SetLocaleFunc sets the function used to resolve the current locale for i18n replacement labels.
func (*DataMasker) SetRuleEnabled ¶
func (dm *DataMasker) SetRuleEnabled(id string, enabled bool) bool
SetRuleEnabled enables or disables a rule
func (*DataMasker) Stats ¶
func (dm *DataMasker) Stats() map[string]interface{}
Stats returns masker statistics
type ErrorCategory ¶
type ErrorCategory string
ErrorCategory classifies API errors for failover decisions
const ( // ErrorCategoryNonRetryable - do not failover or retry ErrorCategoryNonRetryable ErrorCategory = "non_retryable" // ErrorCategoryRetryable - retry with same provider (transient errors) ErrorCategoryRetryable ErrorCategory = "retryable" // ErrorCategoryFailover - retry with different provider (provider-specific limits) ErrorCategoryFailover ErrorCategory = "failover" // ErrorCategoryStreamAnomaly - force stop and recover ErrorCategoryStreamAnomaly ErrorCategory = "stream_anomaly" )
type ErrorClassification ¶
type ErrorClassification struct {
Type RetryableErrorType `json:"type"`
Category ErrorCategory `json:"category"`
Message string `json:"message"`
Retryable bool `json:"retryable"`
ShouldFailover bool `json:"should_failover"`
SuggestedContextWindow int `json:"suggested_context_window,omitempty"`
RetryAfter time.Duration `json:"retry_after,omitempty"`
OriginalStatusCode int `json:"original_status_code"`
}
ErrorClassification is the result of classifying an API error.
type ErrorClassificationConfig ¶
type ErrorClassificationConfig struct {
Enabled bool `json:"enabled" yaml:"enabled"`
FailoverErrors []string `json:"failover_errors" yaml:"failover_errors"` // Error types that trigger failover
RetryableErrors []string `json:"retryable_errors" yaml:"retryable_errors"` // Error types that trigger retry
}
ErrorClassificationConfig holds the configuration for error classification.
type ErrorPattern ¶
type ErrorPattern struct {
Type RetryableErrorType
Category ErrorCategory
StatusCodes []int
MessagePatterns []*regexp.Regexp
}
ErrorPattern defines a pattern to match API errors
type FailoverAPIHandler ¶
type FailoverAPIHandler struct {
// contains filtered or unexported fields
}
FailoverAPIHandler provides HTTP handlers for failover management
func NewFailoverAPIHandler ¶
func NewFailoverAPIHandler(smartFailover *SmartFailoverHandler, config *FailoverConfig) *FailoverAPIHandler
NewFailoverAPIHandler creates a new failover API handler
func (*FailoverAPIHandler) GetCircuitBreakerStatus ¶
func (h *FailoverAPIHandler) GetCircuitBreakerStatus(c echo.Context) error
GetCircuitBreakerStatus returns the status of all circuit breakers. GET /api/v1/proxy/failover/breakers
func (*FailoverAPIHandler) GetConfig ¶
func (h *FailoverAPIHandler) GetConfig(c echo.Context) error
GetConfig returns the failover configuration. GET /api/v1/proxy/failover/config
func (*FailoverAPIHandler) GetMetrics ¶
func (h *FailoverAPIHandler) GetMetrics(c echo.Context) error
GetMetrics returns failover metrics. GET /api/v1/proxy/failover/metrics
func (*FailoverAPIHandler) GetOverview ¶
func (h *FailoverAPIHandler) GetOverview(c echo.Context) error
GetOverview returns aggregated failover state for sparse dashboard/status surfaces. GET /api/v1/proxy/failover/overview
func (*FailoverAPIHandler) RegisterRoutes ¶
func (h *FailoverAPIHandler) RegisterRoutes(g *echo.Group)
RegisterRoutes registers failover API routes
func (*FailoverAPIHandler) ResetCircuitBreakers ¶
func (h *FailoverAPIHandler) ResetCircuitBreakers(c echo.Context) error
ResetCircuitBreakers resets all circuit breakers. POST /api/v1/proxy/failover/reset
func (*FailoverAPIHandler) ServeHTTP ¶
func (h *FailoverAPIHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
ServeHTTP implements http.Handler for standalone use
func (*FailoverAPIHandler) SetOnConfigSave ¶
func (h *FailoverAPIHandler) SetOnConfigSave(fn func(*FailoverConfig) error)
SetOnConfigSave sets a callback invoked after config updates.
func (*FailoverAPIHandler) SetOnProviderRaceChange ¶
func (h *FailoverAPIHandler) SetOnProviderRaceChange(fn func(ProviderRaceConfig))
SetOnProviderRaceChange sets a callback for provider-race runtime updates.
func (*FailoverAPIHandler) SetProviderRaceStatsProvider ¶
func (h *FailoverAPIHandler) SetProviderRaceStatsProvider(fn func() ProviderRaceStatsSnapshot)
SetProviderRaceStatsProvider sets a callback for provider-race overview stats.
func (*FailoverAPIHandler) UpdateConfig ¶
func (h *FailoverAPIHandler) UpdateConfig(c echo.Context) error
UpdateConfig updates failover configuration. PUT /api/v1/proxy/failover/config
type FailoverConfig ¶
type FailoverConfig struct {
Enabled bool `json:"enabled" yaml:"enabled"`
MaxRetries int `json:"max_retries" yaml:"max_retries"`
RetryDelay time.Duration `json:"retry_delay" yaml:"retry_delay"`
CircuitBreaker bool `json:"circuit_breaker" yaml:"circuit_breaker"`
FailureThreshold int `json:"failure_threshold" yaml:"failure_threshold"`
RecoveryTimeout time.Duration `json:"recovery_timeout" yaml:"recovery_timeout"`
// Transient error (502/503) circuit breaker overrides — shorter recovery for temporary blips
TransientRecoveryTimeout time.Duration `json:"transient_recovery_timeout" yaml:"transient_recovery_timeout"` // 0 = use RecoveryTimeout
// Smart failover settings
ErrorClassification ErrorClassificationConfig `json:"error_classification" yaml:"error_classification"`
StreamingAnomaly StreamingAnomalyConfig `json:"streaming_anomaly" yaml:"streaming_anomaly"`
ContextWindowCheck bool `json:"context_window_check" yaml:"context_window_check"` // Skip providers with insufficient context window
QuotaCooldown time.Duration `json:"quota_cooldown" yaml:"quota_cooldown"` // Skip recently-errored providers for quota errors (0 = disabled)
ContextWindowOverride map[string]int `json:"context_window_override" yaml:"context_window_override"` // Provider name -> max context tokens override
ProviderRace ProviderRaceConfig `json:"provider_race" yaml:"provider_race"` // Multi-provider concurrent race
}
FailoverConfig holds the failover configuration.
type FailoverHandler ¶
type FailoverHandler struct {
// contains filtered or unexported fields
}
FailoverHandler handles request failover
func NewFailoverHandler ¶
func NewFailoverHandler(config *FailoverConfig, router *Router) *FailoverHandler
NewFailoverHandler creates a new failover handler
func (*FailoverHandler) Config ¶
func (fh *FailoverHandler) Config() *FailoverConfig
Config returns the failover configuration pointer.
func (*FailoverHandler) Execute ¶
func (fh *FailoverHandler) Execute( ctx context.Context, provider *Provider, fn func(*Provider) (*http.Response, error), ) (*http.Response, error)
Execute executes request with failover support
func (*FailoverHandler) GetBreakerState ¶
func (fh *FailoverHandler) GetBreakerState(name string) string
GetBreakerState returns the circuit breaker state for a provider
func (*FailoverHandler) GetBreakerStats ¶
func (fh *FailoverHandler) GetBreakerStats() map[string]interface{}
GetBreakerStats returns circuit breaker statistics
func (*FailoverHandler) LoadBreakerState ¶
func (fh *FailoverHandler) LoadBreakerState(snap BreakerSnapshot)
LoadBreakerState restores a circuit breaker's state from persisted data. It creates the breaker if it doesn't exist yet.
func (*FailoverHandler) ResetAllBreakers ¶
func (fh *FailoverHandler) ResetAllBreakers()
ResetAllBreakers resets all circuit breakers
func (*FailoverHandler) ResetBreaker ¶
func (fh *FailoverHandler) ResetBreaker(name string)
ResetBreaker resets a circuit breaker
func (*FailoverHandler) SnapshotBreakers ¶
func (fh *FailoverHandler) SnapshotBreakers() []BreakerSnapshot
SnapshotBreakers returns a snapshot of all circuit breaker states for persistence.
type FailoverMetrics ¶
type FailoverMetrics struct {
// Error counts by type
ErrorsByType map[RetryableErrorType]int64 `json:"errors_by_type"`
// Failover counts
FailoverTotal int64 `json:"failover_total"`
FailoverSuccess int64 `json:"failover_success"`
FailoverFailure int64 `json:"failover_failure"`
// Provider-specific stats
ProviderErrors map[string]map[RetryableErrorType]int64 `json:"provider_errors"`
ProviderFailovers map[string]int64 `json:"provider_failovers"`
// Streaming anomaly stats
StreamAnomalies int64 `json:"stream_anomalies"`
// contains filtered or unexported fields
}
FailoverMetrics tracks failover statistics
func NewFailoverMetrics ¶
func NewFailoverMetrics() *FailoverMetrics
NewFailoverMetrics creates new metrics tracker
func (*FailoverMetrics) GetStats ¶
func (m *FailoverMetrics) GetStats() map[string]interface{}
GetStats returns a copy of current stats
func (*FailoverMetrics) RecordError ¶
func (m *FailoverMetrics) RecordError(provider string, classification *ErrorClassification)
RecordError records an error occurrence
func (*FailoverMetrics) RecordFailover ¶
func (m *FailoverMetrics) RecordFailover(fromProvider, toProvider string, success bool)
RecordFailover records a failover event
func (*FailoverMetrics) RecordProviderPoolResult ¶
func (m *FailoverMetrics) RecordProviderPoolResult(result *providerpool.FailoverResult)
RecordProviderPoolResult maps ProviderPool failover callbacks into smart failover metrics.
func (*FailoverMetrics) RecordStreamAnomaly ¶
func (m *FailoverMetrics) RecordStreamAnomaly()
RecordStreamAnomaly records a streaming anomaly
type FailoverMetricsResponse ¶
type FailoverMetricsResponse struct {
ErrorsByType map[string]int64 `json:"errors_by_type"`
FailoverTotal int64 `json:"failover_total"`
FailoverSuccess int64 `json:"failover_success"`
FailoverFailure int64 `json:"failover_failure"`
ProviderErrors map[string]map[string]int64 `json:"provider_errors"`
ProviderFailovers map[string]int64 `json:"provider_failovers"`
StreamAnomalies int64 `json:"stream_anomalies"`
}
FailoverMetricsResponse is the response for metrics endpoint
type FailoverOverviewResponse ¶
type FailoverOverviewResponse struct {
Metrics FailoverMetricsResponse `json:"metrics"`
Config FailoverConfig `json:"config"`
ProviderRace ProviderRaceStatsSnapshot `json:"provider_race"`
CircuitBreakers map[string]FailoverCircuitBreakerStatusResponse `json:"circuit_breakers"`
}
type FailoverSnapshot ¶
type FailoverSnapshot struct {
Total int64 `json:"total"`
Success int64 `json:"success"`
Failure int64 `json:"failure"`
ByReason map[string]int64 `json:"by_reason"`
Recent []*failoverLogEntry `json:"recent"`
}
FailoverSnapshot contains failover statistics.
type FormatConverter ¶
type FormatConverter struct{}
FormatConverter handles conversion between OpenAI and Anthropic API formats
func NewFormatConverter ¶
func NewFormatConverter() *FormatConverter
NewFormatConverter creates a new format converter
func (*FormatConverter) ConvertModelsResponse ¶
func (fc *FormatConverter) ConvertModelsResponse(body []byte, sourceType ProviderType) ([]byte, error)
ConvertModelsResponse converts provider models list to OpenAI format
func (*FormatConverter) ConvertRequest ¶
func (fc *FormatConverter) ConvertRequest(body []byte, targetType ProviderType) ([]byte, string, error)
ConvertRequest converts an OpenAI request to the target provider format.
func (*FormatConverter) ConvertRequestWithCaching ¶
func (fc *FormatConverter) ConvertRequestWithCaching(body []byte, targetType ProviderType, promptCacheEnabled bool) ([]byte, string, error)
ConvertRequestWithCaching converts OpenAI request to target provider format and optionally applies prompt caching in a single unmarshal/marshal pass. This eliminates the double unmarshal/marshal that ConvertRequest + InjectPromptCaching does.
func (*FormatConverter) ConvertResponse ¶
func (fc *FormatConverter) ConvertResponse(body []byte, sourceType ProviderType) ([]byte, error)
ConvertResponse converts provider response to OpenAI format
func (*FormatConverter) ConvertStreamingResponse ¶
func (fc *FormatConverter) ConvertStreamingResponse(reader io.Reader, sourceType ProviderType, writer http.ResponseWriter) error
ConvertStreamingResponse creates a streaming response converter
func (*FormatConverter) DetectProviderType ¶
func (fc *FormatConverter) DetectProviderType(endpoint string) ProviderType
DetectProviderType detects the provider type from endpoint URL
type GeminiCandidate ¶
type GeminiCandidate struct {
Content GeminiContent `json:"content"`
FinishReason string `json:"finishReason,omitempty"`
SafetyRatings []struct {
Category string `json:"category"`
Probability string `json:"probability"`
} `json:"safetyRatings,omitempty"`
}
type GeminiContent ¶
type GeminiContent struct {
Role string `json:"role,omitempty"`
Parts []GeminiPart `json:"parts"`
}
type GeminiFunctionCall ¶
type GeminiFunctionResponse ¶
type GeminiGenerationConfig ¶
type GeminiInlineData ¶
type GeminiModel ¶
type GeminiModel struct {
Name string `json:"name"`
Version string `json:"version,omitempty"`
DisplayName string `json:"displayName,omitempty"`
Description string `json:"description,omitempty"`
InputTokenLimit int `json:"inputTokenLimit,omitempty"`
OutputTokenLimit int `json:"outputTokenLimit,omitempty"`
SupportedGenerationMethods []string `json:"supportedGenerationMethods,omitempty"`
}
type GeminiModelsResponse ¶
type GeminiModelsResponse struct {
Models []GeminiModel `json:"models"`
}
GeminiModelsResponse represents the Gemini models list response.
type GeminiPart ¶
type GeminiPart struct {
Text string `json:"text,omitempty"`
InlineData *GeminiInlineData `json:"inlineData,omitempty"`
FunctionCall *GeminiFunctionCall `json:"functionCall,omitempty"`
FunctionResponse *GeminiFunctionResponse `json:"functionResponse,omitempty"`
}
type GeminiRequest ¶
type GeminiRequest struct {
Contents []GeminiContent `json:"contents"`
SystemInstruction *GeminiContent `json:"systemInstruction,omitempty"`
GenerationConfig *GeminiGenerationConfig `json:"generationConfig,omitempty"`
Tools []GeminiTool `json:"tools,omitempty"`
SafetySettings []GeminiSafetySetting `json:"safetySettings,omitempty"`
}
GeminiRequest represents a Gemini API request. It is the first of the Gemini request/response types.
type GeminiResponse ¶
type GeminiResponse struct {
Candidates []GeminiCandidate `json:"candidates"`
UsageMetadata *GeminiUsageMetadata `json:"usageMetadata,omitempty"`
ModelVersion string `json:"modelVersion,omitempty"`
}
type GeminiSafetySetting ¶
type GeminiStreamChunk ¶
type GeminiStreamChunk struct {
Candidates []GeminiCandidate `json:"candidates,omitempty"`
UsageMetadata *GeminiUsageMetadata `json:"usageMetadata,omitempty"`
}
type GeminiTool ¶
type GeminiTool struct {
FunctionDeclarations []GeminiFunctionDeclaration `json:"functionDeclarations,omitempty"`
}
type GeminiUsageMetadata ¶
type GuardConfig ¶
type GuardConfig struct {
Enabled bool `json:"enabled"`
BlockOnDetection bool `json:"block_on_detection"`
LogDetections bool `json:"log_detections"`
CustomPatterns []string `json:"custom_patterns"`
WhitelistPatterns []string `json:"whitelist_patterns"`
MaxPromptLength int `json:"max_prompt_length"`
}
GuardConfig holds prompt guard configuration
func DefaultGuardConfig ¶
func DefaultGuardConfig() *GuardConfig
DefaultGuardConfig returns default guard configuration
type GuardResult ¶
type GuardResult struct {
Blocked bool `json:"blocked"`
Reason string `json:"reason,omitempty"`
RiskLevel string `json:"risk_level"`
Matches []string `json:"matches,omitempty"`
Suggestions []string `json:"suggestions,omitempty"`
}
GuardResult represents the result of a prompt guard check
type GuardRule ¶
type GuardRule struct {
ID string `json:"id"`
Name string `json:"name"`
Pattern string `json:"pattern"`
Description string `json:"description,omitempty"`
Enabled bool `json:"enabled"`
RiskLevel string `json:"risk_level"` // low, medium, high
Action string `json:"action"` // log, warn, block
}
GuardRule represents a custom guard rule
type HealthCheckConfig ¶
type HealthCheckConfig struct {
Enabled bool `json:"enabled" yaml:"enabled"`
Interval time.Duration `json:"interval" yaml:"interval"`
Timeout time.Duration `json:"timeout" yaml:"timeout"`
}
HealthCheckConfig holds the health check configuration.
type HealthChecker ¶
type HealthChecker struct {
// contains filtered or unexported fields
}
HealthChecker performs health checks on providers
func NewHealthChecker ¶
func NewHealthChecker(router *Router, connPool *ConnectionPool, config *HealthCheckConfig) *HealthChecker
NewHealthChecker creates a new health checker
func (*HealthChecker) CheckNow ¶
func (hc *HealthChecker) CheckNow()
CheckNow performs an immediate health check on all providers
func (*HealthChecker) CheckProvider ¶
func (hc *HealthChecker) CheckProvider(name string) error
CheckProvider performs an immediate health check on a specific provider
type HotReloadableConfig ¶
type HotReloadableConfig struct {
// Can be hot-reloaded
Providers []*ProviderConfig `json:"providers"`
Failover *FailoverConfig `json:"failover"`
Guard *GuardConfig `json:"guard"`
Auth *AuthConfig `json:"auth"`
RateLimit *RateLimitConfig `json:"rate_limit"`
Mock *MockConfig `json:"mock"`
ModelCompat *ModelCompatConfig `json:"model_compat"`
}
HotReloadableConfig defines which config sections can be hot-reloaded
type MaskingCategory ¶
type MaskingCategory string
MaskingCategory identifies one of the reserved masking categories.
const ( MaskingPII MaskingCategory = "pii" // Personally Identifiable Information MaskingCredentials MaskingCategory = "credentials" // API keys, passwords, tokens MaskingFinancial MaskingCategory = "financial" // Credit card numbers, bank accounts MaskingCustom MaskingCategory = "custom" // User-defined patterns )
type MaskingConfig ¶
type MaskingConfig struct {
Enabled bool `json:"enabled"`
Rules []*MaskingRule `json:"rules"`
OnMask func(ruleID, original, masked string)
}
MaskingConfig holds the data masking configuration.
func DefaultMaskingConfig ¶
func DefaultMaskingConfig() *MaskingConfig
DefaultMaskingConfig returns default masking configuration
type MaskingDirection ¶
type MaskingDirection string
MaskingDirection specifies when to apply masking
const (
MaskingRequest MaskingDirection = "request"
MaskingResponse MaskingDirection = "response"
MaskingBoth MaskingDirection = "both"
)
type MaskingRule ¶
type MaskingRule struct {
ID string `json:"id"`
Name string `json:"name"`
Category MaskingCategory `json:"category"`
Pattern string `json:"pattern"` // Regex pattern
Replacement string `json:"replacement"` // e.g., "[REDACTED]", "***"
Direction MaskingDirection `json:"direction"` // request, response, both
Enabled bool `json:"enabled"`
}
MaskingRule defines what to mask
func GetDefaultRules ¶
func GetDefaultRules() []*MaskingRule
GetDefaultRules returns predefined masking rules. Replacement text uses {MASKED} placeholder — resolved at runtime via DataMasker.maskLabel().
type MetricsBucket ¶
type MetricsBucket struct {
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
RequestCount int64 `json:"request_count"`
SuccessCount int64 `json:"success_count"`
ErrorCount int64 `json:"error_count"`
TotalLatency int64 `json:"total_latency_ms"`
TotalTTFT int64 `json:"total_ttft_ms"`
TotalTokensIn int64 `json:"total_tokens_in"`
TotalTokensOut int64 `json:"total_tokens_out"`
TotalBytes int64 `json:"total_bytes"`
}
MetricsBucket holds aggregated metrics for a time period
type MetricsCollector ¶
type MetricsCollector struct {
// contains filtered or unexported fields
}
MetricsCollector collects and aggregates usage metrics
func NewMetricsCollector ¶
func NewMetricsCollector(config *MetricsConfig) *MetricsCollector
NewMetricsCollector creates a new metrics collector
func (*MetricsCollector) GetAllProviderMetrics ¶
func (mc *MetricsCollector) GetAllProviderMetrics() map[string]*ProviderMetrics
GetAllProviderMetrics returns metrics for all providers
func (*MetricsCollector) GetProviderMetrics ¶
func (mc *MetricsCollector) GetProviderMetrics(name string) (*ProviderMetrics, bool)
GetProviderMetrics returns metrics for a specific provider
func (*MetricsCollector) GetRecentRequests ¶
func (mc *MetricsCollector) GetRecentRequests(limit int) []RequestMetrics
GetRecentRequests returns recent request metrics
func (*MetricsCollector) GetTimeSeries ¶
func (mc *MetricsCollector) GetTimeSeries(start, end time.Time) []*MetricsBucket
GetTimeSeries returns time-series data for a time range
func (*MetricsCollector) LatencyStats ¶
func (mc *MetricsCollector) LatencyStats() map[string]interface{}
LatencyStats returns detailed latency statistics
func (*MetricsCollector) Record ¶
func (mc *MetricsCollector) Record(m RequestMetrics)
Record records a request metric
func (*MetricsCollector) Summary ¶
func (mc *MetricsCollector) Summary() map[string]interface{}
Summary returns a summary of all metrics
type MetricsConfig ¶
type MetricsConfig struct {
Enabled bool `json:"enabled"`
RetentionPeriod time.Duration `json:"retention_period"`
BucketSize time.Duration `json:"bucket_size"`
}
MetricsConfig holds metrics configuration
func DefaultMetricsConfig ¶
func DefaultMetricsConfig() *MetricsConfig
DefaultMetricsConfig returns default metrics configuration
type MiddlewareFunc ¶
MiddlewareFunc is a function that wraps an http.Handler
type MockConfig ¶
type MockConfig struct {
Enabled bool `json:"enabled"`
Endpoints []*MockEndpoint `json:"endpoints"`
}
MockConfig holds the mock endpoint configuration.
func DefaultMockConfig ¶
func DefaultMockConfig() *MockConfig
DefaultMockConfig returns default mock configuration
type MockEndpoint ¶
type MockEndpoint struct {
Path string `json:"path"`
Method string `json:"method"`
Response interface{} `json:"response"`
StatusCode int `json:"status_code"`
Delay time.Duration `json:"delay"`
Enabled bool `json:"enabled"`
Headers http.Header `json:"headers,omitempty"`
}
MockEndpoint defines a mock endpoint
type MockHandler ¶
type MockHandler struct {
// contains filtered or unexported fields
}
MockHandler handles mock endpoints
func NewMockHandler ¶
func NewMockHandler(config *MockConfig) *MockHandler
NewMockHandler creates a new mock handler
func (*MockHandler) AddEndpoint ¶
func (mh *MockHandler) AddEndpoint(ep *MockEndpoint)
AddEndpoint adds a mock endpoint
func (*MockHandler) GetEndpoint ¶
func (mh *MockHandler) GetEndpoint(path, method string) (*MockEndpoint, bool)
GetEndpoint returns a mock endpoint
func (*MockHandler) Handle ¶
func (mh *MockHandler) Handle(w http.ResponseWriter, r *http.Request) bool
Handle checks whether the request matches a mock endpoint.
func (*MockHandler) IsEnabled ¶
func (mh *MockHandler) IsEnabled() bool
IsEnabled returns whether mock handling is enabled
func (*MockHandler) ListEndpoints ¶
func (mh *MockHandler) ListEndpoints() []*MockEndpoint
ListEndpoints returns all mock endpoints
func (*MockHandler) RemoveEndpoint ¶
func (mh *MockHandler) RemoveEndpoint(path, method string) bool
RemoveEndpoint removes a mock endpoint
func (*MockHandler) SetEnabled ¶
func (mh *MockHandler) SetEnabled(path, method string, enabled bool) bool
SetEnabled enables or disables a mock endpoint
func (*MockHandler) SetGlobalEnabled ¶
func (mh *MockHandler) SetGlobalEnabled(enabled bool)
SetGlobalEnabled enables or disables all mock endpoints
func (*MockHandler) Stats ¶
func (mh *MockHandler) Stats() map[string]interface{}
Stats returns mock handler statistics
type ModelCompatConfig ¶
type ModelCompatConfig struct {
AutoDetect bool `json:"auto_detect"`
DetectionCache time.Duration `json:"detection_cache"`
ModelOverrides map[string]*ModelFeatures `json:"model_overrides"`
ToolCallFallback string `json:"tool_call_fallback"` // error, prompt, skip
}
ModelCompatConfig holds the model compatibility configuration.
func DefaultModelCompatConfig ¶
func DefaultModelCompatConfig() *ModelCompatConfig
DefaultModelCompatConfig returns default model compatibility config
type ModelCompatLayer ¶
type ModelCompatLayer struct {
// contains filtered or unexported fields
}
ModelCompatLayer handles model compatibility
func NewModelCompatLayer ¶
func NewModelCompatLayer(config *ModelCompatConfig) *ModelCompatLayer
NewModelCompatLayer creates a new compatibility layer
func (*ModelCompatLayer) AdaptRequest ¶
func (mcl *ModelCompatLayer) AdaptRequest(model string, req *ChatRequest) (*ChatRequest, error)
AdaptRequest adapts request for model compatibility
func (*ModelCompatLayer) GetAllFeatures ¶
func (mcl *ModelCompatLayer) GetAllFeatures() map[string]*ModelFeatures
GetAllFeatures returns all model features as a map
func (*ModelCompatLayer) GetFeatures ¶
func (mcl *ModelCompatLayer) GetFeatures(model string) *ModelFeatures
GetFeatures returns features for a model
func (*ModelCompatLayer) ListModels ¶
func (mcl *ModelCompatLayer) ListModels() []*ModelFeatures
ListModels returns all known models
func (*ModelCompatLayer) SetFeatures ¶
func (mcl *ModelCompatLayer) SetFeatures(model string, features *ModelFeatures)
SetFeatures sets features for a model
func (*ModelCompatLayer) Stats ¶
func (mcl *ModelCompatLayer) Stats() map[string]interface{}
Stats returns compatibility layer statistics
type ModelFamily ¶
type ModelFamily struct {
Name string `json:"name" yaml:"name"` // e.g., "claude-3", "gpt-4", "gemini-pro"
Patterns []string `json:"patterns" yaml:"patterns"` // Regex patterns to match model IDs
Provider string `json:"provider" yaml:"provider"` // Target provider for this family
Fallback string `json:"fallback" yaml:"fallback"` // Fallback model for background tasks
}
ModelFamily represents a model series/family
type ModelFeatures ¶
type ModelFeatures struct {
Model string `json:"model"`
ToolCalling bool `json:"tool_calling"`
Vision bool `json:"vision"`
Streaming bool `json:"streaming"`
SystemPrompt bool `json:"system_prompt"`
MaxContextTokens int `json:"max_context_tokens"`
MaxOutputTokens int `json:"max_output_tokens"`
}
ModelFeatures describes model capabilities
type ModelOrigin ¶
type ModelOrigin string
ModelOrigin indicates where a model runs.
const (
OriginCloud ModelOrigin = "cloud"
OriginEdge ModelOrigin = "edge"
OriginLocal ModelOrigin = "local"
)
type ModelPricing ¶
type ModelPricing struct {
Input float64 `yaml:"input" json:"input"`
Output float64 `yaml:"output" json:"output"`
}
ModelPricing holds per-model token pricing (USD per 1M tokens).
type ModelRoute ¶
type ModelRoute struct {
OriginalModel string `json:"original_model"`
TargetModel string `json:"target_model"`
Provider string `json:"provider"`
Family string `json:"family,omitempty"`
RuleApplied string `json:"rule_applied,omitempty"`
Downgraded bool `json:"downgraded,omitempty"`
}
ModelRoute represents the routing decision
type ModelRouter ¶
type ModelRouter struct {
// contains filtered or unexported fields
}
ModelRouter handles intelligent model-based routing
func NewModelRouter ¶
func NewModelRouter(config *ModelRouterConfig) (*ModelRouter, error)
NewModelRouter creates a new model router
func (*ModelRouter) AddRule ¶
func (mr *ModelRouter) AddRule(rule *RegexRule) error
AddRule adds a new regex rule
func (*ModelRouter) GetFamilies ¶
func (mr *ModelRouter) GetFamilies() []*ModelFamily
GetFamilies returns all configured model families
func (*ModelRouter) GetRules ¶
func (mr *ModelRouter) GetRules() []*RegexRule
GetRules returns all configured regex rules
func (*ModelRouter) IsBackgroundRequest ¶
func (mr *ModelRouter) IsBackgroundRequest(r *http.Request) bool
IsBackgroundRequest reports whether the request is a background task (e.g., title generation).
func (*ModelRouter) RemoveRule ¶
func (mr *ModelRouter) RemoveRule(pattern string) bool
RemoveRule removes a regex rule by pattern
func (*ModelRouter) RouteModel ¶
func (mr *ModelRouter) RouteModel(requestedModel string, isBackground bool) (*ModelRoute, error)
RouteModel determines the target provider and model for a request
func (*ModelRouter) SetTierResolver ¶
func (mr *ModelRouter) SetTierResolver(tr *TierResolver)
SetTierResolver sets the tier resolver for dynamic background downgrade.
func (*ModelRouter) Stats ¶
func (mr *ModelRouter) Stats() map[string]interface{}
Stats returns model router statistics
type ModelRouterConfig ¶
type ModelRouterConfig struct {
Enabled bool `json:"enabled" yaml:"enabled"`
Families []*ModelFamily `json:"families" yaml:"families"`
BackgroundModels []string `json:"background_models" yaml:"background_models"` // Models for background tasks
DefaultFamily string `json:"default_family" yaml:"default_family"`
RegexCustomRules []*RegexRule `json:"regex_rules" yaml:"regex_rules"` // Expert-level regex rules
}
ModelRouterConfig holds the configuration for model routing.
func DefaultModelRouterConfig ¶
func DefaultModelRouterConfig() *ModelRouterConfig
DefaultModelRouterConfig returns default model router configuration. Families and BackgroundModels are intentionally empty — the TierResolver dynamically handles model classification based on actual pricing data. Users can still add custom RegexCustomRules for explicit overrides.
type OAuthTokenProvider ¶
type OAuthTokenProvider interface {
GetAccessToken(providerID string) (string, error)
GetCopilotAccessToken(providerID string) (string, error)
GetCopilotEndpoint() string
}
OAuthTokenProvider provides OAuth access tokens for providers.
type OpenAIChatRequest ¶
type OpenAIChatRequest struct {
Model string `json:"model"`
Messages []OpenAIMessage `json:"messages"`
MaxTokens int `json:"max_tokens,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
TopP float64 `json:"top_p,omitempty"`
Stream bool `json:"stream,omitempty"`
Stop []string `json:"stop,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
Tools []OpenAITool `json:"tools,omitempty"`
ToolChoice interface{} `json:"tool_choice,omitempty"`
ResponseFormat map[string]interface{} `json:"response_format,omitempty"`
}
OpenAI request/response types. OpenAIChatRequest is the chat request payload.
type OpenAIChatResponse ¶
type OpenAIChatResponse struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created"`
Model string `json:"model"`
Choices []struct {
Index int `json:"index"`
Message OpenAIMessage `json:"message"`
FinishReason string `json:"finish_reason"`
} `json:"choices"`
Usage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
} `json:"usage"`
}
type OpenAIMessage ¶
type OpenAIMessage struct {
Role string `json:"role"`
Content interface{} `json:"content"` // string or []ContentPart
Name string `json:"name,omitempty"`
ToolCalls []OpenAIToolCall `json:"tool_calls,omitempty"`
ToolCallID string `json:"tool_call_id,omitempty"`
}
type OpenAIStreamChunk ¶
type OpenAIStreamChunk struct {
ID string `json:"id"`
Object string `json:"object"`
Created int64 `json:"created,omitempty"`
Model string `json:"model,omitempty"`
Choices []StreamChunkChoice `json:"choices"`
Usage *StreamChunkUsage `json:"usage,omitempty"`
}
type OpenAITool ¶
type OpenAITool struct {
Type string `json:"type"`
Function OpenAIToolFunction `json:"function"`
Strict bool `json:"strict,omitempty"`
}
type OpenAIToolCall ¶
type OpenAIToolCall struct {
ID string `json:"id"`
CallID string `json:"call_id,omitempty"`
Type string `json:"type"`
Function OpenAIToolCallFunc `json:"function"`
}
type OpenAIToolCallFunc ¶
OpenAIToolCallFunc is the function part of an OpenAI tool call. Arguments is normally a JSON string, but some providers send it as a JSON object.
func (*OpenAIToolCallFunc) UnmarshalJSON ¶
func (f *OpenAIToolCallFunc) UnmarshalJSON(data []byte) error
UnmarshalJSON handles Arguments being either a JSON string or a JSON object.
type OpenAIToolFunction ¶
type OriginRegistry ¶
type OriginRegistry struct {
// contains filtered or unexported fields
}
OriginRegistry resolves model IDs to their origin (cloud/edge/local) using glob patterns with an LRU cache for repeated lookups.
func NewOriginRegistry ¶
func NewOriginRegistry(patterns map[string]string) *OriginRegistry
NewOriginRegistry creates a registry from a map of glob→origin strings.
func (*OriginRegistry) Resolve ¶
func (r *OriginRegistry) Resolve(model string) ModelOrigin
Resolve returns the origin for a model ID. Defaults to OriginCloud. Results are cached after first lookup.
type PerformanceConfig ¶
type PerformanceConfig struct {
// Buffer pool settings
BufferPoolEnabled bool `json:"buffer_pool_enabled"`
BufferPoolSize int `json:"buffer_pool_size"` // Number of buffers
BufferInitialSize int `json:"buffer_initial_size"` // Initial buffer size in bytes
BufferMaxSize int `json:"buffer_max_size"` // Max buffer size in bytes
// Response cache settings
CacheEnabled bool `json:"cache_enabled"`
CacheMaxSize int `json:"cache_max_size"` // Max cache entries
CacheMaxEntrySize int `json:"cache_max_entry_size"` // Max size per entry in bytes
CacheTTL time.Duration `json:"cache_ttl"`
CacheableStatusCodes []int `json:"cacheable_status_codes"`
// Connection metrics
MetricsEnabled bool `json:"metrics_enabled"`
MetricsInterval time.Duration `json:"metrics_interval"`
}
PerformanceConfig holds performance optimization settings
func DefaultPerformanceConfig ¶
func DefaultPerformanceConfig() *PerformanceConfig
DefaultPerformanceConfig returns default performance configuration
type PerformanceManager ¶
type PerformanceManager struct {
// contains filtered or unexported fields
}
PerformanceManager coordinates all performance optimizations
func NewPerformanceManager ¶
func NewPerformanceManager(config *PerformanceConfig) *PerformanceManager
NewPerformanceManager creates a new performance manager
func (*PerformanceManager) GetBufferPool ¶
func (pm *PerformanceManager) GetBufferPool() *BufferPool
GetBufferPool returns the buffer pool
func (*PerformanceManager) GetCache ¶
func (pm *PerformanceManager) GetCache() *ResponseCache
GetCache returns the response cache
func (*PerformanceManager) GetConnectionMetrics ¶
func (pm *PerformanceManager) GetConnectionMetrics() *ConnectionMetrics
GetConnectionMetrics returns the connection metrics
func (*PerformanceManager) Stats ¶
func (pm *PerformanceManager) Stats() map[string]interface{}
Stats returns all performance statistics
type Pipeline ¶
type Pipeline struct {
// contains filtered or unexported fields
}
Pipeline represents a request processing pipeline
func NewPipeline ¶
func NewPipeline(config *PipelineConfig) *Pipeline
NewPipeline creates a new request pipeline
func (*Pipeline) GetAuth ¶
func (p *Pipeline) GetAuth() *Authenticator
GetAuth returns the authenticator
func (*Pipeline) GetGuard ¶
func (p *Pipeline) GetGuard() *PromptGuard
GetGuard returns the prompt guard
func (*Pipeline) GetMetrics ¶
func (p *Pipeline) GetMetrics() *MetricsCollector
GetMetrics returns the metrics collector
func (*Pipeline) GetSessionMonitor ¶
func (p *Pipeline) GetSessionMonitor() *SessionMonitor
GetSessionMonitor returns the session monitor
func (*Pipeline) Use ¶
func (p *Pipeline) Use(mw MiddlewareFunc)
Use adds a middleware to the pipeline
type PipelineConfig ¶
type PipelineConfig struct {
AuthConfig *AuthConfig `json:"auth"`
RateLimitConfig *RateLimitConfig `json:"rate_limit"`
GuardConfig *GuardConfig `json:"guard"`
SessionConfig *SessionConfig `json:"session"`
MetricsConfig *MetricsConfig `json:"metrics"`
}
PipelineConfig holds pipeline configuration
func DefaultPipelineConfig ¶
func DefaultPipelineConfig() *PipelineConfig
DefaultPipelineConfig returns default pipeline configuration
type PipelineSnapshot ¶
type PipelineSnapshot struct {
Routing RoutingStatsSnapshot `json:"routing"`
Failover FailoverSnapshot `json:"failover"`
}
PipelineSnapshot is the combined stats snapshot returned by the API.
type PipelineStatsCollector ¶
type PipelineStatsCollector struct {
// contains filtered or unexported fields
}
PipelineStatsCollector collects and batch-persists proxy pipeline statistics (routing, failover) asynchronously.
func NewPipelineStatsCollector ¶
func NewPipelineStatsCollector(db *sql.DB, routingStats *RoutingStats) *PipelineStatsCollector
NewPipelineStatsCollector creates a new collector. Call Start() to begin background persistence.
func NewPipelineStatsCollectorWithReadDB ¶
func NewPipelineStatsCollectorWithReadDB(writeDB, readDB *sql.DB, routingStats *RoutingStats) *PipelineStatsCollector
NewPipelineStatsCollectorWithReadDB creates a new collector with separate write and read database handles.
func (*PipelineStatsCollector) Close ¶
func (c *PipelineStatsCollector) Close() error
Close implements io.Closer for lifecycle shutdown hooks.
func (*PipelineStatsCollector) LoadBreakerState ¶
func (c *PipelineStatsCollector) LoadBreakerState()
LoadBreakerState restores circuit breaker state from DB. Must be called after SetFailoverHandler.
func (*PipelineStatsCollector) LoadSmartMetrics ¶
func (c *PipelineStatsCollector) LoadSmartMetrics()
LoadSmartMetrics restores smart failover metrics from DB. Must be called after SetSmartFailoverMetrics.
func (*PipelineStatsCollector) OnFailover ¶
func (c *PipelineStatsCollector) OnFailover(result *providerpool.FailoverResult)
func (*PipelineStatsCollector) SetFailoverHandler ¶
func (c *PipelineStatsCollector) SetFailoverHandler(fh *FailoverHandler)
SetFailoverHandler sets the failover handler reference for circuit breaker persistence.
func (*PipelineStatsCollector) SetSmartFailoverMetrics ¶
func (c *PipelineStatsCollector) SetSmartFailoverMetrics(m *FailoverMetrics)
SetSmartFailoverMetrics sets the smart failover metrics reference for persistence.
func (*PipelineStatsCollector) Snapshot ¶
func (c *PipelineStatsCollector) Snapshot() PipelineSnapshot
Snapshot returns a combined pipeline stats snapshot.
func (*PipelineStatsCollector) Start ¶
func (c *PipelineStatsCollector) Start()
Start begins the background flush goroutine.
func (*PipelineStatsCollector) Stop ¶
func (c *PipelineStatsCollector) Stop()
Stop flushes remaining data and stops the background goroutine.
type PortAllocator ¶
type PortAllocator struct {
// contains filtered or unexported fields
}
PortAllocator handles dynamic port allocation
func NewPortAllocator ¶
func NewPortAllocator(config *PortConfig) *PortAllocator
NewPortAllocator creates a new port allocator
func (*PortAllocator) Allocate ¶
func (pa *PortAllocator) Allocate() (int, error)
Allocate finds and binds to an available port
func (*PortAllocator) GetBindAddress ¶
func (pa *PortAllocator) GetBindAddress() string
GetBindAddress returns the bind address with port
func (*PortAllocator) GetEndpoint ¶
func (pa *PortAllocator) GetEndpoint() string
GetEndpoint returns the full proxy endpoint URL
func (*PortAllocator) GetPort ¶
func (pa *PortAllocator) GetPort() int
GetPort returns the currently allocated port
func (*PortAllocator) Release ¶
func (pa *PortAllocator) Release() error
Release releases the allocated port
type PortConfig ¶
type PortConfig struct {
Value int `json:"value" yaml:"value"` // 0 = dynamic allocation
Range string `json:"range" yaml:"range"` // e.g., "9000-9100"
BindAddress string `json:"bind_address" yaml:"bind_address"` // Default: "127.0.0.1"
PortFile string `json:"port_file" yaml:"port_file"` // Write allocated port to file
}
PortConfig holds the port configuration.
type PromptCacheBreakDetector ¶
type PromptCacheBreakDetector struct {
// contains filtered or unexported fields
}
func NewPromptCacheBreakDetector ¶
func NewPromptCacheBreakDetector(maxTracked int) *PromptCacheBreakDetector
func (*PromptCacheBreakDetector) Observe ¶
func (d *PromptCacheBreakDetector) Observe(snapshot PromptCacheStateSnapshot, cacheReadTokens int) *PromptCacheBreakObservation
type PromptCacheSnapshot ¶
type PromptCacheSnapshot struct {
Requests int64 `json:"requests"`
CacheHits int64 `json:"cache_hits"`
CacheMisses int64 `json:"cache_misses"`
TotalCacheReadTokens int64 `json:"total_cache_read_tokens"`
TotalCacheCreation int64 `json:"total_cache_creation_tokens"`
TotalInputTokens int64 `json:"total_input_tokens"`
HitRate float64 `json:"hit_rate"`
ReuseRatio float64 `json:"reuse_ratio"`
}
PromptCacheSnapshot is a JSON-serializable snapshot of cache stats.
type PromptCacheStateSnapshot ¶
type PromptCacheStateSnapshot struct {
TrackingKey string
Provider string
Model string
Path string
SystemHash string
CacheControlHash string
ToolsHash string
ExtraBodyHash string
SystemBlockCount int
ToolCount int
ToolNames []string
PerToolHashes map[string]string
}
func BuildPromptCacheStateSnapshot ¶
func BuildPromptCacheStateSnapshot(body []byte, provider, fallbackModel, path string) PromptCacheStateSnapshot
type PromptCacheStats ¶
type PromptCacheStats struct {
// Requests is the total number of requests processed.
Requests atomic.Int64
// CacheHits is the number of requests where cache_read_input_tokens > 0.
CacheHits atomic.Int64
// CacheMisses is the number of requests where cache_read_input_tokens == 0.
CacheMisses atomic.Int64
// TotalCacheReadTokens is the cumulative cache_read_input_tokens.
TotalCacheReadTokens atomic.Int64
// TotalCacheCreationTokens is the cumulative cache_creation_input_tokens.
TotalCacheCreationTokens atomic.Int64
// TotalInputTokens is the cumulative input tokens (for computing reuse ratio).
TotalInputTokens atomic.Int64
}
PromptCacheStats tracks prompt caching effectiveness. All fields are updated atomically and safe for concurrent use.
func (*PromptCacheStats) HitRate ¶
func (s *PromptCacheStats) HitRate() float64
HitRate returns the cache hit rate as a float64 in the range [0, 1].
func (*PromptCacheStats) Record ¶
func (s *PromptCacheStats) Record(inputTokens, cacheReadTokens, cacheCreationTokens int)
Record records a single request's cache usage.
func (*PromptCacheStats) ReuseRatio ¶
func (s *PromptCacheStats) ReuseRatio() float64
ReuseRatio returns the fraction of input tokens served from cache.
func (*PromptCacheStats) Snapshot ¶
func (s *PromptCacheStats) Snapshot() PromptCacheSnapshot
Snapshot returns a copy of the current stats for reporting.
type PromptGuard ¶
type PromptGuard struct {
// contains filtered or unexported fields
}
PromptGuard detects potential prompt injection attacks
func NewPromptGuard ¶
func NewPromptGuard(config *GuardConfig) *PromptGuard
NewPromptGuard creates a new prompt guard
func (*PromptGuard) AddPattern ¶
func (pg *PromptGuard) AddPattern(pattern string) error
AddPattern adds a custom detection pattern
func (*PromptGuard) AddRule ¶
func (pg *PromptGuard) AddRule(rule GuardRule) error
AddRule adds a custom guard rule
func (*PromptGuard) AddWhitelistPattern ¶
func (pg *PromptGuard) AddWhitelistPattern(pattern string) error
AddWhitelistPattern adds a whitelist pattern
func (*PromptGuard) Check ¶
func (pg *PromptGuard) Check(prompt string) *GuardResult
Check analyzes a prompt for potential injection attacks
func (*PromptGuard) GetRules ¶
func (pg *PromptGuard) GetRules() []GuardRule
GetRules returns all custom guard rules
func (*PromptGuard) Stats ¶
func (pg *PromptGuard) Stats() map[string]interface{}
Stats returns guard statistics
type Provider ¶
type Provider struct {
Config *ProviderConfig
Healthy bool
LastCheck time.Time
LastError error
LastLatency time.Duration
}
Provider represents an upstream provider
type ProviderConfig ¶
type ProviderConfig struct {
Name string `json:"name" yaml:"name"`
Endpoint string `json:"endpoint" yaml:"endpoint"`
APIKey string `json:"api_key" yaml:"api_key"`
Priority int `json:"priority" yaml:"priority"`
Weight int `json:"weight" yaml:"weight"`
Enabled bool `json:"enabled" yaml:"enabled"`
HealthCheck string `json:"health_check" yaml:"health_check"`
SkipTLSVerify bool `json:"skip_tls_verify" yaml:"skip_tls_verify"`
}
ProviderConfig holds the provider configuration.
type ProviderConnectionMetrics ¶
type ProviderConnectionMetrics struct {
Requests int64
Successes int64
Failures int64
TotalLatency int64
Reused int64
}
ProviderConnectionMetrics tracks per-provider connection stats
type ProviderMemory ¶
type ProviderMemory struct {
// contains filtered or unexported fields
}
ProviderMemory remembers provider capabilities discovered at runtime. Uses a single ecache2 with uint64 keys (FNV-1a hashes) for zero-alloc lookups.
func NewProviderMemory ¶
func NewProviderMemory() *ProviderMemory
NewProviderMemory creates a new provider memory with sensible TTLs.
func (*ProviderMemory) BlacklistModel ¶
func (pm *ProviderMemory) BlacklistModel(providerID, baseURL, model string)
BlacklistModel marks a model as unavailable on this provider (skip it next time).
func (*ProviderMemory) ClearModelBlacklist ¶
func (pm *ProviderMemory) ClearModelBlacklist(providerID, baseURL, model string)
ClearModelBlacklist removes a model from the blacklist (user can retry after quota recharge, etc).
func (*ProviderMemory) ClearThrottle ¶
func (pm *ProviderMemory) ClearThrottle(providerID, baseURL string)
ClearThrottle removes throttle restriction (user can retry after service recovery).
func (*ProviderMemory) ForgetFormat ¶
func (pm *ProviderMemory) ForgetFormat(providerID, baseURL string)
ForgetFormat evicts the cached format.
func (*ProviderMemory) ForgetModelAlias ¶
func (pm *ProviderMemory) ForgetModelAlias(providerID, baseURL, requestedModel string)
ForgetModelAlias evicts a cached model alias.
func (*ProviderMemory) ForgetModelFormat ¶
func (pm *ProviderMemory) ForgetModelFormat(providerID, baseURL, requestedModel string)
ForgetModelFormat evicts the cached model-scoped format.
func (*ProviderMemory) ForgetThrottle ¶
func (pm *ProviderMemory) ForgetThrottle(providerID, baseURL string)
ForgetThrottle clears the throttle for a provider.
func (*ProviderMemory) ForgetToolCap ¶
func (pm *ProviderMemory) ForgetToolCap(providerID, baseURL string)
ForgetToolCap evicts the cached tool capability.
func (*ProviderMemory) GetRestrictions ¶
func (pm *ProviderMemory) GetRestrictions(providerID, baseURL string) map[string]interface{}
GetRestrictions returns all current restrictions (throttles + blacklists) for a provider. Used to show users what's temporarily restricted.
func (*ProviderMemory) IsModelBlacklisted ¶
func (pm *ProviderMemory) IsModelBlacklisted(providerID, baseURL, model string) bool
IsModelBlacklisted returns true if a model is known to be unavailable on this provider.
func (*ProviderMemory) IsThrottled ¶
func (pm *ProviderMemory) IsThrottled(providerID, baseURL string) bool
IsThrottled returns true if the provider is currently in a backoff period.
func (*ProviderMemory) RecallFormat ¶
func (pm *ProviderMemory) RecallFormat(providerID, baseURL string) (string, bool)
RecallFormat returns the remembered API format for a provider (as a raw string).
func (*ProviderMemory) RecallModelAlias ¶
func (pm *ProviderMemory) RecallModelAlias(providerID, baseURL, requestedModel string) (string, bool)
RecallModelAlias returns the actual model name that worked for a requested model.
func (*ProviderMemory) RecallModelFormat ¶
func (pm *ProviderMemory) RecallModelFormat(providerID, baseURL, requestedModel string) (string, bool)
RecallModelFormat returns the remembered API format for a specific provider+baseURL+requested model.
func (*ProviderMemory) RecallToolCap ¶
func (pm *ProviderMemory) RecallToolCap(providerID, baseURL string) (ToolCapLevel, bool)
RecallToolCap returns the remembered tool capability level.
func (*ProviderMemory) RememberFormat ¶
func (pm *ProviderMemory) RememberFormat(providerID, baseURL string, format string)
RememberFormat caches the API format that worked for a provider.
func (*ProviderMemory) RememberModelAlias ¶
func (pm *ProviderMemory) RememberModelAlias(providerID, baseURL, requestedModel, actualModel string)
RememberModelAlias caches a model name mapping that worked.
func (*ProviderMemory) RememberModelFormat ¶
func (pm *ProviderMemory) RememberModelFormat(providerID, baseURL, requestedModel, format string)
RememberModelFormat caches the API format that worked for a specific requested model.
func (*ProviderMemory) RememberThrottle ¶
func (pm *ProviderMemory) RememberThrottle(providerID, baseURL string, retryAfter time.Duration)
RememberThrottle records a 429 backoff for a provider.
func (*ProviderMemory) RememberToolCap ¶
func (pm *ProviderMemory) RememberToolCap(providerID, baseURL string, level ToolCapLevel)
RememberToolCap caches the tool capability level that worked.
func (*ProviderMemory) ThrottleRemaining ¶
func (pm *ProviderMemory) ThrottleRemaining(providerID, baseURL string) time.Duration
ThrottleRemaining returns how long the provider should continue backing off.
func (*ProviderMemory) ThrottleUntil ¶
func (pm *ProviderMemory) ThrottleUntil(providerID, baseURL string) (time.Time, bool)
ThrottleUntil returns the time until which a provider remains throttled.
type ProviderMetrics ¶
type ProviderMetrics struct {
Name string `json:"name"`
RequestCount int64 `json:"request_count"`
SuccessCount int64 `json:"success_count"`
ErrorCount int64 `json:"error_count"`
TotalLatency int64 `json:"total_latency_ms"`
AvgLatency float64 `json:"avg_latency_ms"`
TotalTTFT int64 `json:"total_ttft_ms"`
AvgTTFT float64 `json:"avg_ttft_ms"`
TotalTokensIn int64 `json:"total_tokens_in"`
TotalTokensOut int64 `json:"total_tokens_out"`
TokensPerSec float64 `json:"tokens_per_sec"` // Output tokens per second
}
ProviderMetrics holds metrics for a specific provider
type ProviderRaceConfig ¶
type ProviderRaceConfig struct {
Enabled bool `json:"enabled" yaml:"enabled"`
MaxParallel int `json:"max_parallel" yaml:"max_parallel"` // Max providers to race concurrently
MinProviders int `json:"min_providers" yaml:"min_providers"` // Require at least N candidates to start race
EmptyRateMinSamples int `json:"empty_rate_min_samples" yaml:"empty_rate_min_samples"` // Min attempts before applying empty-rate policy
EmptyRateCooldownThreshold float64 `json:"empty_rate_cooldown_threshold" yaml:"empty_rate_cooldown_threshold"` // >= threshold enters temporary cooldown
EmptyRateSinkThreshold float64 `json:"empty_rate_sink_threshold" yaml:"empty_rate_sink_threshold"` // >= threshold sinks to tail
EmptyRateExcludeThreshold float64 `json:"empty_rate_exclude_threshold" yaml:"empty_rate_exclude_threshold"` // >= threshold excluded from race
EmptyRateCooldown time.Duration `json:"empty_rate_cooldown" yaml:"empty_rate_cooldown"` // Cooldown duration after threshold hit
}
ProviderRaceConfig controls concurrent provider racing behavior.
func DefaultProviderRaceConfig ¶
func DefaultProviderRaceConfig() ProviderRaceConfig
DefaultProviderRaceConfig returns default provider race configuration.
type ProviderRaceStats ¶
type ProviderRaceStats struct {
// contains filtered or unexported fields
}
ProviderRaceStats tracks runtime effectiveness of provider racing.
func (*ProviderRaceStats) RecordRequest ¶
func (s *ProviderRaceStats) RecordRequest()
func (*ProviderRaceStats) RecordSuccess ¶
func (s *ProviderRaceStats) RecordSuccess(primaryProviderID, winnerProviderID string, winnerLatency, estimatedSaved time.Duration)
func (*ProviderRaceStats) Snapshot ¶
func (s *ProviderRaceStats) Snapshot() ProviderRaceStatsSnapshot
type ProviderRaceStatsSnapshot ¶
type ProviderRaceStatsSnapshot struct {
RequestsTotal int64 `json:"requests_total"`
SuccessfulRaces int64 `json:"successful_races"`
Hits int64 `json:"hits"`
HitRate float64 `json:"hit_rate"`
AvgWinnerLatencyMs float64 `json:"avg_winner_latency_ms"`
EstimatedLatencySavedMs float64 `json:"estimated_latency_saved_ms"`
EstimatedSavingsSamples int64 `json:"estimated_savings_samples"`
}
ProviderRaceStatsSnapshot is a point-in-time copy for APIs/UI.
type ProviderType ¶
type ProviderType string
ProviderType identifies the API format type
const (
	ProviderTypeOpenAI    ProviderType = "openai"
	ProviderTypeAnthropic ProviderType = "anthropic"
	ProviderTypeGemini    ProviderType = "gemini"
)
const ProviderTypeCloudCode ProviderType = "cloudcode"
ProviderTypeCloudCode identifies Google Cloud Code Assist API format
const ProviderTypeCopilot ProviderType = "copilot"
ProviderTypeCopilot identifies GitHub Copilot API format
type ProxyAPIHandler ¶
type ProxyAPIHandler struct {
// contains filtered or unexported fields
}
ProxyAPIHandler provides HTTP handlers for proxy management APIs
func NewProxyAPIHandler ¶
func NewProxyAPIHandler( sessionMonitor *SessionMonitor, metricsCollector *MetricsCollector, promptGuard *PromptGuard, configWatcher *ConfigWatcher, modelCompat *ModelCompatLayer, mockHandler *MockHandler, authenticator *Authenticator, ) *ProxyAPIHandler
NewProxyAPIHandler creates a new proxy API handler
func (*ProxyAPIHandler) RegisterRoutes ¶
func (h *ProxyAPIHandler) RegisterRoutes(mux *http.ServeMux)
RegisterRoutes registers all proxy API routes
type ProxyConfig ¶
type ProxyConfig struct {
Enabled bool `json:"enabled" yaml:"enabled"`
Port PortConfig `json:"port" yaml:"port"`
Route *RouteConfig `json:"route" yaml:"route"`
Routing RouteConfig `json:"routing" yaml:"routing"` // Deprecated: use Route
Connection ConnectionConfig `json:"connection" yaml:"connection"`
HealthCheck HealthCheckConfig `json:"health_check" yaml:"health_check"`
Mock *MockConfig `json:"mock" yaml:"mock"`
ModelCompat *ModelCompatConfig `json:"model_compat" yaml:"model_compat"`
Watcher *WatcherConfig `json:"watcher" yaml:"watcher"`
Masking *MaskingConfig `json:"masking" yaml:"masking"`
ModelRouter *ModelRouterConfig `json:"model_router" yaml:"model_router"` // NEW: Model Router
QuotaMonitor *QuotaMonitorConfig `json:"quota_monitor" yaml:"quota_monitor"` // NEW: Quota Monitor
RuleRouting *RoutingConfig `json:"rule_routing" yaml:"rule_routing"` // Condition-based routing rules
}
ProxyConfig is the main proxy configuration
func DefaultProxyConfig ¶
func DefaultProxyConfig() *ProxyConfig
DefaultProxyConfig returns default proxy configuration
type ProxyHandler ¶
type ProxyHandler struct {
// contains filtered or unexported fields
}
ProxyHandler handles incoming proxy requests.
Architecture:
Client --[Proxy API Key]--> Proxy --[Provider API Key]--> Upstream (Anthropic/OpenAI)
- Proxy API Key: used by the Authenticator to validate client requests (optional).
- Provider API Key: retrieved from the Provider Pool to call upstream APIs.
The Proxy uses the Provider Pool's Router to:
1. Select the best provider based on model and routing strategy.
2. Get the API key for the selected provider.
3. Forward the request to the upstream provider.
Routing modes (determined by API Key scope):
- route:auto - Auto select best provider (default)
- route:cloud - Force cloud provider (zimaos-blue-trial)
- route:local - Force local provider
Struct layout: hot-path fields first (same cache line), cold fields after.
func NewProxyHandler ¶
func NewProxyHandler(router *Router, connPool *ConnectionPool, failover *FailoverHandler) *ProxyHandler
NewProxyHandler creates a new proxy handler
func (*ProxyHandler) GetPipelineStats ¶
func (ph *ProxyHandler) GetPipelineStats() *PipelineStatsCollector
GetPipelineStats returns the pipeline stats collector (may be nil).
func (*ProxyHandler) GetPromptCacheStats ¶
func (ph *ProxyHandler) GetPromptCacheStats() PromptCacheSnapshot
GetPromptCacheStats returns prompt cache stats snapshot.
func (*ProxyHandler) GetProviderMemory ¶
func (ph *ProxyHandler) GetProviderMemory() *ProviderMemory
GetProviderMemory returns the provider memory instance for external access.
func (*ProxyHandler) GetProviderRaceStats ¶
func (ph *ProxyHandler) GetProviderRaceStats() ProviderRaceStatsSnapshot
GetProviderRaceStats returns a snapshot of provider-race effectiveness.
func (*ProxyHandler) GetRoutingRules ¶
func (ph *ProxyHandler) GetRoutingRules() []RoutingRule
GetRoutingRules returns the current routing rules with their enabled state.
func (*ProxyHandler) GetRoutingStats ¶
func (ph *ProxyHandler) GetRoutingStats() RoutingStatsSnapshot
GetRoutingStats returns a snapshot of routing cost savings.
func (*ProxyHandler) GetRoutingStatsRef ¶
func (ph *ProxyHandler) GetRoutingStatsRef() *RoutingStats
GetRoutingStatsRef returns the RoutingStats reference for external wiring.
func (*ProxyHandler) IsPromptCacheEnabled ¶
func (ph *ProxyHandler) IsPromptCacheEnabled() bool
IsPromptCacheEnabled returns whether Anthropic prompt caching is enabled.
func (*ProxyHandler) IsRoutingEnabled ¶
func (ph *ProxyHandler) IsRoutingEnabled() bool
IsRoutingEnabled returns whether model routing is enabled.
func (*ProxyHandler) ServeHTTP ¶
func (ph *ProxyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
ServeHTTP implements http.Handler.
func (*ProxyHandler) SetAPIKeyValidator ¶
func (ph *ProxyHandler) SetAPIKeyValidator(validator func(key string) ([]string, error))
SetAPIKeyValidator sets the API key validator function.
func (*ProxyHandler) SetAuthProber ¶
func (ph *ProxyHandler) SetAuthProber(ap *AuthProber)
SetAuthProber sets the auth strategy prober.
func (*ProxyHandler) SetDataMasker ¶
func (ph *ProxyHandler) SetDataMasker(dm *DataMasker)
SetDataMasker sets the data masker for request/response content masking.
func (*ProxyHandler) SetModelRouter ¶
func (ph *ProxyHandler) SetModelRouter(mr *ModelRouter)
SetModelRouter sets the model router for family-based routing and background downgrade.
func (*ProxyHandler) SetOAuthManager ¶
func (ph *ProxyHandler) SetOAuthManager(m OAuthTokenProvider)
SetOAuthManager sets the OAuth token provider for OAuth-authenticated providers.
func (*ProxyHandler) SetPipelineStats ¶
func (ph *ProxyHandler) SetPipelineStats(ps *PipelineStatsCollector)
SetPipelineStats sets the unified pipeline stats collector.
func (*ProxyHandler) SetPromptCacheEnabled ¶
func (ph *ProxyHandler) SetPromptCacheEnabled(enabled bool)
SetPromptCacheEnabled toggles Anthropic prompt caching on/off at runtime.
func (*ProxyHandler) SetProviderPool ¶
func (ph *ProxyHandler) SetProviderPool(pool *providerpool.Pool)
SetProviderPool sets the Provider Pool for routing and API key lookup. Triggers background auth probing for all enabled providers so the first real request hits the cached-strategy fast path instead of probing live.
func (*ProxyHandler) SetProviderRaceConfig ¶
func (ph *ProxyHandler) SetProviderRaceConfig(cfg ProviderRaceConfig)
SetProviderRaceConfig sets concurrent provider race behavior.
func (*ProxyHandler) SetPruner ¶
func (ph *ProxyHandler) SetPruner(mw *pruner.Middleware)
SetPruner sets the context pruner middleware for the proxy handler.
func (*ProxyHandler) SetPrunerFactory ¶
func (ph *ProxyHandler) SetPrunerFactory(factory func() *pruner.Middleware)
SetPrunerFactory sets a lazy factory that creates the pruner middleware on first use. The factory is called at most once, on the first request that needs pruning.
func (*ProxyHandler) SetResponsesContextCompressor ¶
func (ph *ProxyHandler) SetResponsesContextCompressor(compressor ResponsesContextCompressor)
SetResponsesContextCompressor overrides continuation assistant compression. Pass nil to restore the default heuristic compressor.
func (*ProxyHandler) SetRoutingEnabled ¶
func (ph *ProxyHandler) SetRoutingEnabled(enabled bool)
SetRoutingEnabled toggles model routing on/off at runtime.
func (*ProxyHandler) SetRoutingRuleEnabled ¶
func (ph *ProxyHandler) SetRoutingRuleEnabled(name string, enabled bool) bool
SetRoutingRuleEnabled enables or disables a single routing rule by name.
func (*ProxyHandler) SetRuleEngine ¶
func (ph *ProxyHandler) SetRuleEngine(re *RuleEngine)
SetRuleEngine sets the condition-based rule engine for tier routing.
func (*ProxyHandler) SetSTTService ¶
func (ph *ProxyHandler) SetSTTService(service stt.Service)
SetSTTService sets the STT service used for converting input_audio into text before forwarding to /responses endpoints.
func (*ProxyHandler) SetSessionMonitor ¶
func (ph *ProxyHandler) SetSessionMonitor(sm *SessionMonitor)
SetSessionMonitor wires session monitoring into the real request chain.
func (*ProxyHandler) SetTierResolver ¶
func (ph *ProxyHandler) SetTierResolver(tr *TierResolver)
SetTierResolver sets the dynamic tier resolver and wires it into the rule engine and model router for tier-based model resolution.
type ProxyServer ¶
type ProxyServer struct {
// contains filtered or unexported fields
}
ProxyServer is the main proxy server
func NewProxyServer ¶
func NewProxyServer(config *ProxyConfig) (*ProxyServer, error)
NewProxyServer creates a new proxy server
func (*ProxyServer) GetEndpoint ¶
func (ps *ProxyServer) GetEndpoint() string
GetEndpoint returns the proxy endpoint URL
func (*ProxyServer) GetModelRouter ¶
func (ps *ProxyServer) GetModelRouter() *ModelRouter
GetModelRouter returns the model router
func (*ProxyServer) GetQuotaMonitor ¶
func (ps *ProxyServer) GetQuotaMonitor() *QuotaMonitor
GetQuotaMonitor returns the quota monitor
func (*ProxyServer) Stats ¶
func (ps *ProxyServer) Stats() map[string]interface{}
Stats returns proxy server statistics
type QuotaInfo ¶
type QuotaInfo struct {
Provider string `json:"provider"`
AccountID string `json:"account_id,omitempty"`
TotalQuota int64 `json:"total_quota"` // Total allowed requests/tokens
UsedQuota int64 `json:"used_quota"` // Used requests/tokens
RemainingPct float64 `json:"remaining_pct"` // Remaining percentage (0-100)
ResetTime time.Time `json:"reset_time"` // When quota resets
LastSync time.Time `json:"last_sync"` // Last sync time
Status string `json:"status"` // "healthy", "warning", "critical", "banned", "rate_limited"
RateLimited bool `json:"rate_limited"` // Currently rate limited (429)
BanDetected bool `json:"ban_detected"` // 403 ban detected
}
QuotaInfo represents quota information for a provider/account
type QuotaMonitor ¶
type QuotaMonitor struct {
// contains filtered or unexported fields
}
QuotaMonitor tracks and manages quota information
func NewQuotaMonitor ¶
func NewQuotaMonitor(config *QuotaMonitorConfig) *QuotaMonitor
NewQuotaMonitor creates a new quota monitor
func (*QuotaMonitor) ClearBan ¶
func (qm *QuotaMonitor) ClearBan(provider string)
ClearBan clears ban status for a provider
func (*QuotaMonitor) ClearRateLimit ¶
func (qm *QuotaMonitor) ClearRateLimit(provider string)
ClearRateLimit clears rate limit status for a provider
func (*QuotaMonitor) GetBestProvider ¶
func (qm *QuotaMonitor) GetBestProvider() string
GetBestProvider returns the provider with highest remaining quota
func (*QuotaMonitor) GetQuota ¶
func (qm *QuotaMonitor) GetQuota(provider string) *QuotaInfo
GetQuota returns quota info for a specific provider
func (*QuotaMonitor) GetQuotaSummary ¶
func (qm *QuotaMonitor) GetQuotaSummary() *QuotaSummary
GetQuotaSummary returns a summary of all quotas for dashboard display
func (*QuotaMonitor) IsProviderAvailable ¶
func (qm *QuotaMonitor) IsProviderAvailable(provider string) bool
IsProviderAvailable checks if a provider is available (not banned or rate limited)
func (*QuotaMonitor) RecordError ¶
func (qm *QuotaMonitor) RecordError(provider string, statusCode int)
RecordError records an error response
func (*QuotaMonitor) RecordRequest ¶
func (qm *QuotaMonitor) RecordRequest(provider string)
RecordRequest records a request (without response details)
func (*QuotaMonitor) RecordUsage ¶
func (qm *QuotaMonitor) RecordUsage(provider string, resp *http.Response, tokensUsed int64)
RecordUsage records usage from a response
func (*QuotaMonitor) ResetProvider ¶
func (qm *QuotaMonitor) ResetProvider(provider string)
ResetProvider resets quota tracking for a specific provider
func (*QuotaMonitor) Stats ¶
func (qm *QuotaMonitor) Stats() map[string]interface{}
Stats returns quota monitor statistics
type QuotaMonitorConfig ¶
type QuotaMonitorConfig struct {
Enabled bool `json:"enabled" yaml:"enabled"`
SyncInterval time.Duration `json:"sync_interval" yaml:"sync_interval"` // How often to sync quotas
WarningThreshold float64 `json:"warning_threshold" yaml:"warning_threshold"` // Warn when below this % (default: 20)
CriticalThreshold float64 `json:"critical_threshold" yaml:"critical_threshold"` // Critical when below this % (default: 5)
TrackTokens bool `json:"track_tokens" yaml:"track_tokens"` // Track token usage
TrackRequests bool `json:"track_requests" yaml:"track_requests"` // Track request count
}
QuotaMonitorConfig is the configuration for quota monitoring.
func DefaultQuotaMonitorConfig ¶
func DefaultQuotaMonitorConfig() *QuotaMonitorConfig
DefaultQuotaMonitorConfig returns default configuration
type QuotaSummary ¶
type QuotaSummary struct {
Providers []*QuotaInfo `json:"providers"`
AverageRemaining float64 `json:"average_remaining_pct"`
TotalProviders int `json:"total_providers"`
HealthyCount int `json:"healthy_count"`
WarningCount int `json:"warning_count"`
CriticalCount int `json:"critical_count"`
BannedCount int `json:"banned_count"`
BestProvider string `json:"best_provider"`
}
QuotaSummary provides dashboard-ready quota information
type RateLimitConfig ¶
type RateLimitConfig struct {
Enabled bool `json:"enabled"`
RequestsPerMin int `json:"requests_per_min"`
BurstSize int `json:"burst_size"`
PerIP bool `json:"per_ip"`
PerAPIKey bool `json:"per_api_key"`
CleanupInterval time.Duration `json:"cleanup_interval"`
}
RateLimitConfig holds rate limiting configuration
func DefaultRateLimitConfig ¶
func DefaultRateLimitConfig() *RateLimitConfig
DefaultRateLimitConfig returns default rate limit configuration
type RegexRule ¶
type RegexRule struct {
Pattern string `json:"pattern" yaml:"pattern"` // Regex pattern to match
Target string `json:"target" yaml:"target"` // Target model ID
Provider string `json:"provider" yaml:"provider"` // Target provider
Priority int `json:"priority" yaml:"priority"` // Rule priority (lower = higher)
Description string `json:"description" yaml:"description"` // Human-readable description
}
RegexRule allows expert-level model redirection
type RequestMetrics ¶
type RequestMetrics struct {
Timestamp time.Time `json:"timestamp"`
Provider string `json:"provider"`
Model string `json:"model"`
StatusCode int `json:"status_code"`
Latency time.Duration `json:"latency"` // Total response time
TTFT time.Duration `json:"ttft"` // Time to First Token
TokensIn int64 `json:"tokens_in"`
TokensOut int64 `json:"tokens_out"`
RequestSize int64 `json:"request_size"`
ResponseSize int64 `json:"response_size"`
Success bool `json:"success"`
Cached bool `json:"cached"`
Streaming bool `json:"streaming"`
ProxyOverhead time.Duration `json:"proxy_overhead"` // Time spent in proxy processing
}
RequestMetrics holds metrics for a single request
type ResolvedRoute ¶
type ResolvedRoute struct {
Provider string // display name
ProviderID string // internal ID (for sticky routing)
Model string
}
ResolvedRoute carries the actual provider/model chosen by the router. Callers embed a pointer in the request context; the proxy handler populates it.
func GetResolvedRouteFromContext ¶
func GetResolvedRouteFromContext(ctx context.Context) *ResolvedRoute
GetResolvedRouteFromContext returns the ResolvedRoute pointer carried in ctx. It is the exported version of getResolvedRoute.
type ResponseCache ¶
type ResponseCache struct {
// contains filtered or unexported fields
}
ResponseCache provides caching for API responses
func NewResponseCache ¶
func NewResponseCache(config *PerformanceConfig) *ResponseCache
NewResponseCache creates a new response cache
func (*ResponseCache) GenerateCacheKey ¶
func (rc *ResponseCache) GenerateCacheKey(provider, model, prompt string) string
GenerateCacheKey generates a cache key from request parameters
func (*ResponseCache) Get ¶
func (rc *ResponseCache) Get(key string) (*CacheEntry, bool)
Get retrieves a cached response
func (*ResponseCache) Stats ¶
func (rc *ResponseCache) Stats() map[string]interface{}
Stats returns cache statistics
type ResponsesAssistantCompressionInput ¶
type ResponsesAssistantCompressionInput struct {
AssistantText string
UserText string
Mode ResponsesAssistantCompressionMode
MaxRunes int
}
type ResponsesAssistantCompressionMode ¶
type ResponsesAssistantCompressionMode string
const (
	ResponsesAssistantCompressionModeGeneral ResponsesAssistantCompressionMode = "general"
	ResponsesAssistantCompressionModeChoice  ResponsesAssistantCompressionMode = "choice"
)
type ResponsesContextCompressor ¶
type ResponsesContextCompressor interface {
CompressAssistantContext(input ResponsesAssistantCompressionInput) (string, error)
}
ResponsesContextCompressor is an abstraction layer for continuation context compression. The default implementation is heuristic; callers can plug in a local small model compressor (for example qwen 0.8B) later.
type RestrictionsHandler ¶
type RestrictionsHandler struct {
// contains filtered or unexported fields
}
RestrictionsHandler handles provider restriction management endpoints.
func NewRestrictionsHandler ¶
func NewRestrictionsHandler(pm *ProviderMemory) *RestrictionsHandler
NewRestrictionsHandler creates a new restrictions handler.
func (*RestrictionsHandler) ClearModelBlacklist ¶
func (h *RestrictionsHandler) ClearModelBlacklist(c echo.Context) error
ClearModelBlacklist clears blacklist for a specific model on a provider. POST /api/v1/restrictions/providers/{provider}/clear-model/{model}
func (*RestrictionsHandler) ClearProviderThrottle ¶
func (h *RestrictionsHandler) ClearProviderThrottle(c echo.Context) error
ClearProviderThrottle clears throttle restriction for a provider. POST /api/v1/restrictions/providers/{provider}/clear-throttle
func (*RestrictionsHandler) GetProviderRestrictions ¶
func (h *RestrictionsHandler) GetProviderRestrictions(c echo.Context) error
GetProviderRestrictions returns current restrictions for a provider. GET /api/v1/restrictions/providers/{provider}
func (*RestrictionsHandler) RegisterRoutes ¶
func (h *RestrictionsHandler) RegisterRoutes(g *echo.Group)
RegisterRoutes registers restriction management routes.
type RetryableErrorType ¶
type RetryableErrorType string
RetryableErrorType defines specific error types
const (
	// Context/Token Limits - MUST failover to provider with larger context
	ErrorTypeContextTooLong    RetryableErrorType = "context_too_long"
	ErrorTypeMaxTokensExceeded RetryableErrorType = "max_tokens_exceeded"

	// Rate/Quota Limits - failover to different provider or API key
	ErrorTypeRateLimited      RetryableErrorType = "rate_limited"
	ErrorTypeQuotaExceeded    RetryableErrorType = "quota_exceeded"
	ErrorTypeConcurrencyLimit RetryableErrorType = "concurrency_limit"

	// Provider Issues - failover to different provider
	ErrorTypeModelOverloaded RetryableErrorType = "model_overloaded"
	ErrorTypeTimeout         RetryableErrorType = "timeout"

	// Streaming Anomalies - force stop and attempt recovery
	ErrorTypeRepetitiveOutput RetryableErrorType = "repetitive_output"
	ErrorTypeInfiniteLoop     RetryableErrorType = "infinite_loop"

	// Model not configured — model appears in list but isn't usable on this provider.
	// Should failover to next provider (model may work elsewhere).
	ErrorTypeModelNotConfigured RetryableErrorType = "model_not_configured"

	// Non-retryable
	ErrorTypeInvalidRequest RetryableErrorType = "invalid_request"
	ErrorTypeAuthFailed     RetryableErrorType = "auth_failed"
	ErrorTypeModelNotFound  RetryableErrorType = "model_not_found"
	ErrorTypeNetworkError   RetryableErrorType = "network_error"
	ErrorTypeUnknown        RetryableErrorType = "unknown"
)
type RouteCondition ¶
type RouteCondition struct {
Header string `yaml:"header,omitempty" json:"header,omitempty"`
HeaderValue string `yaml:"header_value,omitempty" json:"header_value,omitempty"`
MaxBodyBytes int `yaml:"max_body_bytes,omitempty" json:"max_body_bytes,omitempty"`
ToolPattern string `yaml:"tool_pattern,omitempty" json:"tool_pattern,omitempty"`
SystemTag string `yaml:"system_tag,omitempty" json:"system_tag,omitempty"`
}
RouteCondition defines when a rule matches. All non-zero fields must match (AND logic).
type RouteConfig ¶
type RouteConfig struct {
DefaultProvider string `json:"default_provider" yaml:"default_provider"`
LoadBalancing string `json:"load_balancing" yaml:"load_balancing"` // priority, round-robin, weighted
Providers []*ProviderConfig `json:"providers" yaml:"providers"`
Failover FailoverConfig `json:"failover" yaml:"failover"`
}
RouteConfig is the route configuration: default provider, load-balancing strategy, provider list, and failover settings.
type RouteDecision ¶
type RouteDecision struct {
Matched bool `json:"-"`
Model string `json:"model"`
Origin ModelOrigin `json:"origin"`
Tier ModelTier `json:"tier,omitempty"`
Fallback string `json:"fallback,omitempty"`
Rule string `json:"rule"`
Reason string `json:"reason"`
}
RouteDecision is the output of rule evaluation.
type RouteRequest ¶
type RouteRequest struct {
Headers http.Header
BodySize int
ToolNames []string
SystemMessage string
}
RouteRequest is the input to rule evaluation.
type Router ¶
type Router struct {
// contains filtered or unexported fields
}
Router handles provider selection
func (*Router) AddProvider ¶
func (r *Router) AddProvider(config *ProviderConfig)
AddProvider adds a new provider
func (*Router) GetAllProviders ¶
GetAllProviders returns all providers
func (*Router) GetAvailableProviders ¶
GetAvailableProviders returns enabled and healthy providers
func (*Router) GetProvider ¶
GetProvider returns a provider by name
func (*Router) RemoveProvider ¶
RemoveProvider removes a provider
func (*Router) SelectProvider ¶
SelectProvider selects the best available provider
func (*Router) SetProviderEnabled ¶
SetProviderEnabled enables or disables a provider
func (*Router) UpdateHealth ¶
UpdateHealth updates provider health status
type RoutingConfig ¶
type RoutingConfig struct {
Enabled bool `yaml:"enabled" json:"enabled"`
Rules []RoutingRule `yaml:"rules" json:"rules"`
ModelOrigins map[string]string `yaml:"model_origins" json:"model_origins"`
Pricing map[string]ModelPricing `yaml:"pricing" json:"pricing"`
}
RoutingConfig is the top-level routing configuration.
func DefaultRoutingConfig ¶
func DefaultRoutingConfig() *RoutingConfig
DefaultRoutingConfig returns a RoutingConfig with built-in tier-based rules. Rules reference abstract large/small tiers instead of specific model names. The TierResolver dynamically maps tiers to actual models based on user's available model list and built-in small-model allowlist.
func ParseRoutingConfig ¶
func ParseRoutingConfig(data []byte) (*RoutingConfig, error)
ParseRoutingConfig parses YAML bytes into a RoutingConfig.
func (*RoutingConfig) ToRuleEngine ¶
func (c *RoutingConfig) ToRuleEngine(tierResolver ...*TierResolver) *RuleEngine
ToRuleEngine creates a RuleEngine from this config. The optional TierResolver enables dynamic tier → model resolution.
type RoutingRule ¶
type RoutingRule struct {
Name string `yaml:"name" json:"name"`
Priority int `yaml:"priority" json:"priority"`
Condition RouteCondition `yaml:"condition" json:"condition"`
TargetModel string `yaml:"target_model" json:"target_model"`
Origin ModelOrigin `yaml:"origin" json:"origin"`
Tier ModelTier `yaml:"tier" json:"tier,omitempty"`
Fallback string `yaml:"fallback" json:"fallback,omitempty"`
Enabled *bool `yaml:"enabled,omitempty" json:"enabled,omitempty"` // nil = enabled (default true)
}
RoutingRule defines a single condition-based routing rule. Routes requests to cheaper/smaller models based on task complexity.
func (*RoutingRule) Evaluate ¶
func (r *RoutingRule) Evaluate(req *RouteRequest) *RouteDecision
Evaluate checks if a single rule matches the request (no pre-compiled regex). For hot-path usage, prefer RuleEngine which pre-compiles regexes and reason strings.
func (*RoutingRule) IsEnabled ¶
func (r *RoutingRule) IsEnabled() bool
IsEnabled returns whether this rule is enabled (nil defaults to true).
type RoutingStats ¶
type RoutingStats struct {
// contains filtered or unexported fields
}
RoutingStats tracks cost savings from model routing (rule engine + background downgrade).
func NewRoutingStats ¶
func NewRoutingStats() *RoutingStats
NewRoutingStats creates a new RoutingStats instance.
func (*RoutingStats) Load ¶
func (s *RoutingStats) Load(routedRequests, tokensRouted, costSavedMicro int64)
Load restores persisted counters (called on startup).
func (*RoutingStats) Record ¶
func (s *RoutingStats) Record(originalModel, actualModel string, respBody []byte)
Record calculates and accumulates cost savings for a routed request. originalModel is the model the client requested; actualModel is what was used.
func (*RoutingStats) RecordTokens ¶
func (s *RoutingStats) RecordTokens(originalModel, actualModel string, inputTokens, outputTokens int)
RecordTokens accumulates cost savings from pre-parsed token counts. Used by streaming paths where tokens are already extracted.
func (*RoutingStats) Snapshot ¶
func (s *RoutingStats) Snapshot() RoutingStatsSnapshot
Snapshot returns a point-in-time copy.
type RoutingStatsSnapshot ¶
type RoutingStatsSnapshot struct {
RoutedRequests int64 `json:"routed_requests"`
TokensRouted int64 `json:"tokens_routed"`
CostSavedUSD float64 `json:"cost_saved_usd"`
}
RoutingStatsSnapshot is a point-in-time copy of routing stats.
type RuleEngine ¶
type RuleEngine struct {
// contains filtered or unexported fields
}
RuleEngine evaluates routing rules in priority order with pre-compiled regexes.
func NewRuleEngine ¶
func NewRuleEngine(rules []RoutingRule, tierResolver ...*TierResolver) *RuleEngine
NewRuleEngine creates a rule engine, sorting rules by priority (lower = higher). The optional TierResolver is used to resolve tier-based rules (where TargetModel is empty).
func (*RuleEngine) Evaluate ¶
func (e *RuleEngine) Evaluate(req *RouteRequest) *RouteDecision
Evaluate returns the first matching rule's pre-built decision, or nil if none match. Condition checks are inlined to avoid function call overhead and heap allocation. Rules without conditions are filtered at init time — no need to re-check here.
func (*RuleEngine) GetRules ¶
func (e *RuleEngine) GetRules() []RoutingRule
GetRules returns a snapshot of all rules with their enabled state.
func (*RuleEngine) SetRuleEnabled ¶
func (e *RuleEngine) SetRuleEnabled(name string, enabled bool) bool
SetRuleEnabled enables or disables a rule by name. Returns false if not found.
func (*RuleEngine) SetTierResolver ¶
func (e *RuleEngine) SetTierResolver(tr *TierResolver)
SetTierResolver sets or replaces the tier resolver for dynamic model resolution.
type Session ¶
type Session struct {
ID string `json:"id"`
StartTime time.Time `json:"start_time"`
LastActivity time.Time `json:"last_activity"`
Provider string `json:"provider"`
Model string `json:"model"`
TokensIn int64 `json:"tokens_in"`
TokensOut int64 `json:"tokens_out"`
Status SessionStatus `json:"status"`
RequestCount int `json:"request_count"`
ErrorCount int `json:"error_count"`
ClientIP string `json:"client_ip"`
UserAgent string `json:"user_agent"`
}
Session represents an API session
type SessionConfig ¶
type SessionConfig struct {
Enabled bool `json:"enabled"`
IdleTimeout time.Duration `json:"idle_timeout"`
MaxSessions int `json:"max_sessions"`
CleanupPeriod time.Duration `json:"cleanup_period"`
RetainComplete time.Duration `json:"retain_complete"`
}
SessionConfig holds session monitoring configuration
func DefaultSessionConfig ¶
func DefaultSessionConfig() *SessionConfig
DefaultSessionConfig returns default session configuration
type SessionMonitor ¶
type SessionMonitor struct {
// contains filtered or unexported fields
}
SessionMonitor manages API sessions
func NewSessionMonitor ¶
func NewSessionMonitor(config *SessionConfig) *SessionMonitor
NewSessionMonitor creates a new session monitor
func (*SessionMonitor) CleanupIdleSessions ¶
func (sm *SessionMonitor) CleanupIdleSessions() int
CleanupIdleSessions removes idle sessions
func (*SessionMonitor) CompleteSession ¶
func (sm *SessionMonitor) CompleteSession(id string, status SessionStatus)
CompleteSession marks a session as completed
func (*SessionMonitor) GetSession ¶
func (sm *SessionMonitor) GetSession(id string) (*Session, bool)
GetSession returns a session by ID
func (*SessionMonitor) ListActiveSessions ¶
func (sm *SessionMonitor) ListActiveSessions() []*Session
ListActiveSessions returns only active sessions
func (*SessionMonitor) ListSessions ¶
func (sm *SessionMonitor) ListSessions() []*Session
ListSessions returns all sessions
func (*SessionMonitor) RecordError ¶
func (sm *SessionMonitor) RecordError(id string)
RecordError records an error for a session
func (*SessionMonitor) StartSession ¶
func (sm *SessionMonitor) StartSession(clientIP, userAgent string) *Session
StartSession creates a new session
func (*SessionMonitor) Stats ¶
func (sm *SessionMonitor) Stats() map[string]interface{}
Stats returns session statistics
func (*SessionMonitor) UpdateSession ¶
func (sm *SessionMonitor) UpdateSession(id string, provider, model string, tokensIn, tokensOut int64)
UpdateSession updates session with request data
type SessionStatus ¶
type SessionStatus string
SessionStatus represents the status of a session
const (
	SessionStatusActive    SessionStatus = "active"
	SessionStatusCompleted SessionStatus = "completed"
	SessionStatusFailed    SessionStatus = "failed"
	SessionStatusTimeout   SessionStatus = "timeout"
)
type SmartFailoverHandler ¶
type SmartFailoverHandler struct {
*FailoverHandler
// contains filtered or unexported fields
}
SmartFailoverHandler extends FailoverHandler with intelligent error classification
func NewSmartFailoverHandler ¶
func NewSmartFailoverHandler(config *FailoverConfig, router *Router) *SmartFailoverHandler
NewSmartFailoverHandler creates a new smart failover handler
func (*SmartFailoverHandler) ExecuteStreamingWithAnomalyDetection ¶
func (sfh *SmartFailoverHandler) ExecuteStreamingWithAnomalyDetection( ctx context.Context, provider *Provider, fn func(*Provider) (*http.Response, error), onAnomaly func(*StreamAnomaly, *StreamRecoveryStrategy), ) (*http.Response, error)
ExecuteStreamingWithAnomalyDetection executes streaming request with anomaly detection
func (*SmartFailoverHandler) ExecuteWithSmartFailover ¶
func (sfh *SmartFailoverHandler) ExecuteWithSmartFailover( ctx context.Context, provider *Provider, fn func(*Provider) (*http.Response, error), ) (*http.Response, error)
ExecuteWithSmartFailover executes request with intelligent failover
func (*SmartFailoverHandler) GetClassifier ¶
func (sfh *SmartFailoverHandler) GetClassifier() *APIErrorClassifier
GetClassifier returns the error classifier
func (*SmartFailoverHandler) GetMetrics ¶
func (sfh *SmartFailoverHandler) GetMetrics() *FailoverMetrics
GetMetrics returns failover metrics
type StreamAnomaly ¶
type StreamAnomaly struct {
Type RetryableErrorType `json:"type"`
Pattern string `json:"pattern"`
RepeatCount int `json:"repeat_count"`
Message string `json:"message"`
Position int `json:"position"` // Position in buffer where anomaly was detected
}
StreamAnomaly represents a detected streaming anomaly
type StreamBuffer ¶
type StreamBuffer struct {
// contains filtered or unexported fields
}
StreamBuffer maintains a sliding window of streamed content
func NewStreamBuffer ¶
func NewStreamBuffer(detector *StreamingAnomalyDetector) *StreamBuffer
NewStreamBuffer creates a new stream buffer
func (*StreamBuffer) GetLastAnomaly ¶
func (sb *StreamBuffer) GetLastAnomaly() *StreamAnomaly
GetLastAnomaly returns the last detected anomaly
func (*StreamBuffer) GetTotalBytes ¶
func (sb *StreamBuffer) GetTotalBytes() int64
GetTotalBytes returns total bytes processed
func (*StreamBuffer) GetValidContent ¶
func (sb *StreamBuffer) GetValidContent() []byte
GetValidContent returns content before the anomaly (if any)
func (*StreamBuffer) HasAnomaly ¶
func (sb *StreamBuffer) HasAnomaly() bool
HasAnomaly returns whether an anomaly was detected
func (*StreamBuffer) Write ¶
func (sb *StreamBuffer) Write(chunk []byte) (*StreamAnomaly, error)
Write adds content to buffer and checks for anomalies
type StreamChunkChoice ¶
type StreamChunkChoice struct {
Index int `json:"index"`
Delta StreamChunkDelta `json:"delta"`
FinishReason *string `json:"finish_reason"`
}
StreamChunkChoice is a single choice in an OpenAI streaming chunk.
type StreamChunkDelta ¶
type StreamChunkDelta struct {
Role string `json:"role,omitempty"`
Content string `json:"content,omitempty"`
ToolCalls []StreamChunkToolCall `json:"tool_calls,omitempty"`
}
StreamChunkDelta is the delta object inside an OpenAI streaming chunk choice.
type StreamChunkToolCall ¶
type StreamChunkToolCall struct {
Index int `json:"index"`
ID string `json:"id,omitempty"`
Type string `json:"type,omitempty"`
Function StreamChunkToolCallFunc `json:"function,omitempty"`
}
StreamChunkToolCall is a single tool call inside a streaming delta.
type StreamChunkToolCallFunc ¶
type StreamChunkToolCallFunc struct {
Name string `json:"name,omitempty"`
Arguments string `json:"arguments,omitempty"`
}
StreamChunkToolCallFunc is the function part of a streaming tool call.
type StreamChunkUsage ¶
type StreamChunkUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
CacheReadInputTokens int `json:"cache_read_input_tokens,omitempty"`
CacheCreationInputTokens int `json:"cache_creation_input_tokens,omitempty"`
}
StreamChunkUsage is the usage object in an OpenAI streaming chunk.
type StreamRecoveryStrategy ¶
type StreamRecoveryStrategy struct {
ForceStop bool `json:"force_stop"`
RetryWithTruncation bool `json:"retry_with_truncation"`
TruncateToTokens int `json:"truncate_to_tokens"`
FailoverToProvider string `json:"failover_to_provider"`
AddStopSequences []string `json:"add_stop_sequences"`
}
StreamRecoveryStrategy defines how to recover from streaming anomalies
func GetRecoveryStrategy ¶
func GetRecoveryStrategy(anomaly *StreamAnomaly, config StreamingAnomalyConfig) *StreamRecoveryStrategy
GetRecoveryStrategy returns appropriate recovery strategy for anomaly
type StreamingAnomalyConfig ¶
type StreamingAnomalyConfig struct {
Enabled bool `json:"enabled" yaml:"enabled"`
WindowSize int `json:"window_size" yaml:"window_size"`
RepeatThreshold int `json:"repeat_threshold" yaml:"repeat_threshold"`
MinPatternLength int `json:"min_pattern_length" yaml:"min_pattern_length"`
MaxPatternLength int `json:"max_pattern_length" yaml:"max_pattern_length"`
RecoveryStrategy string `json:"recovery_strategy" yaml:"recovery_strategy"` // truncate_and_retry, failover, stop
}
StreamingAnomalyConfig holds the configuration for anomaly detection.
func DefaultStreamingAnomalyConfig ¶
func DefaultStreamingAnomalyConfig() StreamingAnomalyConfig
DefaultStreamingAnomalyConfig returns default configuration
type StreamingAnomalyDetector ¶
type StreamingAnomalyDetector struct {
// contains filtered or unexported fields
}
StreamingAnomalyDetector detects repetitive/looping output in streaming responses
func NewStreamingAnomalyDetector ¶
func NewStreamingAnomalyDetector(config StreamingAnomalyConfig) *StreamingAnomalyDetector
NewStreamingAnomalyDetector creates a new detector
type TierResolver ¶
type TierResolver struct {
// contains filtered or unexported fields
}
TierResolver assigns models into two stable tiers:
- TierLarge: all non-small models (typically big LLMs)
- TierSmall: fixed built-in small-model allowlist
Smart routing is enabled only when both tiers are present.
func NewTierResolver ¶
func NewTierResolver() *TierResolver
NewTierResolver creates an empty TierResolver.
func (*TierResolver) BestModelForTier ¶
func (tr *TierResolver) BestModelForTier(tier ModelTier) string
BestModelForTier returns the top-ranked model in the requested tier. Returns "" if no model is available.
func (*TierResolver) IsEnabled ¶
func (tr *TierResolver) IsEnabled() bool
IsEnabled returns true when both large and small tiers are available.
func (*TierResolver) ModelTierOf ¶
func (tr *TierResolver) ModelTierOf(modelID string) ModelTier
ModelTierOf returns the tier for a specific model ID. Returns "" if the model is not known.
func (*TierResolver) Resolve ¶
func (tr *TierResolver) Resolve(models []*providerpool.Model) bool
Resolve analyzes available models and assigns large/small tiers deterministically. Returns true when both tiers are present (smart routing viable). Called on provider change (same hook as candidateSnapshot rebuild).
func (*TierResolver) Stats ¶
func (tr *TierResolver) Stats() map[string]interface{}
Stats returns a summary of resolved tiers for diagnostics.
type TieredModel ¶
type TieredModel struct {
ModelID string `json:"model_id"`
ProviderID string `json:"provider_id"`
Tier ModelTier `json:"tier"`
TotalCost float64 `json:"total_cost"` // InputPrice + OutputPrice per 1M tokens
}
TieredModel holds a model with its resolved tier and cost.
type ToggleState ¶
type ToggleState struct {
PrunerEnabled bool `json:"pruner_enabled"`
RoutingEnabled bool `json:"routing_enabled"`
MaskingEnabled bool `json:"masking_enabled"`
MaskingRules map[string]bool `json:"masking_rules,omitempty"`
PrunerBackend string `json:"pruner_backend,omitempty"`
RoutingRules map[string]bool `json:"routing_rules,omitempty"`
PromptCacheEnabled bool `json:"prompt_cache_enabled"`
FailoverConfig *FailoverConfig `json:"failover_config,omitempty"`
Version int `json:"version,omitempty"` // migration marker
}
ToggleState holds persisted feature toggle states.
type ToggleStore ¶
type ToggleStore struct {
// contains filtered or unexported fields
}
ToggleStore persists feature toggle states via kvstore.
func NewToggleStore ¶
func NewToggleStore(kv kvstore.Store) *ToggleStore
NewToggleStore creates a new toggle store.
func (*ToggleStore) Load ¶
func (ts *ToggleStore) Load(ctx context.Context) (*ToggleState, error)
Load retrieves persisted toggle state. Returns nil if no state has been saved yet.
func (*ToggleStore) Save ¶
func (ts *ToggleStore) Save(ctx context.Context, state *ToggleState) error
Save persists the current toggle state.
type Tool ¶
type Tool struct {
Name string `json:"name"`
Description string `json:"description"`
Parameters interface{} `json:"parameters"`
}
Tool represents a tool definition
type ToolCapLevel ¶
type ToolCapLevel int
ToolCapLevel represents the tool capability level of a provider endpoint.
const (
ToolCapNative ToolCapLevel = iota // Native tool_use supported
ToolCapPrompt // Tools via system prompt injection
ToolCapNone // No tools at all
ToolCapUnknown ToolCapLevel = -1 // Not yet probed
)
type UpstreamRequestBridge ¶
type UpstreamRequestBridge interface {
Name() string
Match(*UpstreamRequestBridgeContext) bool
Build(*UpstreamRequestBridgeContext) error
}
UpstreamRequestBridge adapts an OpenAI-edge request into a provider-specific upstream request.
type UpstreamRequestBridgeContext ¶
type UpstreamRequestBridgeContext struct {
Route *providerpool.RouteResult
Provider *providerpool.Provider
EffectiveFormat providerpool.APIFormat
TargetURL *url.URL
RequestPath string
Body []byte
PromptCacheEnabled bool
AudioTranscriber chatAudioTranscriber
}
UpstreamRequestBridgeContext carries mutable request-building state. Bridges can rewrite path/body according to endpoint or API format requirements.
type WatcherConfig ¶
type WatcherConfig struct {
Enabled bool `json:"enabled"`
PollInterval time.Duration `json:"poll_interval"`
}
WatcherConfig holds the settings for the configuration watcher.
func DefaultWatcherConfig ¶
func DefaultWatcherConfig() *WatcherConfig
DefaultWatcherConfig returns default watcher configuration
Source Files
¶
- anomaly.go
- api_handler.go
- auth.go
- auth_prober.go
- classifier.go
- compat.go
- config.go
- conn_warmup.go
- converter.go
- errors.go
- failover.go
- failover_handler.go
- guard.go
- handler.go
- hash_util.go
- health.go
- json.go
- json_repair.go
- masking.go
- metrics.go
- middleware.go
- mock.go
- model_router.go
- origin_registry.go
- performance.go
- pipeline_stats.go
- pool.go
- port.go
- prompt_cache_breaks.go
- prompt_cache_stats.go
- provider_memory.go
- provider_race_stats.go
- quota.go
- responses_adapter.go
- responses_context_compaction.go
- restrictions_handler.go
- router.go
- routing_config.go
- routing_rules.go
- routing_stats.go
- server.go
- session.go
- session_context.go
- smart_failover.go
- sse.go
- tier_resolver.go
- toggle_store.go
- upstream_request_bridge.go
- watcher.go