proxy

package

Version: v0.0.0-...-8acab51 (latest) | Published: Apr 26, 2026 | License: MIT | Imports: 46 | Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/IceWhaleTech/ZimaOS-Blue

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func ApplyPromptCaching(req *AnthropicRequest)
func BackgroundTaskFromContext(ctx context.Context) bool
func DisableModelRoutingFromContext(ctx context.Context) bool
func DisableResponsesContinuationFromContext(ctx context.Context) bool
func GetExcludedProviders(ctx context.Context) []string
func GetPinnedProvider(ctx context.Context) string
func InjectPromptCaching(body []byte) ([]byte, error)
func IsContextWindowExceededMessage(msg string) bool
func LocaleFromContext(ctx context.Context) string
func LogTokenChurn(provider, model, promptCacheKey string, ...)
func ReadPortFromFile(portFile string) (int, error)
func SanitizeError(err error) string
func SessionIDFromContext(ctx context.Context) string
func ValidateRoutingConfig(cfg *RoutingConfig) []error
func WithBackgroundTask(ctx context.Context) context.Context
func WithDisableModelRouting(ctx context.Context) context.Context
func WithDisableResponsesContinuation(ctx context.Context) context.Context
func WithExcludedProviders(ctx context.Context, providerIDs ...string) context.Context
func WithLocale(ctx context.Context, locale string) context.Context
func WithPinnedProvider(ctx context.Context, providerID string) context.Context
func WithResolvedRoute(ctx context.Context, rr *ResolvedRoute) context.Context
func WithSessionID(ctx context.Context, sessionID string) context.Context
type APIErrorClassifier
- func NewAPIErrorClassifier() *APIErrorClassifier
- func (c *APIErrorClassifier) AddProviderPatterns(provider string, patterns []ErrorPattern)
- func (c *APIErrorClassifier) ClassifyError(provider string, statusCode int, responseBody []byte) *ErrorClassification
type AnthropicCacheControl
type AnthropicContentBlock
type AnthropicMessage
type AnthropicRequest
type AnthropicResponse
type AnthropicStreamContentBlock
type AnthropicStreamDelta
type AnthropicStreamEvent
type AnthropicStreamUsage
type AnthropicSystemBlock
type AnthropicTool
type AuthConfig
- func DefaultAuthConfig() *AuthConfig
type AuthExhaustedError
- func (e *AuthExhaustedError) Error() string
type AuthProber
- func NewAuthProber() *AuthProber
- func (ap *AuthProber) Apply(req *http.Request, strategy AuthStrategy, apiKey *providerpool.APIKey, ...)
- func (ap *AuthProber) Forget(providerID, memoryKey string)
- func (ap *AuthProber) ProbeAndForward(provider *providerpool.Provider, apiKey *providerpool.APIKey, ...) (*http.Response, error)
- func (ap *AuthProber) Recall(providerID, memoryKey string) (AuthStrategy, bool)
- func (ap *AuthProber) Remember(providerID, memoryKey string, strategy AuthStrategy)
- func (ap *AuthProber) Strategies(provider *providerpool.Provider, apiKey *providerpool.APIKey, ...) []AuthStrategy
type AuthStrategy
- func (s AuthStrategy) String() string
type Authenticator
- func NewAuthenticator(authConfig *AuthConfig, rateLimitConfig *RateLimitConfig) *Authenticator
- func (a *Authenticator) AddAPIKey(key string)
- func (a *Authenticator) AddAllowedIP(ip string)
- func (a *Authenticator) Authenticate(r *http.Request) (bool, string)
- func (a *Authenticator) CheckRateLimit(r *http.Request) (bool, string)
- func (a *Authenticator) CleanupRateLimits()
- func (a *Authenticator) ListAPIKeys() []map[string]interface{}
- func (a *Authenticator) RemoveAPIKey(key string)
- func (a *Authenticator) Stats() map[string]interface{}
- func (a *Authenticator) ValidateAPIKey(key string) bool
type BreakerSnapshot
type BufferPool
- func NewBufferPool(config *PerformanceConfig) *BufferPool
- func (bp *BufferPool) Get() *bytes.Buffer
- func (bp *BufferPool) Put(buf *bytes.Buffer)
- func (bp *BufferPool) Stats() map[string]interface{}
type CacheEntry
type ChatMessage
type ChatRequest
- func (r *ChatRequest) HasImages() bool
- func (r *ChatRequest) TokenCount() int
type ConfigReloader
- func NewConfigReloader(router *Router, failover *FailoverHandler, guard *PromptGuard, ...) *ConfigReloader
- func (cr *ConfigReloader) ApplyConfig(config *ProxyConfig) error
type ConfigWatcher
- func NewConfigWatcher(configPath string, pollInterval time.Duration) *ConfigWatcher
- func (cw *ConfigWatcher) ForceReload() error
- func (cw *ConfigWatcher) GetConfigPath() string
- func (cw *ConfigWatcher) SetConfigPath(path string)
- func (cw *ConfigWatcher) Start()
- func (cw *ConfigWatcher) Stats() map[string]interface{}
- func (cw *ConfigWatcher) Stop()
type ConnWarmup
- func NewConnWarmup(pool *ConnectionPool) *ConnWarmup
- func (cw *ConnWarmup) DNSCache() *DNSCache
- func (cw *ConnWarmup) WarmOne(baseURL string)
- func (cw *ConnWarmup) WarmProviders(baseURLs []string)
type ConnectionConfig
- func DefaultConnectionConfig() *ConnectionConfig
type ConnectionMetrics
- func NewConnectionMetrics(config *PerformanceConfig) *ConnectionMetrics
- func (cm *ConnectionMetrics) RecordConnectionClosed()
- func (cm *ConnectionMetrics) RecordConnectionError()
- func (cm *ConnectionMetrics) RecordConnectionOpened()
- func (cm *ConnectionMetrics) RecordRequest(provider string)
- func (cm *ConnectionMetrics) RecordRequestComplete(provider string, success bool, latencyNs int64, reused bool)
- func (cm *ConnectionMetrics) Reset()
- func (cm *ConnectionMetrics) Stats() map[string]interface{}
type ConnectionPool
- func NewConnectionPool(config *ConnectionConfig) *ConnectionPool
- func (cp *ConnectionPool) Close()
- func (cp *ConnectionPool) CloseIdleConnectionsForProfile(profile ConnectionProfile)
- func (cp *ConnectionPool) GetClient(provider string, profiles ...ConnectionProfile) *http.Client
- func (cp *ConnectionPool) GetInsecureClient(provider string, profiles ...ConnectionProfile) *http.Client
- func (cp *ConnectionPool) GetTransport() *http.Transport
- func (cp *ConnectionPool) GetTransportForProfile(profile ConnectionProfile) *http.Transport
- func (cp *ConnectionPool) Stats() map[string]interface{}
type ConnectionProfile
type DNSCache
- func NewDNSCache(ttl time.Duration, maxSize int) *DNSCache
- func (dc *DNSCache) DialContext(dialer *net.Dialer) func(ctx context.Context, network, addr string) (net.Conn, error)
- func (dc *DNSCache) Resolve(host string) ([]string, error)
type DataMasker
- func NewDataMasker(config *MaskingConfig) *DataMasker
- func (dm *DataMasker) AddRule(rule *MaskingRule) error
- func (dm *DataMasker) GetRule(id string) (*MaskingRule, bool)
- func (dm *DataMasker) IsEnabled() bool
- func (dm *DataMasker) ListRules() []*MaskingRule
- func (dm *DataMasker) Mask(content string, direction MaskingDirection) string
- func (dm *DataMasker) MaskBytes(content []byte, direction MaskingDirection) []byte
- func (dm *DataMasker) MaskRequest(content string) string
- func (dm *DataMasker) MaskRequestBytes(content []byte) []byte
- func (dm *DataMasker) MaskResponse(content string) string
- func (dm *DataMasker) MaskResponseBytes(content []byte) []byte
- func (dm *DataMasker) RemoveRule(id string) bool
- func (dm *DataMasker) ResetStats()
- func (dm *DataMasker) SetEnabled(enabled bool)
- func (dm *DataMasker) SetLocaleFunc(f func() string)
- func (dm *DataMasker) SetRuleEnabled(id string, enabled bool) bool
- func (dm *DataMasker) Stats() map[string]interface{}
type ErrorCategory
type ErrorClassification
type ErrorClassificationConfig
type ErrorPattern
type FailoverAPIHandler
- func NewFailoverAPIHandler(smartFailover *SmartFailoverHandler, config *FailoverConfig) *FailoverAPIHandler
- func (h *FailoverAPIHandler) GetCircuitBreakerStatus(c echo.Context) error
- func (h *FailoverAPIHandler) GetConfig(c echo.Context) error
- func (h *FailoverAPIHandler) GetMetrics(c echo.Context) error
- func (h *FailoverAPIHandler) GetOverview(c echo.Context) error
- func (h *FailoverAPIHandler) RegisterRoutes(g *echo.Group)
- func (h *FailoverAPIHandler) ResetCircuitBreakers(c echo.Context) error
- func (h *FailoverAPIHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
- func (h *FailoverAPIHandler) SetOnConfigSave(fn func(*FailoverConfig) error)
- func (h *FailoverAPIHandler) SetOnProviderRaceChange(fn func(ProviderRaceConfig))
- func (h *FailoverAPIHandler) SetProviderRaceStatsProvider(fn func() ProviderRaceStatsSnapshot)
- func (h *FailoverAPIHandler) UpdateConfig(c echo.Context) error
type FailoverCircuitBreakerStatusResponse
type FailoverConfig
type FailoverHandler
- func NewFailoverHandler(config *FailoverConfig, router *Router) *FailoverHandler
- func (fh *FailoverHandler) Config() *FailoverConfig
- func (fh *FailoverHandler) Execute(ctx context.Context, provider *Provider, ...) (*http.Response, error)
- func (fh *FailoverHandler) GetBreakerState(name string) string
- func (fh *FailoverHandler) GetBreakerStats() map[string]interface{}
- func (fh *FailoverHandler) LoadBreakerState(snap BreakerSnapshot)
- func (fh *FailoverHandler) ResetAllBreakers()
- func (fh *FailoverHandler) ResetBreaker(name string)
- func (fh *FailoverHandler) SnapshotBreakers() []BreakerSnapshot
type FailoverMetrics
- func NewFailoverMetrics() *FailoverMetrics
- func (m *FailoverMetrics) GetStats() map[string]interface{}
- func (m *FailoverMetrics) RecordError(provider string, classification *ErrorClassification)
- func (m *FailoverMetrics) RecordFailover(fromProvider, toProvider string, success bool)
- func (m *FailoverMetrics) RecordProviderPoolResult(result *providerpool.FailoverResult)
- func (m *FailoverMetrics) RecordStreamAnomaly()
type FailoverMetricsResponse
type FailoverOverviewResponse
type FailoverSnapshot
type FormatConverter
- func NewFormatConverter() *FormatConverter
- func (fc *FormatConverter) ConvertModelsResponse(body []byte, sourceType ProviderType) ([]byte, error)
- func (fc *FormatConverter) ConvertRequest(body []byte, targetType ProviderType) ([]byte, string, error)
- func (fc *FormatConverter) ConvertRequestWithCaching(body []byte, targetType ProviderType, promptCacheEnabled bool) ([]byte, string, error)
- func (fc *FormatConverter) ConvertResponse(body []byte, sourceType ProviderType) ([]byte, error)
- func (fc *FormatConverter) ConvertStreamingResponse(reader io.Reader, sourceType ProviderType, writer http.ResponseWriter) error
- func (fc *FormatConverter) DetectProviderType(endpoint string) ProviderType
type GeminiCandidate
type GeminiContent
type GeminiFunctionCall
type GeminiFunctionDeclaration
type GeminiFunctionResponse
type GeminiGenerationConfig
type GeminiInlineData
type GeminiModel
type GeminiModelsResponse
type GeminiPart
type GeminiRequest
type GeminiResponse
type GeminiSafetySetting
type GeminiStreamChunk
type GeminiTool
type GeminiUsageMetadata
type GuardConfig
- func DefaultGuardConfig() *GuardConfig
type GuardResult
type GuardRule
type HealthCheckConfig
type HealthChecker
- func NewHealthChecker(router *Router, connPool *ConnectionPool, config *HealthCheckConfig) *HealthChecker
- func (hc *HealthChecker) CheckNow()
- func (hc *HealthChecker) CheckProvider(name string) error
- func (hc *HealthChecker) Start()
- func (hc *HealthChecker) Stop()
type HotReloadableConfig
type MaskingCategory
type MaskingConfig
- func DefaultMaskingConfig() *MaskingConfig
type MaskingDirection
type MaskingRule
- func GetDefaultRules() []*MaskingRule
type MetricsBucket
type MetricsCollector
- func NewMetricsCollector(config *MetricsConfig) *MetricsCollector
- func (mc *MetricsCollector) GetAllProviderMetrics() map[string]*ProviderMetrics
- func (mc *MetricsCollector) GetProviderMetrics(name string) (*ProviderMetrics, bool)
- func (mc *MetricsCollector) GetRecentRequests(limit int) []RequestMetrics
- func (mc *MetricsCollector) GetTimeSeries(start, end time.Time) []*MetricsBucket
- func (mc *MetricsCollector) LatencyStats() map[string]interface{}
- func (mc *MetricsCollector) Record(m RequestMetrics)
- func (mc *MetricsCollector) Reset()
- func (mc *MetricsCollector) Summary() map[string]interface{}
type MetricsConfig
- func DefaultMetricsConfig() *MetricsConfig
type MiddlewareFunc
type MockConfig
- func DefaultMockConfig() *MockConfig
type MockEndpoint
type MockHandler
- func NewMockHandler(config *MockConfig) *MockHandler
- func (mh *MockHandler) AddEndpoint(ep *MockEndpoint)
- func (mh *MockHandler) GetEndpoint(path, method string) (*MockEndpoint, bool)
- func (mh *MockHandler) Handle(w http.ResponseWriter, r *http.Request) bool
- func (mh *MockHandler) IsEnabled() bool
- func (mh *MockHandler) ListEndpoints() []*MockEndpoint
- func (mh *MockHandler) RemoveEndpoint(path, method string) bool
- func (mh *MockHandler) ResetStats()
- func (mh *MockHandler) SetEnabled(path, method string, enabled bool) bool
- func (mh *MockHandler) SetGlobalEnabled(enabled bool)
- func (mh *MockHandler) Stats() map[string]interface{}
type ModelCompatConfig
- func DefaultModelCompatConfig() *ModelCompatConfig
type ModelCompatLayer
- func NewModelCompatLayer(config *ModelCompatConfig) *ModelCompatLayer
- func (mcl *ModelCompatLayer) AdaptRequest(model string, req *ChatRequest) (*ChatRequest, error)
- func (mcl *ModelCompatLayer) GetAllFeatures() map[string]*ModelFeatures
- func (mcl *ModelCompatLayer) GetFeatures(model string) *ModelFeatures
- func (mcl *ModelCompatLayer) ListModels() []*ModelFeatures
- func (mcl *ModelCompatLayer) SetFeatures(model string, features *ModelFeatures)
- func (mcl *ModelCompatLayer) Stats() map[string]interface{}
type ModelFamily
type ModelFeatures
type ModelOrigin
type ModelPricing
type ModelRoute
type ModelRouter
- func NewModelRouter(config *ModelRouterConfig) (*ModelRouter, error)
- func (mr *ModelRouter) AddRule(rule *RegexRule) error
- func (mr *ModelRouter) GetFamilies() []*ModelFamily
- func (mr *ModelRouter) GetRules() []*RegexRule
- func (mr *ModelRouter) IsBackgroundRequest(r *http.Request) bool
- func (mr *ModelRouter) RemoveRule(pattern string) bool
- func (mr *ModelRouter) RouteModel(requestedModel string, isBackground bool) (*ModelRoute, error)
- func (mr *ModelRouter) SetTierResolver(tr *TierResolver)
- func (mr *ModelRouter) Stats() map[string]interface{}
type ModelRouterConfig
- func DefaultModelRouterConfig() *ModelRouterConfig
type ModelTier
type OAuthTokenProvider
type OpenAIChatRequest
type OpenAIChatResponse
type OpenAIMessage
type OpenAIStreamChunk
type OpenAITool
type OpenAIToolCall
type OpenAIToolCallFunc
- func (f *OpenAIToolCallFunc) UnmarshalJSON(data []byte) error
type OpenAIToolFunction
type OriginRegistry
- func NewOriginRegistry(patterns map[string]string) *OriginRegistry
- func (r *OriginRegistry) Resolve(model string) ModelOrigin
type PerformanceConfig
- func DefaultPerformanceConfig() *PerformanceConfig
type PerformanceManager
- func NewPerformanceManager(config *PerformanceConfig) *PerformanceManager
- func (pm *PerformanceManager) GetBufferPool() *BufferPool
- func (pm *PerformanceManager) GetCache() *ResponseCache
- func (pm *PerformanceManager) GetConnectionMetrics() *ConnectionMetrics
- func (pm *PerformanceManager) Stats() map[string]interface{}
type Pipeline
- func NewPipeline(config *PipelineConfig) *Pipeline
- func (p *Pipeline) GetAuth() *Authenticator
- func (p *Pipeline) GetGuard() *PromptGuard
- func (p *Pipeline) GetMetrics() *MetricsCollector
- func (p *Pipeline) GetSessionMonitor() *SessionMonitor
- func (p *Pipeline) Stats() map[string]interface{}
- func (p *Pipeline) Use(mw MiddlewareFunc)
- func (p *Pipeline) Wrap(handler http.Handler) http.Handler
type PipelineConfig
- func DefaultPipelineConfig() *PipelineConfig
type PipelineSnapshot
type PipelineStatsCollector
- func NewPipelineStatsCollector(db *sql.DB, routingStats *RoutingStats) *PipelineStatsCollector
- func NewPipelineStatsCollectorWithReadDB(writeDB, readDB *sql.DB, routingStats *RoutingStats) *PipelineStatsCollector
- func (c *PipelineStatsCollector) Close() error
- func (c *PipelineStatsCollector) LoadBreakerState()
- func (c *PipelineStatsCollector) LoadSmartMetrics()
- func (c *PipelineStatsCollector) OnFailover(result *providerpool.FailoverResult)
- func (c *PipelineStatsCollector) SetFailoverHandler(fh *FailoverHandler)
- func (c *PipelineStatsCollector) SetSmartFailoverMetrics(m *FailoverMetrics)
- func (c *PipelineStatsCollector) Snapshot() PipelineSnapshot
- func (c *PipelineStatsCollector) Start()
- func (c *PipelineStatsCollector) Stop()
type PortAllocator
- func NewPortAllocator(config *PortConfig) *PortAllocator
- func (pa *PortAllocator) Allocate() (int, error)
- func (pa *PortAllocator) GetBindAddress() string
- func (pa *PortAllocator) GetEndpoint() string
- func (pa *PortAllocator) GetPort() int
- func (pa *PortAllocator) Release() error
type PortConfig
type PromptCacheBreakDetector
- func NewPromptCacheBreakDetector(maxTracked int) *PromptCacheBreakDetector
- func (d *PromptCacheBreakDetector) Observe(snapshot PromptCacheStateSnapshot, cacheReadTokens int) *PromptCacheBreakObservation
type PromptCacheBreakObservation
type PromptCacheSnapshot
type PromptCacheStateSnapshot
- func BuildPromptCacheStateSnapshot(body []byte, provider, fallbackModel, path string) PromptCacheStateSnapshot
type PromptCacheStats
- func (s *PromptCacheStats) HitRate() float64
- func (s *PromptCacheStats) Record(inputTokens, cacheReadTokens, cacheCreationTokens int)
- func (s *PromptCacheStats) ReuseRatio() float64
- func (s *PromptCacheStats) Snapshot() PromptCacheSnapshot
type PromptGuard
- func NewPromptGuard(config *GuardConfig) *PromptGuard
- func (pg *PromptGuard) AddPattern(pattern string) error
- func (pg *PromptGuard) AddRule(rule GuardRule) error
- func (pg *PromptGuard) AddWhitelistPattern(pattern string) error
- func (pg *PromptGuard) Check(prompt string) *GuardResult
- func (pg *PromptGuard) GetRules() []GuardRule
- func (pg *PromptGuard) Stats() map[string]interface{}
type Provider
type ProviderConfig
type ProviderConnectionMetrics
type ProviderMemory
- func NewProviderMemory() *ProviderMemory
- func (pm *ProviderMemory) BlacklistModel(providerID, baseURL, model string)
- func (pm *ProviderMemory) ClearModelBlacklist(providerID, baseURL, model string)
- func (pm *ProviderMemory) ClearThrottle(providerID, baseURL string)
- func (pm *ProviderMemory) ForgetFormat(providerID, baseURL string)
- func (pm *ProviderMemory) ForgetModelAlias(providerID, baseURL, requestedModel string)
- func (pm *ProviderMemory) ForgetModelFormat(providerID, baseURL, requestedModel string)
- func (pm *ProviderMemory) ForgetThrottle(providerID, baseURL string)
- func (pm *ProviderMemory) ForgetToolCap(providerID, baseURL string)
- func (pm *ProviderMemory) GetRestrictions(providerID, baseURL string) map[string]interface{}
- func (pm *ProviderMemory) IsModelBlacklisted(providerID, baseURL, model string) bool
- func (pm *ProviderMemory) IsThrottled(providerID, baseURL string) bool
- func (pm *ProviderMemory) RecallFormat(providerID, baseURL string) (string, bool)
- func (pm *ProviderMemory) RecallModelAlias(providerID, baseURL, requestedModel string) (string, bool)
- func (pm *ProviderMemory) RecallModelFormat(providerID, baseURL, requestedModel string) (string, bool)
- func (pm *ProviderMemory) RecallToolCap(providerID, baseURL string) (ToolCapLevel, bool)
- func (pm *ProviderMemory) RememberFormat(providerID, baseURL string, format string)
- func (pm *ProviderMemory) RememberModelAlias(providerID, baseURL, requestedModel, actualModel string)
- func (pm *ProviderMemory) RememberModelFormat(providerID, baseURL, requestedModel, format string)
- func (pm *ProviderMemory) RememberThrottle(providerID, baseURL string, retryAfter time.Duration)
- func (pm *ProviderMemory) RememberToolCap(providerID, baseURL string, level ToolCapLevel)
- func (pm *ProviderMemory) ThrottleRemaining(providerID, baseURL string) time.Duration
- func (pm *ProviderMemory) ThrottleUntil(providerID, baseURL string) (time.Time, bool)
type ProviderMetrics
type ProviderRaceConfig
- func DefaultProviderRaceConfig() ProviderRaceConfig
type ProviderRaceStats
- func (s *ProviderRaceStats) RecordRequest()
- func (s *ProviderRaceStats) RecordSuccess(primaryProviderID, winnerProviderID string, ...)
- func (s *ProviderRaceStats) Snapshot() ProviderRaceStatsSnapshot
type ProviderRaceStatsSnapshot
type ProviderType
type ProxyAPIHandler
- func NewProxyAPIHandler(sessionMonitor *SessionMonitor, metricsCollector *MetricsCollector, ...) *ProxyAPIHandler
- func (h *ProxyAPIHandler) RegisterRoutes(mux *http.ServeMux)
type ProxyConfig
- func DefaultProxyConfig() *ProxyConfig
type ProxyHandler
- func NewProxyHandler(router *Router, connPool *ConnectionPool, failover *FailoverHandler) *ProxyHandler
- func (ph *ProxyHandler) GetPipelineStats() *PipelineStatsCollector
- func (ph *ProxyHandler) GetPromptCacheStats() PromptCacheSnapshot
- func (ph *ProxyHandler) GetProviderMemory() *ProviderMemory
- func (ph *ProxyHandler) GetProviderRaceStats() ProviderRaceStatsSnapshot
- func (ph *ProxyHandler) GetRoutingRules() []RoutingRule
- func (ph *ProxyHandler) GetRoutingStats() RoutingStatsSnapshot
- func (ph *ProxyHandler) GetRoutingStatsRef() *RoutingStats
- func (ph *ProxyHandler) IsPromptCacheEnabled() bool
- func (ph *ProxyHandler) IsRoutingEnabled() bool
- func (ph *ProxyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
- func (ph *ProxyHandler) SetAPIKeyValidator(validator func(key string) ([]string, error))
- func (ph *ProxyHandler) SetAuthProber(ap *AuthProber)
- func (ph *ProxyHandler) SetDataMasker(dm *DataMasker)
- func (ph *ProxyHandler) SetModelRouter(mr *ModelRouter)
- func (ph *ProxyHandler) SetOAuthManager(m OAuthTokenProvider)
- func (ph *ProxyHandler) SetPipelineStats(ps *PipelineStatsCollector)
- func (ph *ProxyHandler) SetPromptCacheEnabled(enabled bool)
- func (ph *ProxyHandler) SetProviderPool(pool *providerpool.Pool)
- func (ph *ProxyHandler) SetProviderRaceConfig(cfg ProviderRaceConfig)
- func (ph *ProxyHandler) SetPruner(mw *pruner.Middleware)
- func (ph *ProxyHandler) SetPrunerFactory(factory func() *pruner.Middleware)
- func (ph *ProxyHandler) SetResponsesContextCompressor(compressor ResponsesContextCompressor)
- func (ph *ProxyHandler) SetRoutingEnabled(enabled bool)
- func (ph *ProxyHandler) SetRoutingRuleEnabled(name string, enabled bool) bool
- func (ph *ProxyHandler) SetRuleEngine(re *RuleEngine)
- func (ph *ProxyHandler) SetSTTService(service stt.Service)
- func (ph *ProxyHandler) SetSessionMonitor(sm *SessionMonitor)
- func (ph *ProxyHandler) SetTierResolver(tr *TierResolver)
type ProxyServer
- func NewProxyServer(config *ProxyConfig) (*ProxyServer, error)
- func (ps *ProxyServer) GetEndpoint() string
- func (ps *ProxyServer) GetModelRouter() *ModelRouter
- func (ps *ProxyServer) GetPort() int
- func (ps *ProxyServer) GetQuotaMonitor() *QuotaMonitor
- func (ps *ProxyServer) Start() error
- func (ps *ProxyServer) Stats() map[string]interface{}
- func (ps *ProxyServer) Stop(ctx context.Context) error
type QuotaInfo
type QuotaMonitor
- func NewQuotaMonitor(config *QuotaMonitorConfig) *QuotaMonitor
- func (qm *QuotaMonitor) ClearBan(provider string)
- func (qm *QuotaMonitor) ClearRateLimit(provider string)
- func (qm *QuotaMonitor) GetBestProvider() string
- func (qm *QuotaMonitor) GetQuota(provider string) *QuotaInfo
- func (qm *QuotaMonitor) GetQuotaSummary() *QuotaSummary
- func (qm *QuotaMonitor) IsProviderAvailable(provider string) bool
- func (qm *QuotaMonitor) RecordError(provider string, statusCode int)
- func (qm *QuotaMonitor) RecordRequest(provider string)
- func (qm *QuotaMonitor) RecordUsage(provider string, resp *http.Response, tokensUsed int64)
- func (qm *QuotaMonitor) Reset()
- func (qm *QuotaMonitor) ResetProvider(provider string)
- func (qm *QuotaMonitor) Stats() map[string]interface{}
- func (qm *QuotaMonitor) Stop()
type QuotaMonitorConfig
- func DefaultQuotaMonitorConfig() *QuotaMonitorConfig
type QuotaSummary
type RateLimitConfig
- func DefaultRateLimitConfig() *RateLimitConfig
type RegexRule
type RequestMetrics
type ResolvedRoute
- func GetResolvedRouteFromContext(ctx context.Context) *ResolvedRoute
type ResponseCache
- func NewResponseCache(config *PerformanceConfig) *ResponseCache
- func (rc *ResponseCache) Clear()
- func (rc *ResponseCache) GenerateCacheKey(provider, model, prompt string) string
- func (rc *ResponseCache) Get(key string) (*CacheEntry, bool)
- func (rc *ResponseCache) Set(key string, value []byte, statusCode int, headers map[string]string)
- func (rc *ResponseCache) Stats() map[string]interface{}
type ResponsesAssistantCompressionInput
type ResponsesAssistantCompressionMode
type ResponsesContextCompressor
type RestrictionsHandler
- func NewRestrictionsHandler(pm *ProviderMemory) *RestrictionsHandler
- func (h *RestrictionsHandler) ClearModelBlacklist(c echo.Context) error
- func (h *RestrictionsHandler) ClearProviderThrottle(c echo.Context) error
- func (h *RestrictionsHandler) GetProviderRestrictions(c echo.Context) error
- func (h *RestrictionsHandler) RegisterRoutes(g *echo.Group)
type RetryableErrorType
type RouteCondition
type RouteConfig
type RouteDecision
type RouteRequest
type Router
- func NewRouter(config *RouteConfig) *Router
- func (r *Router) AddProvider(config *ProviderConfig)
- func (r *Router) GetAllProviders() []*Provider
- func (r *Router) GetAvailableProviders() []*Provider
- func (r *Router) GetProvider(name string) (*Provider, bool)
- func (r *Router) RemoveProvider(name string) error
- func (r *Router) SelectProvider(req *http.Request) (*Provider, error)
- func (r *Router) SetProviderEnabled(name string, enabled bool) error
- func (r *Router) Stats() map[string]interface{}
- func (r *Router) UpdateHealth(name string, healthy bool, err error)
- func (r *Router) UpdateHealthWithLatency(name string, healthy bool, err error, latency time.Duration)
type RoutingConfig
- func DefaultRoutingConfig() *RoutingConfig
- func ParseRoutingConfig(data []byte) (*RoutingConfig, error)
- func (c *RoutingConfig) ToRuleEngine(tierResolver ...*TierResolver) *RuleEngine
type RoutingRule
- func (r *RoutingRule) Evaluate(req *RouteRequest) *RouteDecision
- func (r *RoutingRule) IsEnabled() bool
type RoutingStats
- func NewRoutingStats() *RoutingStats
- func (s *RoutingStats) Load(routedRequests, tokensRouted, costSavedMicro int64)
- func (s *RoutingStats) Record(originalModel, actualModel string, respBody []byte)
- func (s *RoutingStats) RecordTokens(originalModel, actualModel string, inputTokens, outputTokens int)
- func (s *RoutingStats) Snapshot() RoutingStatsSnapshot
type RoutingStatsSnapshot
type RuleEngine
- func NewRuleEngine(rules []RoutingRule, tierResolver ...*TierResolver) *RuleEngine
- func (e *RuleEngine) Evaluate(req *RouteRequest) *RouteDecision
- func (e *RuleEngine) GetRules() []RoutingRule
- func (e *RuleEngine) SetRuleEnabled(name string, enabled bool) bool
- func (e *RuleEngine) SetTierResolver(tr *TierResolver)
type Session
type SessionConfig
- func DefaultSessionConfig() *SessionConfig
type SessionMonitor
- func NewSessionMonitor(config *SessionConfig) *SessionMonitor
- func (sm *SessionMonitor) CleanupIdleSessions() int
- func (sm *SessionMonitor) CompleteSession(id string, status SessionStatus)
- func (sm *SessionMonitor) GetSession(id string) (*Session, bool)
- func (sm *SessionMonitor) ListActiveSessions() []*Session
- func (sm *SessionMonitor) ListSessions() []*Session
- func (sm *SessionMonitor) RecordError(id string)
- func (sm *SessionMonitor) StartSession(clientIP, userAgent string) *Session
- func (sm *SessionMonitor) Stats() map[string]interface{}
- func (sm *SessionMonitor) UpdateSession(id string, provider, model string, tokensIn, tokensOut int64)
type SessionStatus
type SmartFailoverHandler
- func NewSmartFailoverHandler(config *FailoverConfig, router *Router) *SmartFailoverHandler
- func (sfh *SmartFailoverHandler) ExecuteStreamingWithAnomalyDetection(ctx context.Context, provider *Provider, ...) (*http.Response, error)
- func (sfh *SmartFailoverHandler) ExecuteWithSmartFailover(ctx context.Context, provider *Provider, ...) (*http.Response, error)
- func (sfh *SmartFailoverHandler) GetClassifier() *APIErrorClassifier
- func (sfh *SmartFailoverHandler) GetMetrics() *FailoverMetrics
type StreamAnomaly
type StreamBuffer
- func NewStreamBuffer(detector *StreamingAnomalyDetector) *StreamBuffer
- func (sb *StreamBuffer) GetLastAnomaly() *StreamAnomaly
- func (sb *StreamBuffer) GetTotalBytes() int64
- func (sb *StreamBuffer) GetValidContent() []byte
- func (sb *StreamBuffer) HasAnomaly() bool
- func (sb *StreamBuffer) Reset()
- func (sb *StreamBuffer) Write(chunk []byte) (*StreamAnomaly, error)
type StreamChunkChoice
type StreamChunkDelta
type StreamChunkToolCall
type StreamChunkToolCallFunc
type StreamChunkUsage
type StreamRecoveryStrategy
- func GetRecoveryStrategy(anomaly *StreamAnomaly, config StreamingAnomalyConfig) *StreamRecoveryStrategy
type StreamingAnomalyConfig
- func DefaultStreamingAnomalyConfig() StreamingAnomalyConfig
type StreamingAnomalyDetector
- func NewStreamingAnomalyDetector(config StreamingAnomalyConfig) *StreamingAnomalyDetector
type TierResolver
- func NewTierResolver() *TierResolver
- func (tr *TierResolver) BestModelForTier(tier ModelTier) string
- func (tr *TierResolver) IsEnabled() bool
- func (tr *TierResolver) ModelTierOf(modelID string) ModelTier
- func (tr *TierResolver) Resolve(models []*providerpool.Model) bool
- func (tr *TierResolver) Stats() map[string]interface{}
type TieredModel
type ToggleState
type ToggleStore
- func NewToggleStore(kv kvstore.Store) *ToggleStore
- func (ts *ToggleStore) Load(ctx context.Context) (*ToggleState, error)
- func (ts *ToggleStore) Save(ctx context.Context, state *ToggleState) error
type Tool
type ToolCapLevel
type UpstreamRequestBridge
type UpstreamRequestBridgeContext
type WatcherConfig
- func DefaultWatcherConfig() *WatcherConfig

Constants ¶

View Source

const BackgroundTaskHeader = "X-Background-Task"

View Source

const DisableResponsesContinuationHeader = "X-Zima-Disable-Responses-Continuation"

DisableResponsesContinuationHeader tells the proxy's request-shaping logic to strip previous_response_id before forwarding the request to Responses endpoints.

View Source

const ResponsesContinuationDisabledHeader = "X-Zima-Responses-Continuation-Disabled"

ResponsesContinuationDisabledHeader is set when the proxy detects that the upstream does not honor previous_response_id continuation semantics for the routed provider.

View Source

const ResponsesPreviousIDHeader = "X-Zima-Previous-Response-ID"

ResponsesPreviousIDHeader exposes the latest Responses response.id observed by the proxy for this session/request.

View Source

const ResponsesUsedHeader = "X-Zima-Responses-Used"

ResponsesUsedHeader tells downstream callers the proxy actually used a Responses endpoint upstream for this request.

Variables ¶

View Source

var (
	// Port errors
	ErrPortAllocationFailed = errors.New("failed to allocate port")
	ErrPortRangeInvalid     = errors.New("invalid port range")
	ErrPortInUse            = errors.New("port already in use")

	// Provider errors
	ErrNoAvailableProvider = errors.New("no available provider")
	ErrAllProvidersFailed  = errors.New("all providers failed")
	ErrProviderNotFound    = errors.New("provider not found")
	ErrProviderDisabled    = errors.New("provider is disabled")

	// Request errors
	ErrRequestFailed  = errors.New("request failed")
	ErrRequestTimeout = errors.New("request timeout")
	ErrUpstreamError  = errors.New("upstream error")
	ErrInvalidRequest = errors.New("invalid request")

	// Circuit breaker errors
	ErrCircuitOpen = errors.New("circuit breaker is open")

	// Configuration errors
	ErrConfigInvalid  = errors.New("invalid configuration")
	ErrConfigNotFound = errors.New("configuration not found")

	// Model compatibility errors
	ErrToolCallingNotSupported = errors.New("tool calling not supported by model")
	ErrVisionNotSupported      = errors.New("vision not supported by model")
	ErrContextTooLong          = errors.New("context exceeds model limit")

	// Mock errors
	ErrMockEndpointNotFound = errors.New("mock endpoint not found")
	ErrMockDisabled         = errors.New("mock endpoints disabled")
)

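Proxy errors: sentinel error values exported by this package, grouped by area (port, provider, request, circuit breaker, configuration, model compatibility, and mock).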

View Source

var (
	ModelAliases map[string][]string
)

ModelAliases maps model names to common alternatives for relay compatibility.

Functions ¶

func ApplyPromptCaching ¶

func ApplyPromptCaching(req *AnthropicRequest)

ApplyPromptCaching adds cache_control breakpoints to an Anthropic request. Breakpoints are placed on:

(1) the system prompt static block(s) — up to 2 blocks for static+config
(2) the last tool definition
(3) a turn-boundary message (4th-from-last) for long conversations

This enables Anthropic's prompt caching, which can save up to 90% on input token costs. Anthropic allows up to 4 cache breakpoints per request.

func BackgroundTaskFromContext ¶

func BackgroundTaskFromContext(ctx context.Context) bool

BackgroundTaskFromContext reports whether the request context is marked as a background/internal task.

func DisableModelRoutingFromContext ¶

func DisableModelRoutingFromContext(ctx context.Context) bool

DisableModelRoutingFromContext reports whether proxy-side model routing should be skipped for this request.

func DisableResponsesContinuationFromContext ¶

func DisableResponsesContinuationFromContext(ctx context.Context) bool

DisableResponsesContinuationFromContext reports whether continuation should be disabled.

func GetExcludedProviders ¶

func GetExcludedProviders(ctx context.Context) []string

GetExcludedProviders returns the excluded provider IDs from context.

func GetPinnedProvider ¶

func GetPinnedProvider(ctx context.Context) string

GetPinnedProvider returns the pinned provider ID from context, or "".

func InjectPromptCaching ¶

func InjectPromptCaching(body []byte) ([]byte, error)

InjectPromptCaching unmarshals an Anthropic request body, applies cache breakpoints, and re-marshals.

func IsContextWindowExceededMessage ¶

func IsContextWindowExceededMessage(msg string) bool

IsContextWindowExceededMessage returns true when an error message clearly indicates input/context overflow, including relay-wrapped variants that may be surfaced with a generic 5xx status code.

func LocaleFromContext ¶

func LocaleFromContext(ctx context.Context) string

LocaleFromContext returns locale from request context.

func LogTokenChurn ¶

func LogTokenChurn(provider, model, promptCacheKey string, inputTokens, cacheRead, cacheCreation int, observation *PromptCacheBreakObservation)

LogTokenChurn logs a per-request summary of token reuse vs churn.

func ReadPortFromFile ¶

func ReadPortFromFile(portFile string) (int, error)

ReadPortFromFile reads the proxy port from the given file.

func SanitizeError ¶

func SanitizeError(err error) string

SanitizeError converts raw Go network/TLS errors into user-friendly messages, stripping internal URLs and technical details.

func SessionIDFromContext ¶

func SessionIDFromContext(ctx context.Context) string

SessionIDFromContext returns proxy session ID from request context.

func ValidateRoutingConfig ¶

func ValidateRoutingConfig(cfg *RoutingConfig) []error

ValidateRoutingConfig checks for common configuration errors.

func WithBackgroundTask ¶

func WithBackgroundTask(ctx context.Context) context.Context

WithBackgroundTask marks a request context as an internal/background task so the proxy can apply background-specific routing policies.

func WithDisableModelRouting ¶

func WithDisableModelRouting(ctx context.Context) context.Context

WithDisableModelRouting marks a request context so proxy-side model routing leaves an explicit model untouched for this request.

func WithDisableResponsesContinuation ¶

func WithDisableResponsesContinuation(ctx context.Context) context.Context

WithDisableResponsesContinuation marks a request context so bridge/proxy strips previous_response_id before forwarding to Responses endpoints.

func WithExcludedProviders ¶

func WithExcludedProviders(ctx context.Context, providerIDs ...string) context.Context

WithExcludedProviders returns a context carrying the providers that routing should skip for the current request.

func WithLocale ¶

func WithLocale(ctx context.Context, locale string) context.Context

WithLocale stores the request locale in context (e.g. "en-US", "zh-CN").

func WithPinnedProvider ¶

func WithPinnedProvider(ctx context.Context, providerID string) context.Context

WithPinnedProvider returns a context carrying a preferred provider ID. The proxy handler reads this to set RouteRequest.PreferredProviderID.

func WithResolvedRoute ¶

func WithResolvedRoute(ctx context.Context, rr *ResolvedRoute) context.Context

WithResolvedRoute returns a context carrying a ResolvedRoute pointer. After the proxy handler completes, the struct will be populated.

func WithSessionID ¶

func WithSessionID(ctx context.Context, sessionID string) context.Context

WithSessionID stores the proxy session ID in request context.

Types ¶

type APIErrorClassifier ¶

type APIErrorClassifier struct {
	// contains filtered or unexported fields
}

APIErrorClassifier classifies API errors from different providers

func NewAPIErrorClassifier ¶

func NewAPIErrorClassifier() *APIErrorClassifier

NewAPIErrorClassifier creates a new error classifier with default patterns

func (*APIErrorClassifier) AddProviderPatterns ¶

func (c *APIErrorClassifier) AddProviderPatterns(provider string, patterns []ErrorPattern)

AddProviderPatterns adds custom patterns for a provider

func (*APIErrorClassifier) ClassifyError ¶

func (c *APIErrorClassifier) ClassifyError(
	provider string,
	statusCode int,
	responseBody []byte,
) *ErrorClassification

ClassifyError analyzes the response and returns an error classification.

type AnthropicCacheControl ¶

type AnthropicCacheControl struct {
	Type string `json:"type"` // "ephemeral"
}

AnthropicCacheControl is the cache_control block for Anthropic prompt caching.

type AnthropicContentBlock ¶

type AnthropicContentBlock struct {
	Type      string      `json:"type"`
	Text      string      `json:"text,omitempty"`
	ID        string      `json:"id,omitempty"`
	Name      string      `json:"name,omitempty"`
	Input     interface{} `json:"input,omitempty"`
	ToolUseID string      `json:"tool_use_id,omitempty"`
	Content   string      `json:"content,omitempty"`
	Source    *struct {
		Type      string `json:"type"`
		MediaType string `json:"media_type"`
		Data      string `json:"data"`
	} `json:"source,omitempty"`
	CacheControl *AnthropicCacheControl `json:"cache_control,omitempty"`
}

type AnthropicMessage ¶

type AnthropicMessage struct {
	Role    string      `json:"role"`
	Content interface{} `json:"content"` // string or []ContentBlock
}

type AnthropicResponse ¶

type AnthropicResponse struct {
	ID           string                  `json:"id"`
	Type         string                  `json:"type"`
	Role         string                  `json:"role"`
	Content      []AnthropicContentBlock `json:"content"`
	Model        string                  `json:"model"`
	StopReason   string                  `json:"stop_reason"`
	StopSequence string                  `json:"stop_sequence,omitempty"`
	Usage        struct {
		InputTokens  int `json:"input_tokens"`
		OutputTokens int `json:"output_tokens"`
	} `json:"usage"`
}

type AnthropicStreamContentBlock ¶

type AnthropicStreamContentBlock struct {
	Type string `json:"type"`
	Text string `json:"text,omitempty"`
	ID   string `json:"id,omitempty"`
	Name string `json:"name,omitempty"`
}

AnthropicStreamContentBlock is the content_block in a content_block_start event.

type AnthropicStreamDelta ¶

type AnthropicStreamDelta struct {
	Type        string `json:"type"`
	Text        string `json:"text,omitempty"`
	PartialJSON string `json:"partial_json,omitempty"`
	StopReason  string `json:"stop_reason,omitempty"`
}

AnthropicStreamDelta is the delta in content_block_delta / message_delta events.

type AnthropicStreamEvent ¶

type AnthropicStreamEvent struct {
	Type         string                       `json:"type"`
	Index        int                          `json:"index,omitempty"`
	ContentBlock *AnthropicStreamContentBlock `json:"content_block,omitempty"`
	Delta        *AnthropicStreamDelta        `json:"delta,omitempty"`
	Message      *AnthropicResponse           `json:"message,omitempty"`
	Usage        *AnthropicStreamUsage        `json:"usage,omitempty"`
}

type AnthropicStreamUsage ¶

type AnthropicStreamUsage struct {
	InputTokens              int `json:"input_tokens,omitempty"`
	OutputTokens             int `json:"output_tokens,omitempty"`
	CacheReadInputTokens     int `json:"cache_read_input_tokens,omitempty"`
	CacheCreationInputTokens int `json:"cache_creation_input_tokens,omitempty"`
}

AnthropicStreamUsage is the usage in message_start / message_delta events.

type AnthropicSystemBlock ¶

type AnthropicSystemBlock struct {
	Type         string                 `json:"type"`
	Text         string                 `json:"text"`
	CacheControl *AnthropicCacheControl `json:"cache_control,omitempty"`
}

AnthropicSystemBlock is a content block in the system prompt array (for prompt caching).

type AnthropicTool ¶

type AnthropicTool struct {
	Name         string                 `json:"name"`
	Description  string                 `json:"description,omitempty"`
	InputSchema  interface{}            `json:"input_schema"`
	CacheControl *AnthropicCacheControl `json:"cache_control,omitempty"`
}

type AuthConfig ¶

type AuthConfig struct {
	Enabled    bool     `json:"enabled"`
	Type       string   `json:"type"` // "none", "api_key", "bearer"
	APIKeys    []string `json:"api_keys"`
	AllowedIPs []string `json:"allowed_ips"`
	SkipPaths  []string `json:"skip_paths"`
	HeaderName string   `json:"header_name"`
}

AuthConfig holds authentication configuration

func DefaultAuthConfig ¶

func DefaultAuthConfig() *AuthConfig

DefaultAuthConfig returns default auth configuration

type AuthExhaustedError ¶

type AuthExhaustedError struct {
	ProviderID     string
	LastStatusCode int
	LastBody       string
}

AuthExhaustedError indicates all auth strategies failed for a provider.

func (*AuthExhaustedError) Error ¶

func (e *AuthExhaustedError) Error() string

type AuthProber ¶

type AuthProber struct {
	// contains filtered or unexported fields
}

AuthProber manages auth strategy probing and remembers what works.

func NewAuthProber ¶

func NewAuthProber() *AuthProber

NewAuthProber creates a new auth prober with 1-hour TTL memory.

func (*AuthProber) Apply ¶

func (ap *AuthProber) Apply(req *http.Request, strategy AuthStrategy, apiKey *providerpool.APIKey, provider *providerpool.Provider)

Apply sets the appropriate auth headers on the request for the strategy.

func (*AuthProber) Forget ¶

func (ap *AuthProber) Forget(providerID, memoryKey string)

Forget evicts the cached strategy (e.g. on provider config change).

func (*AuthProber) ProbeAndForward ¶

func (ap *AuthProber) ProbeAndForward(
	provider *providerpool.Provider,
	apiKey *providerpool.APIKey,
	effectiveFormat providerpool.APIFormat,
	buildRequest func() (*http.Request, error),
	doRequest func(*http.Request) (*http.Response, error),
) (*http.Response, error)

ProbeAndForward tries auth strategies in order until one succeeds (non-401/403). Returns the successful response, or an error if all strategies are exhausted. effectiveFormat is the API format being used (openai, anthropic, etc.) to determine auth strategy.

func (*AuthProber) Recall ¶

func (ap *AuthProber) Recall(providerID, memoryKey string) (AuthStrategy, bool)

Recall returns the cached winning strategy, if any.

func (*AuthProber) Remember ¶

func (ap *AuthProber) Remember(providerID, memoryKey string, strategy AuthStrategy)

Remember caches the winning auth strategy for a provider+endpoint key.

func (*AuthProber) Strategies ¶

func (ap *AuthProber) Strategies(provider *providerpool.Provider, apiKey *providerpool.APIKey, effectiveFormat providerpool.APIFormat) []AuthStrategy

Strategies returns an ordered list of auth strategies to try. The cached winner (if any) is placed first for zero-latency happy path. effectiveFormat is the API format being used for this request (may differ from provider.APIFormat).

type AuthStrategy ¶

type AuthStrategy int

AuthStrategy represents an authentication method to try against an upstream provider.

const (
	AuthBearer    AuthStrategy = iota // Authorization: Bearer <key>
	AuthXAPIKey                       // x-api-key: <key>
	AuthAnthropic                     // x-api-key + anthropic-version header
	AuthNone                          // No auth header
)

func (AuthStrategy) String ¶

func (s AuthStrategy) String() string

type Authenticator ¶

type Authenticator struct {
	// contains filtered or unexported fields
}

Authenticator handles authentication and rate limiting

func NewAuthenticator ¶

func NewAuthenticator(authConfig *AuthConfig, rateLimitConfig *RateLimitConfig) *Authenticator

NewAuthenticator creates a new authenticator

func (*Authenticator) AddAPIKey ¶

func (a *Authenticator) AddAPIKey(key string)

AddAPIKey adds an API key

func (*Authenticator) AddAllowedIP ¶

func (a *Authenticator) AddAllowedIP(ip string)

AddAllowedIP adds an allowed IP

func (*Authenticator) Authenticate ¶

func (a *Authenticator) Authenticate(r *http.Request) (bool, string)

Authenticate checks if a request is authenticated

func (*Authenticator) CheckRateLimit ¶

func (a *Authenticator) CheckRateLimit(r *http.Request) (bool, string)

CheckRateLimit checks if a request is within rate limits

func (*Authenticator) CleanupRateLimits ¶

func (a *Authenticator) CleanupRateLimits()

CleanupRateLimits removes stale rate limit entries

func (*Authenticator) ListAPIKeys ¶

func (a *Authenticator) ListAPIKeys() []map[string]interface{}

ListAPIKeys returns all API keys (masked for security)

func (*Authenticator) RemoveAPIKey ¶

func (a *Authenticator) RemoveAPIKey(key string)

RemoveAPIKey removes an API key

func (*Authenticator) Stats ¶

func (a *Authenticator) Stats() map[string]interface{}

Stats returns authenticator statistics

func (*Authenticator) ValidateAPIKey ¶

func (a *Authenticator) ValidateAPIKey(key string) bool

ValidateAPIKey checks if an API key is valid

type BreakerSnapshot ¶

type BreakerSnapshot struct {
	Name            string
	State           string
	Failures        int
	Successes       int
	LastFailureTime time.Time
	LastStateChange time.Time
}

BreakerSnapshot holds persisted circuit breaker state for a single provider.

type BufferPool ¶

type BufferPool struct {
	// contains filtered or unexported fields
}

BufferPool provides reusable byte buffers to reduce GC pressure

func NewBufferPool ¶

func NewBufferPool(config *PerformanceConfig) *BufferPool

NewBufferPool creates a new buffer pool

func (*BufferPool) Get ¶

func (bp *BufferPool) Get() *bytes.Buffer

Get retrieves a buffer from the pool

func (*BufferPool) Put ¶

func (bp *BufferPool) Put(buf *bytes.Buffer)

Put returns a buffer to the pool

func (*BufferPool) Stats ¶

func (bp *BufferPool) Stats() map[string]interface{}

Stats returns buffer pool statistics

type CacheEntry ¶

type CacheEntry struct {
	Key        string
	Value      []byte
	StatusCode int
	Headers    map[string]string
	CreatedAt  time.Time
	ExpiresAt  time.Time
	HitCount   int64
}

CacheEntry represents a cached response

type ChatMessage ¶

type ChatMessage struct {
	Role    string      `json:"role"`
	Content interface{} `json:"content"`
}

ChatMessage represents a chat message

type ChatRequest ¶

type ChatRequest struct {
	Model       string        `json:"model"`
	Messages    []ChatMessage `json:"messages"`
	System      string        `json:"system,omitempty"`
	Tools       []Tool        `json:"tools,omitempty"`
	Stream      bool          `json:"stream,omitempty"`
	MaxTokens   int           `json:"max_tokens,omitempty"`
	Temperature float64       `json:"temperature,omitempty"`
}

ChatRequest represents a chat completion request

func (*ChatRequest) HasImages ¶

func (r *ChatRequest) HasImages() bool

HasImages checks if request contains images

func (*ChatRequest) TokenCount ¶

func (r *ChatRequest) TokenCount() int

TokenCount estimates token count (simplified)

type ConfigReloader ¶

type ConfigReloader struct {
	// contains filtered or unexported fields
}

ConfigReloader handles applying configuration changes

func NewConfigReloader ¶

func NewConfigReloader(
	router *Router,
	failover *FailoverHandler,
	guard *PromptGuard,
	auth *Authenticator,
	mock *MockHandler,
	compat *ModelCompatLayer,
) *ConfigReloader

NewConfigReloader creates a new config reloader

func (*ConfigReloader) ApplyConfig ¶

func (cr *ConfigReloader) ApplyConfig(config *ProxyConfig) error

ApplyConfig applies a new configuration

type ConfigWatcher ¶

type ConfigWatcher struct {
	OnChange func(oldConfig, newConfig *ProxyConfig)
	OnError  func(error)
	// contains filtered or unexported fields
}

ConfigWatcher watches for configuration changes

func NewConfigWatcher ¶

func NewConfigWatcher(configPath string, pollInterval time.Duration) *ConfigWatcher

NewConfigWatcher creates a new config watcher

func (*ConfigWatcher) ForceReload ¶

func (cw *ConfigWatcher) ForceReload() error

ForceReload forces a configuration reload

func (*ConfigWatcher) GetConfigPath ¶

func (cw *ConfigWatcher) GetConfigPath() string

GetConfigPath returns the config file path

func (*ConfigWatcher) SetConfigPath ¶

func (cw *ConfigWatcher) SetConfigPath(path string)

SetConfigPath sets a new config file path

func (*ConfigWatcher) Start ¶

func (cw *ConfigWatcher) Start()

Start starts watching for config changes

func (*ConfigWatcher) Stats ¶

func (cw *ConfigWatcher) Stats() map[string]interface{}

Stats returns watcher statistics

func (*ConfigWatcher) Stop ¶

func (cw *ConfigWatcher) Stop()

Stop stops the config watcher

type ConnWarmup ¶

type ConnWarmup struct {
	// contains filtered or unexported fields
}

ConnWarmup pre-establishes TCP+TLS connections to provider endpoints so the first real request doesn't pay the handshake cost.

func NewConnWarmup ¶

func NewConnWarmup(pool *ConnectionPool) *ConnWarmup

NewConnWarmup creates a new connection warmup manager.

func (*ConnWarmup) DNSCache ¶

func (cw *ConnWarmup) DNSCache() *DNSCache

DNSCache returns the DNS cache for use by the transport dialer.

func (*ConnWarmup) WarmOne ¶

func (cw *ConnWarmup) WarmOne(baseURL string)

WarmOne pre-establishes a TCP+TLS connection to a single provider base URL. Safe to call from warmup paths to pre-warm a likely provider.

func (*ConnWarmup) WarmProviders ¶

func (cw *ConnWarmup) WarmProviders(baseURLs []string)

WarmProviders pre-connects to all provider base URLs concurrently. Called once at startup after providers are loaded.

type ConnectionConfig ¶

type ConnectionConfig struct {
	MaxIdleConns          int           `json:"max_idle_conns" yaml:"max_idle_conns"`
	MaxIdleConnsPerHost   int           `json:"max_idle_conns_per_host" yaml:"max_idle_conns_per_host"`
	MaxConnsPerHost       int           `json:"max_conns_per_host" yaml:"max_conns_per_host"`
	IdleConnTimeout       time.Duration `json:"idle_conn_timeout" yaml:"idle_conn_timeout"`
	KeepAlive             bool          `json:"keep_alive" yaml:"keep_alive"`
	KeepAliveInterval     time.Duration `json:"keep_alive_interval" yaml:"keep_alive_interval"`
	DialTimeout           time.Duration `json:"dial_timeout" yaml:"dial_timeout"`
	TLSHandshakeTimeout   time.Duration `json:"tls_handshake_timeout" yaml:"tls_handshake_timeout"`
	ResponseHeaderTimeout time.Duration `json:"response_header_timeout" yaml:"response_header_timeout"`
	ForceHTTP2            bool          `json:"force_http2" yaml:"force_http2"`
}

ConnectionConfig holds the connection pool configuration.

func DefaultConnectionConfig ¶

func DefaultConnectionConfig() *ConnectionConfig

DefaultConnectionConfig returns default connection configuration

type ConnectionMetrics ¶

type ConnectionMetrics struct {
	// contains filtered or unexported fields
}

ConnectionMetrics tracks connection pool performance

func NewConnectionMetrics ¶

func NewConnectionMetrics(config *PerformanceConfig) *ConnectionMetrics

NewConnectionMetrics creates a new connection metrics tracker

func (*ConnectionMetrics) RecordConnectionClosed ¶

func (cm *ConnectionMetrics) RecordConnectionClosed()

RecordConnectionClosed records a closed connection

func (*ConnectionMetrics) RecordConnectionError ¶

func (cm *ConnectionMetrics) RecordConnectionError()

RecordConnectionError records a connection error

func (*ConnectionMetrics) RecordConnectionOpened ¶

func (cm *ConnectionMetrics) RecordConnectionOpened()

RecordConnectionOpened records a new connection

func (*ConnectionMetrics) RecordRequest ¶

func (cm *ConnectionMetrics) RecordRequest(provider string)

RecordRequest records a request start

func (*ConnectionMetrics) RecordRequestComplete ¶

func (cm *ConnectionMetrics) RecordRequestComplete(provider string, success bool, latencyNs int64, reused bool)

RecordRequestComplete records a request completion

func (*ConnectionMetrics) Reset ¶

func (cm *ConnectionMetrics) Reset()

Reset resets all metrics

func (*ConnectionMetrics) Stats ¶

func (cm *ConnectionMetrics) Stats() map[string]interface{}

Stats returns connection metrics

type ConnectionPool ¶

type ConnectionPool struct {
	// contains filtered or unexported fields
}

ConnectionPool manages HTTP connections to upstream providers

func NewConnectionPool ¶

func NewConnectionPool(config *ConnectionConfig) *ConnectionPool

NewConnectionPool creates a new connection pool

func (*ConnectionPool) Close ¶

func (cp *ConnectionPool) Close()

Close closes all idle connections

func (*ConnectionPool) CloseIdleConnectionsForProfile ¶

func (cp *ConnectionPool) CloseIdleConnectionsForProfile(profile ConnectionProfile)

CloseIdleConnectionsForProfile closes idle connections for both secure and insecure transports of a profile.

func (*ConnectionPool) GetClient ¶

func (cp *ConnectionPool) GetClient(provider string, profiles ...ConnectionProfile) *http.Client

GetClient returns an HTTP client for the given provider

func (*ConnectionPool) GetInsecureClient ¶

func (cp *ConnectionPool) GetInsecureClient(provider string, profiles ...ConnectionProfile) *http.Client

GetInsecureClient returns an HTTP client that skips TLS verification

func (*ConnectionPool) GetTransport ¶

func (cp *ConnectionPool) GetTransport() *http.Transport

GetTransport returns the underlying transport

func (*ConnectionPool) GetTransportForProfile ¶

func (cp *ConnectionPool) GetTransportForProfile(profile ConnectionProfile) *http.Transport

GetTransportForProfile returns the transport for the given profile.

func (*ConnectionPool) Stats ¶

func (cp *ConnectionPool) Stats() map[string]interface{}

Stats returns connection pool statistics

type ConnectionProfile ¶

type ConnectionProfile string

const (
	ConnectionProfileLong  ConnectionProfile = "long"
	ConnectionProfileProbe ConnectionProfile = "probe"
)

type DNSCache ¶

type DNSCache struct {
	// contains filtered or unexported fields
}

DNSCache caches DNS lookups to avoid repeated resolution on the hot path.

func NewDNSCache ¶

func NewDNSCache(ttl time.Duration, maxSize int) *DNSCache

NewDNSCache creates a DNS cache with the given TTL and max entries.

func (*DNSCache) DialContext ¶

func (dc *DNSCache) DialContext(dialer *net.Dialer) func(ctx context.Context, network, addr string) (net.Conn, error)

DialContext is a drop-in replacement for net.Dialer.DialContext that uses cached DNS.

func (*DNSCache) Resolve ¶

func (dc *DNSCache) Resolve(host string) ([]string, error)

Resolve looks up a hostname, returning cached results when available.

type DataMasker ¶

type DataMasker struct {
	// contains filtered or unexported fields
}

DataMasker handles data masking (stub implementation)

func NewDataMasker ¶

func NewDataMasker(config *MaskingConfig) *DataMasker

NewDataMasker creates a new data masker

func (*DataMasker) AddRule ¶

func (dm *DataMasker) AddRule(rule *MaskingRule) error

AddRule adds a masking rule

func (*DataMasker) GetRule ¶

func (dm *DataMasker) GetRule(id string) (*MaskingRule, bool)

GetRule returns a masking rule by ID

func (*DataMasker) IsEnabled ¶

func (dm *DataMasker) IsEnabled() bool

IsEnabled returns whether masking is enabled

func (*DataMasker) ListRules ¶

func (dm *DataMasker) ListRules() []*MaskingRule

ListRules returns all masking rules

func (*DataMasker) Mask ¶

func (dm *DataMasker) Mask(content string, direction MaskingDirection) string

Mask masks sensitive data based on direction

func (*DataMasker) MaskBytes ¶

func (dm *DataMasker) MaskBytes(content []byte, direction MaskingDirection) []byte

MaskBytes masks sensitive data in byte content, avoiding []byte→string→[]byte round-trips. Uses regexp.ReplaceAll which operates on []byte directly.

func (*DataMasker) MaskRequest ¶

func (dm *DataMasker) MaskRequest(content string) string

MaskRequest masks sensitive data in request

func (*DataMasker) MaskRequestBytes ¶

func (dm *DataMasker) MaskRequestBytes(content []byte) []byte

MaskRequestBytes masks sensitive data in request body bytes.

func (*DataMasker) MaskResponse ¶

func (dm *DataMasker) MaskResponse(content string) string

MaskResponse masks sensitive data in response

func (*DataMasker) MaskResponseBytes ¶

func (dm *DataMasker) MaskResponseBytes(content []byte) []byte

MaskResponseBytes masks sensitive data in response body bytes.

func (*DataMasker) RemoveRule ¶

func (dm *DataMasker) RemoveRule(id string) bool

RemoveRule removes a masking rule

func (*DataMasker) ResetStats ¶

func (dm *DataMasker) ResetStats()

ResetStats resets mask counters

func (*DataMasker) SetEnabled ¶

func (dm *DataMasker) SetEnabled(enabled bool)

SetEnabled enables or disables masking globally

func (*DataMasker) SetLocaleFunc ¶

func (dm *DataMasker) SetLocaleFunc(f func() string)

SetLocaleFunc sets the function used to resolve the current locale for i18n replacement labels.

func (*DataMasker) SetRuleEnabled ¶

func (dm *DataMasker) SetRuleEnabled(id string, enabled bool) bool

SetRuleEnabled enables or disables a rule

func (*DataMasker) Stats ¶

func (dm *DataMasker) Stats() map[string]interface{}

Stats returns masker statistics

type ErrorCategory ¶

type ErrorCategory string

ErrorCategory classifies API errors for failover decisions

const (
	// ErrorCategoryNonRetryable - do not failover or retry
	ErrorCategoryNonRetryable ErrorCategory = "non_retryable"

	// ErrorCategoryRetryable - retry with same provider (transient errors)
	ErrorCategoryRetryable ErrorCategory = "retryable"

	// ErrorCategoryFailover - retry with different provider (provider-specific limits)
	ErrorCategoryFailover ErrorCategory = "failover"

	// ErrorCategoryStreamAnomaly - force stop and recover
	ErrorCategoryStreamAnomaly ErrorCategory = "stream_anomaly"
)

type ErrorClassification ¶

type ErrorClassification struct {
	Type                   RetryableErrorType `json:"type"`
	Category               ErrorCategory      `json:"category"`
	Message                string             `json:"message"`
	Retryable              bool               `json:"retryable"`
	ShouldFailover         bool               `json:"should_failover"`
	SuggestedContextWindow int                `json:"suggested_context_window,omitempty"`
	RetryAfter             time.Duration      `json:"retry_after,omitempty"`
	OriginalStatusCode     int                `json:"original_status_code"`
}

ErrorClassification is the result of classifying an API error.

type ErrorClassificationConfig ¶

type ErrorClassificationConfig struct {
	Enabled         bool     `json:"enabled" yaml:"enabled"`
	FailoverErrors  []string `json:"failover_errors" yaml:"failover_errors"`   // Error types that trigger failover
	RetryableErrors []string `json:"retryable_errors" yaml:"retryable_errors"` // Error types that trigger retry
}

ErrorClassificationConfig holds the configuration for error classification.

type ErrorPattern ¶

type ErrorPattern struct {
	Type            RetryableErrorType
	Category        ErrorCategory
	StatusCodes     []int
	MessagePatterns []*regexp.Regexp
}

ErrorPattern defines a pattern to match API errors

type FailoverAPIHandler ¶

type FailoverAPIHandler struct {
	// contains filtered or unexported fields
}

FailoverAPIHandler provides HTTP handlers for failover management

func NewFailoverAPIHandler ¶

func NewFailoverAPIHandler(smartFailover *SmartFailoverHandler, config *FailoverConfig) *FailoverAPIHandler

NewFailoverAPIHandler creates a new failover API handler

func (*FailoverAPIHandler) GetCircuitBreakerStatus ¶

func (h *FailoverAPIHandler) GetCircuitBreakerStatus(c echo.Context) error

GetCircuitBreakerStatus returns the status of all circuit breakers. GET /api/v1/proxy/failover/breakers

func (*FailoverAPIHandler) GetConfig ¶

func (h *FailoverAPIHandler) GetConfig(c echo.Context) error

GetConfig returns the failover configuration. GET /api/v1/proxy/failover/config

func (*FailoverAPIHandler) GetMetrics ¶

func (h *FailoverAPIHandler) GetMetrics(c echo.Context) error

GetMetrics returns failover metrics. GET /api/v1/proxy/failover/metrics

func (*FailoverAPIHandler) GetOverview ¶

func (h *FailoverAPIHandler) GetOverview(c echo.Context) error

GetOverview returns aggregated failover state for sparse dashboard/status surfaces. GET /api/v1/proxy/failover/overview

func (*FailoverAPIHandler) RegisterRoutes ¶

func (h *FailoverAPIHandler) RegisterRoutes(g *echo.Group)

RegisterRoutes registers failover API routes

func (*FailoverAPIHandler) ResetCircuitBreakers ¶

func (h *FailoverAPIHandler) ResetCircuitBreakers(c echo.Context) error

ResetCircuitBreakers resets all circuit breakers. POST /api/v1/proxy/failover/reset

func (*FailoverAPIHandler) ServeHTTP ¶

func (h *FailoverAPIHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)

ServeHTTP implements http.Handler for standalone use

func (*FailoverAPIHandler) SetOnConfigSave ¶

func (h *FailoverAPIHandler) SetOnConfigSave(fn func(*FailoverConfig) error)

SetOnConfigSave sets a callback invoked after config updates.

func (*FailoverAPIHandler) SetOnProviderRaceChange ¶

func (h *FailoverAPIHandler) SetOnProviderRaceChange(fn func(ProviderRaceConfig))

SetOnProviderRaceChange sets a callback for provider-race runtime updates.

func (*FailoverAPIHandler) SetProviderRaceStatsProvider ¶

func (h *FailoverAPIHandler) SetProviderRaceStatsProvider(fn func() ProviderRaceStatsSnapshot)

SetProviderRaceStatsProvider sets a callback for provider-race overview stats.

func (*FailoverAPIHandler) UpdateConfig ¶

func (h *FailoverAPIHandler) UpdateConfig(c echo.Context) error

UpdateConfig updates failover configuration. PUT /api/v1/proxy/failover/config

type FailoverCircuitBreakerStatusResponse ¶

type FailoverCircuitBreakerStatusResponse struct {
	State       string `json:"state"`
	Failures    int    `json:"failures"`
	LastFailure string `json:"last_failure,omitempty"`
}

type FailoverConfig ¶

type FailoverConfig struct {
	Enabled          bool          `json:"enabled" yaml:"enabled"`
	MaxRetries       int           `json:"max_retries" yaml:"max_retries"`
	RetryDelay       time.Duration `json:"retry_delay" yaml:"retry_delay"`
	CircuitBreaker   bool          `json:"circuit_breaker" yaml:"circuit_breaker"`
	FailureThreshold int           `json:"failure_threshold" yaml:"failure_threshold"`
	RecoveryTimeout  time.Duration `json:"recovery_timeout" yaml:"recovery_timeout"`

	// Transient error (502/503) circuit breaker overrides — shorter recovery for temporary blips
	TransientRecoveryTimeout time.Duration `json:"transient_recovery_timeout" yaml:"transient_recovery_timeout"` // 0 = use RecoveryTimeout

	// Smart failover settings
	ErrorClassification   ErrorClassificationConfig `json:"error_classification" yaml:"error_classification"`
	StreamingAnomaly      StreamingAnomalyConfig    `json:"streaming_anomaly" yaml:"streaming_anomaly"`
	ContextWindowCheck    bool                      `json:"context_window_check" yaml:"context_window_check"`       // Skip providers with insufficient context window
	QuotaCooldown         time.Duration             `json:"quota_cooldown" yaml:"quota_cooldown"`                   // Skip recently-errored providers for quota errors (0 = disabled)
	ContextWindowOverride map[string]int            `json:"context_window_override" yaml:"context_window_override"` // Provider name -> max context tokens override
	ProviderRace          ProviderRaceConfig        `json:"provider_race" yaml:"provider_race"`                     // Multi-provider concurrent race
}

FailoverConfig holds the failover configuration.

type FailoverHandler ¶

type FailoverHandler struct {
	// contains filtered or unexported fields
}

FailoverHandler handles request failover

func NewFailoverHandler ¶

func NewFailoverHandler(config *FailoverConfig, router *Router) *FailoverHandler

NewFailoverHandler creates a new failover handler

func (*FailoverHandler) Config ¶

func (fh *FailoverHandler) Config() *FailoverConfig

Config returns the failover configuration pointer.

func (*FailoverHandler) Execute ¶

func (fh *FailoverHandler) Execute(
	ctx context.Context,
	provider *Provider,
	fn func(*Provider) (*http.Response, error),
) (*http.Response, error)

Execute executes request with failover support

func (*FailoverHandler) GetBreakerState ¶

func (fh *FailoverHandler) GetBreakerState(name string) string

GetBreakerState returns the circuit breaker state for a provider

func (*FailoverHandler) GetBreakerStats ¶

func (fh *FailoverHandler) GetBreakerStats() map[string]interface{}

GetBreakerStats returns circuit breaker statistics

func (*FailoverHandler) LoadBreakerState ¶

func (fh *FailoverHandler) LoadBreakerState(snap BreakerSnapshot)

LoadBreakerState restores a circuit breaker's state from persisted data. Creates the breaker if it doesn't exist yet.

func (*FailoverHandler) ResetAllBreakers ¶

func (fh *FailoverHandler) ResetAllBreakers()

ResetAllBreakers resets all circuit breakers

func (*FailoverHandler) ResetBreaker ¶

func (fh *FailoverHandler) ResetBreaker(name string)

ResetBreaker resets a circuit breaker

func (*FailoverHandler) SnapshotBreakers ¶

func (fh *FailoverHandler) SnapshotBreakers() []BreakerSnapshot

SnapshotBreakers returns a snapshot of all circuit breaker states for persistence.

type FailoverMetrics ¶

type FailoverMetrics struct {

	// Error counts by type
	ErrorsByType map[RetryableErrorType]int64 `json:"errors_by_type"`

	// Failover counts
	FailoverTotal   int64 `json:"failover_total"`
	FailoverSuccess int64 `json:"failover_success"`
	FailoverFailure int64 `json:"failover_failure"`

	// Provider-specific stats
	ProviderErrors    map[string]map[RetryableErrorType]int64 `json:"provider_errors"`
	ProviderFailovers map[string]int64                        `json:"provider_failovers"`

	// Streaming anomaly stats
	StreamAnomalies int64 `json:"stream_anomalies"`
	// contains filtered or unexported fields
}

FailoverMetrics tracks failover statistics

func NewFailoverMetrics ¶

func NewFailoverMetrics() *FailoverMetrics

NewFailoverMetrics creates new metrics tracker

func (*FailoverMetrics) GetStats ¶

func (m *FailoverMetrics) GetStats() map[string]interface{}

GetStats returns a copy of current stats

func (*FailoverMetrics) RecordError ¶

func (m *FailoverMetrics) RecordError(provider string, classification *ErrorClassification)

RecordError records an error occurrence

func (*FailoverMetrics) RecordFailover ¶

func (m *FailoverMetrics) RecordFailover(fromProvider, toProvider string, success bool)

RecordFailover records a failover event

func (*FailoverMetrics) RecordProviderPoolResult ¶

func (m *FailoverMetrics) RecordProviderPoolResult(result *providerpool.FailoverResult)

RecordProviderPoolResult maps ProviderPool failover callbacks into smart failover metrics.

func (*FailoverMetrics) RecordStreamAnomaly ¶

func (m *FailoverMetrics) RecordStreamAnomaly()

RecordStreamAnomaly records a streaming anomaly

type FailoverMetricsResponse ¶

// FailoverMetricsResponse is the response body for the metrics endpoint.
//
// It exposes the same JSON keys as the exported fields of FailoverMetrics,
// but the error-type map keys are plain strings here rather than
// RetryableErrorType values. None of the fields use omitempty, so nil maps
// are encoded as null.
type FailoverMetricsResponse struct {
	ErrorsByType      map[string]int64            `json:"errors_by_type"`
	FailoverTotal     int64                       `json:"failover_total"`
	FailoverSuccess   int64                       `json:"failover_success"`
	FailoverFailure   int64                       `json:"failover_failure"`
	ProviderErrors    map[string]map[string]int64 `json:"provider_errors"`
	ProviderFailovers map[string]int64            `json:"provider_failovers"`
	StreamAnomalies   int64                       `json:"stream_anomalies"`
}

FailoverMetricsResponse is the response for metrics endpoint

type FailoverOverviewResponse ¶

// FailoverOverviewResponse bundles failover metrics, the failover
// configuration, provider-race statistics and per-breaker status into a
// single JSON payload.
//
// Config is embedded by value, so it is encoded using FailoverConfig's own
// json tags.
// NOTE(review): CircuitBreakers is presumably keyed by provider name —
// confirm against the handler that builds this response.
type FailoverOverviewResponse struct {
	Metrics         FailoverMetricsResponse                         `json:"metrics"`
	Config          FailoverConfig                                  `json:"config"`
	ProviderRace    ProviderRaceStatsSnapshot                       `json:"provider_race"`
	CircuitBreakers map[string]FailoverCircuitBreakerStatusResponse `json:"circuit_breakers"`
}

type FailoverSnapshot ¶

// FailoverSnapshot contains failover statistics: total/success/failure
// counters, a per-reason counter map, and a list of recent entries.
//
// Recent holds pointers to the unexported failoverLogEntry type, so code
// outside this package can read the field but cannot name its element type.
type FailoverSnapshot struct {
	Total    int64               `json:"total"`
	Success  int64               `json:"success"`
	Failure  int64               `json:"failure"`
	ByReason map[string]int64    `json:"by_reason"`
	Recent   []*failoverLogEntry `json:"recent"`
}

FailoverSnapshot contains failover statistics.

type FormatConverter ¶

type FormatConverter struct{}

FormatConverter handles conversion between OpenAI and Anthropic API formats

func NewFormatConverter ¶

func NewFormatConverter() *FormatConverter

NewFormatConverter creates a new format converter

func (*FormatConverter) ConvertModelsResponse ¶

func (fc *FormatConverter) ConvertModelsResponse(body []byte, sourceType ProviderType) ([]byte, error)

ConvertModelsResponse converts provider models list to OpenAI format

func (*FormatConverter) ConvertRequest ¶

func (fc *FormatConverter) ConvertRequest(body []byte, targetType ProviderType) ([]byte, string, error)

ConvertRequest converts OpenAI request to target provider format

func (*FormatConverter) ConvertRequestWithCaching ¶

func (fc *FormatConverter) ConvertRequestWithCaching(body []byte, targetType ProviderType, promptCacheEnabled bool) ([]byte, string, error)

ConvertRequestWithCaching converts OpenAI request to target provider format and optionally applies prompt caching in a single unmarshal/marshal pass. This eliminates the double unmarshal/marshal that ConvertRequest + InjectPromptCaching does.

func (*FormatConverter) ConvertResponse ¶

func (fc *FormatConverter) ConvertResponse(body []byte, sourceType ProviderType) ([]byte, error)

ConvertResponse converts provider response to OpenAI format

func (*FormatConverter) ConvertStreamingResponse ¶

func (fc *FormatConverter) ConvertStreamingResponse(reader io.Reader, sourceType ProviderType, writer http.ResponseWriter) error

ConvertStreamingResponse creates a streaming response converter

func (*FormatConverter) DetectProviderType ¶

func (fc *FormatConverter) DetectProviderType(endpoint string) ProviderType

DetectProviderType detects the provider type from endpoint URL

type GeminiCandidate ¶

// GeminiCandidate is one element of the Candidates list in GeminiResponse
// and GeminiStreamChunk.
//
// SafetyRatings is a list of anonymous structs, each carrying a category and
// a probability string. FinishReason and SafetyRatings are left out of the
// JSON output when empty; Content is always emitted.
type GeminiCandidate struct {
	Content       GeminiContent `json:"content"`
	FinishReason  string        `json:"finishReason,omitempty"`
	SafetyRatings []struct {
		Category    string `json:"category"`
		Probability string `json:"probability"`
	} `json:"safetyRatings,omitempty"`
}

type GeminiContent ¶

// GeminiContent is a list of parts with an optional role. It is used by
// GeminiRequest.Contents, GeminiRequest.SystemInstruction and
// GeminiCandidate.Content.
//
// Role is omitted from JSON when empty. Parts has no omitempty, so a nil
// slice is encoded as "parts": null.
type GeminiContent struct {
	Role  string       `json:"role,omitempty"`
	Parts []GeminiPart `json:"parts"`
}

type GeminiFunctionCall ¶

// GeminiFunctionCall names a function together with its arguments. It is
// referenced from GeminiPart.FunctionCall.
//
// Args is a free-form JSON object; without omitempty a nil map is encoded as
// "args": null.
type GeminiFunctionCall struct {
	Name string                 `json:"name"`
	Args map[string]interface{} `json:"args"`
}

type GeminiFunctionDeclaration ¶

// GeminiFunctionDeclaration is one entry of GeminiTool.FunctionDeclarations.
//
// Description and Parameters are omitted from JSON when empty/nil.
// Parameters is untyped and accepts any JSON-encodable value.
// NOTE(review): presumably a JSON-Schema-like object — confirm against the
// converter that fills it.
type GeminiFunctionDeclaration struct {
	Name        string      `json:"name"`
	Description string      `json:"description,omitempty"`
	Parameters  interface{} `json:"parameters,omitempty"`
}

type GeminiFunctionResponse ¶

// GeminiFunctionResponse pairs a function name with a free-form response
// object. It is referenced from GeminiPart.FunctionResponse.
//
// Response has no omitempty, so a nil map is encoded as "response": null.
type GeminiFunctionResponse struct {
	Name     string                 `json:"name"`
	Response map[string]interface{} `json:"response"`
}

type GeminiGenerationConfig ¶

// GeminiGenerationConfig holds sampling and output-length settings. It is
// referenced from GeminiRequest.GenerationConfig.
//
// Every field is omitempty, so zero values are dropped from the encoded JSON.
// In particular an explicit Temperature or TopP of 0 cannot be expressed with
// these non-pointer fields: it is indistinguishable from "not set" and is
// never sent.
type GeminiGenerationConfig struct {
	Temperature     float64  `json:"temperature,omitempty"`
	TopP            float64  `json:"topP,omitempty"`
	TopK            int      `json:"topK,omitempty"`
	MaxOutputTokens int      `json:"maxOutputTokens,omitempty"`
	StopSequences   []string `json:"stopSequences,omitempty"`
}

type GeminiInlineData ¶

// GeminiInlineData carries a MIME type and a data string. It is referenced
// from GeminiPart.InlineData.
//
// NOTE(review): Data is a plain string here; presumably base64-encoded
// content — confirm against the code that populates it.
type GeminiInlineData struct {
	MimeType string `json:"mimeType"`
	Data     string `json:"data"`
}

type GeminiModel ¶

// GeminiModel is one element of GeminiModelsResponse.Models.
//
// Name is always emitted; every other field is omitted from JSON when it has
// its zero value.
type GeminiModel struct {
	Name                       string   `json:"name"`
	Version                    string   `json:"version,omitempty"`
	DisplayName                string   `json:"displayName,omitempty"`
	Description                string   `json:"description,omitempty"`
	InputTokenLimit            int      `json:"inputTokenLimit,omitempty"`
	OutputTokenLimit           int      `json:"outputTokenLimit,omitempty"`
	SupportedGenerationMethods []string `json:"supportedGenerationMethods,omitempty"`
}

type GeminiModelsResponse ¶

// GeminiModelsResponse represents the Gemini models list response.
//
// Models has no omitempty, so a nil slice is encoded as "models": null.
type GeminiModelsResponse struct {
	Models []GeminiModel `json:"models"`
}

GeminiModelsResponse represents Gemini models list response

type GeminiPart ¶

// GeminiPart is one element of GeminiContent.Parts. It can carry text,
// inline data, a function call, or a function response.
//
// All four fields are omitempty, so a zero GeminiPart encodes as {}.
// NOTE(review): presumably exactly one field is set per part — confirm
// against the converter; nothing in this declaration enforces it.
type GeminiPart struct {
	Text             string                  `json:"text,omitempty"`
	InlineData       *GeminiInlineData       `json:"inlineData,omitempty"`
	FunctionCall     *GeminiFunctionCall     `json:"functionCall,omitempty"`
	FunctionResponse *GeminiFunctionResponse `json:"functionResponse,omitempty"`
}

type GeminiRequest ¶

// GeminiRequest is the Gemini-format request payload.
//
// Contents is always emitted (a nil slice encodes as null). The system
// instruction, generation config, tools and safety settings are optional and
// are omitted from JSON when nil/empty.
type GeminiRequest struct {
	Contents          []GeminiContent         `json:"contents"`
	SystemInstruction *GeminiContent          `json:"systemInstruction,omitempty"`
	GenerationConfig  *GeminiGenerationConfig `json:"generationConfig,omitempty"`
	Tools             []GeminiTool            `json:"tools,omitempty"`
	SafetySettings    []GeminiSafetySetting   `json:"safetySettings,omitempty"`
}

GeminiRequest and the other Gemini* types in this package are the Gemini request/response types.

type GeminiResponse ¶

// GeminiResponse is the Gemini-format response payload: a list of
// candidates plus optional usage metadata and model version.
//
// Candidates is always emitted; UsageMetadata and ModelVersion are omitted
// from JSON when nil/empty.
type GeminiResponse struct {
	Candidates    []GeminiCandidate    `json:"candidates"`
	UsageMetadata *GeminiUsageMetadata `json:"usageMetadata,omitempty"`
	ModelVersion  string               `json:"modelVersion,omitempty"`
}

type GeminiSafetySetting ¶

// GeminiSafetySetting is one element of GeminiRequest.SafetySettings: a
// category paired with a threshold. Both are plain strings and are always
// emitted in JSON.
type GeminiSafetySetting struct {
	Category  string `json:"category"`
	Threshold string `json:"threshold"`
}

type GeminiStreamChunk ¶

// GeminiStreamChunk is a Gemini-format streaming chunk.
//
// It has the same Candidates and UsageMetadata fields as GeminiResponse, but
// no ModelVersion, and Candidates is omitempty here, so a chunk without
// candidates omits the key entirely.
type GeminiStreamChunk struct {
	Candidates    []GeminiCandidate    `json:"candidates,omitempty"`
	UsageMetadata *GeminiUsageMetadata `json:"usageMetadata,omitempty"`
}

type GeminiTool ¶

// GeminiTool is one element of GeminiRequest.Tools. It wraps a list of
// function declarations, which is omitted from JSON when empty.
type GeminiTool struct {
	FunctionDeclarations []GeminiFunctionDeclaration `json:"functionDeclarations,omitempty"`
}

type GeminiUsageMetadata ¶

// GeminiUsageMetadata carries token counts. It is referenced from
// GeminiResponse.UsageMetadata and GeminiStreamChunk.UsageMetadata.
//
// All three counts are always emitted, including zeros.
// NOTE(review): TotalTokenCount is presumably prompt + candidates — confirm;
// nothing in this declaration computes or enforces it.
type GeminiUsageMetadata struct {
	PromptTokenCount     int `json:"promptTokenCount"`
	CandidatesTokenCount int `json:"candidatesTokenCount"`
	TotalTokenCount      int `json:"totalTokenCount"`
}

type GuardConfig ¶

// GuardConfig holds prompt guard configuration. It carries json tags only
// (no yaml tags).
//
// NOTE(review): CustomPatterns and WhitelistPatterns are plain strings here;
// presumably regular expressions — confirm against PromptGuard.
// NOTE(review): the unit of MaxPromptLength (bytes, runes or tokens) is not
// visible in this declaration — confirm.
type GuardConfig struct {
	Enabled           bool     `json:"enabled"`
	BlockOnDetection  bool     `json:"block_on_detection"`
	LogDetections     bool     `json:"log_detections"`
	CustomPatterns    []string `json:"custom_patterns"`
	WhitelistPatterns []string `json:"whitelist_patterns"`
	MaxPromptLength   int      `json:"max_prompt_length"`
}

GuardConfig holds prompt guard configuration

func DefaultGuardConfig ¶

func DefaultGuardConfig() *GuardConfig

DefaultGuardConfig returns default guard configuration

type GuardResult ¶

// GuardResult represents the result of a prompt guard check.
//
// Blocked and RiskLevel are always emitted in JSON; Reason, Matches and
// Suggestions are omitted when empty.
// NOTE(review): RiskLevel presumably uses the same low/medium/high values as
// GuardRule.RiskLevel — confirm.
type GuardResult struct {
	Blocked     bool     `json:"blocked"`
	Reason      string   `json:"reason,omitempty"`
	RiskLevel   string   `json:"risk_level"`
	Matches     []string `json:"matches,omitempty"`
	Suggestions []string `json:"suggestions,omitempty"`
}

GuardResult represents the result of a prompt guard check

type GuardRule ¶

// GuardRule represents a custom guard rule: an identified, named pattern
// with a risk level and an action.
//
// RiskLevel and Action are plain strings; the accepted values are listed in
// the field comments below but are not enforced by the type.
type GuardRule struct {
	ID          string `json:"id"`
	Name        string `json:"name"`
	Pattern     string `json:"pattern"`
	Description string `json:"description,omitempty"`
	Enabled     bool   `json:"enabled"`
	RiskLevel   string `json:"risk_level"` // low, medium, high
	Action      string `json:"action"`     // log, warn, block
}

GuardRule represents a custom guard rule

type HealthCheckConfig ¶

// HealthCheckConfig holds the health check configuration passed to
// NewHealthChecker.
//
// time.Duration has no JSON marshaler of its own, so encoding/json reads and
// writes Interval and Timeout as integer nanoseconds.
// NOTE(review): how the yaml tags decode durations depends on the YAML library
// in use — confirm.
type HealthCheckConfig struct {
	Enabled  bool          `json:"enabled" yaml:"enabled"`
	Interval time.Duration `json:"interval" yaml:"interval"`
	Timeout  time.Duration `json:"timeout" yaml:"timeout"`
}

HealthCheckConfig holds the health check configuration.

type HealthChecker ¶

type HealthChecker struct {
	// contains filtered or unexported fields
}

HealthChecker performs health checks on providers

func NewHealthChecker ¶

func NewHealthChecker(router *Router, connPool *ConnectionPool, config *HealthCheckConfig) *HealthChecker

NewHealthChecker creates a new health checker

func (*HealthChecker) CheckNow ¶

func (hc *HealthChecker) CheckNow()

CheckNow performs an immediate health check on all providers

func (*HealthChecker) CheckProvider ¶

func (hc *HealthChecker) CheckProvider(name string) error

CheckProvider performs an immediate health check on a specific provider

func (*HealthChecker) Start ¶

func (hc *HealthChecker) Start()

Start starts the health check loop

func (*HealthChecker) Stop ¶

func (hc *HealthChecker) Stop()

Stop stops the health checker

type HotReloadableConfig ¶

// HotReloadableConfig defines which config sections can be hot-reloaded.
//
// Every section is a pointer (or a slice of pointers) without omitempty, so
// an unset section is encoded as null. When decoding, a key that is absent
// from the JSON leaves the corresponding field nil.
type HotReloadableConfig struct {
	// Can be hot-reloaded
	Providers   []*ProviderConfig  `json:"providers"`
	Failover    *FailoverConfig    `json:"failover"`
	Guard       *GuardConfig       `json:"guard"`
	Auth        *AuthConfig        `json:"auth"`
	RateLimit   *RateLimitConfig   `json:"rate_limit"`
	Mock        *MockConfig        `json:"mock"`
	ModelCompat *ModelCompatConfig `json:"model_compat"`
}

HotReloadableConfig defines which config sections can be hot-reloaded

type MaskingCategory ¶

type MaskingCategory string

MaskingCategory identifies one of the reserved masking categories.

// Reserved MaskingCategory values, used in MaskingRule.Category.
const (
	MaskingPII         MaskingCategory = "pii"         // Personally Identifiable Information
	MaskingCredentials MaskingCategory = "credentials" // API keys, passwords, tokens
	MaskingFinancial   MaskingCategory = "financial"   // Credit card numbers, bank accounts
	MaskingCustom      MaskingCategory = "custom"      // User-defined patterns
)

type MaskingConfig ¶

// MaskingConfig holds the data masking configuration: a global switch, the
// list of masking rules, and an optional callback.
type MaskingConfig struct {
	Enabled bool           `json:"enabled"`
	Rules   []*MaskingRule `json:"rules"`

	// OnMask is a callback taking the rule ID, the original text and the
	// masked text.
	// NOTE(review): presumably invoked once per masked match — confirm
	// against DataMasker.
	//
	// It is excluded from JSON: encoding/json cannot encode func values, so
	// without the "-" tag json.Marshal fails for the whole struct with an
	// UnsupportedTypeError, even when OnMask is nil.
	OnMask func(ruleID, original, masked string) `json:"-"`
}

MaskingConfig holds the data masking configuration.

func DefaultMaskingConfig ¶

func DefaultMaskingConfig() *MaskingConfig

DefaultMaskingConfig returns default masking configuration

type MaskingDirection ¶

type MaskingDirection string

MaskingDirection specifies when to apply masking

// MaskingDirection values, used in MaskingRule.Direction to select whether a
// rule applies to requests, responses, or both.
const (
	MaskingRequest  MaskingDirection = "request"
	MaskingResponse MaskingDirection = "response"
	MaskingBoth     MaskingDirection = "both"
)

type MaskingRule ¶

// MaskingRule defines what to mask: a regex pattern, the text that replaces
// a match, the traffic direction the rule applies to, and an enabled flag.
//
// Category and Direction are typed strings (MaskingCategory and
// MaskingDirection). All fields are always emitted in JSON.
type MaskingRule struct {
	ID          string           `json:"id"`
	Name        string           `json:"name"`
	Category    MaskingCategory  `json:"category"`
	Pattern     string           `json:"pattern"`     // Regex pattern
	Replacement string           `json:"replacement"` // e.g., "[REDACTED]", "***"
	Direction   MaskingDirection `json:"direction"`   // request, response, both
	Enabled     bool             `json:"enabled"`
}

MaskingRule defines what to mask

func GetDefaultRules ¶

func GetDefaultRules() []*MaskingRule

GetDefaultRules returns predefined masking rules. Replacement text uses {MASKED} placeholder — resolved at runtime via DataMasker.maskLabel().

type MetricsBucket ¶

// MetricsBucket holds aggregated metrics for a time period delimited by
// StartTime and EndTime. MetricsCollector.GetTimeSeries returns a slice of
// these.
//
// The latency and TTFT totals are emitted under JSON keys with an "_ms"
// suffix (total_latency_ms, total_ttft_ms), which differ from the Go field
// names. All values are running totals, not averages.
type MetricsBucket struct {
	StartTime      time.Time `json:"start_time"`
	EndTime        time.Time `json:"end_time"`
	RequestCount   int64     `json:"request_count"`
	SuccessCount   int64     `json:"success_count"`
	ErrorCount     int64     `json:"error_count"`
	TotalLatency   int64     `json:"total_latency_ms"`
	TotalTTFT      int64     `json:"total_ttft_ms"`
	TotalTokensIn  int64     `json:"total_tokens_in"`
	TotalTokensOut int64     `json:"total_tokens_out"`
	TotalBytes     int64     `json:"total_bytes"`
}

MetricsBucket holds aggregated metrics for a time period

type MetricsCollector ¶

type MetricsCollector struct {
	// contains filtered or unexported fields
}

MetricsCollector collects and aggregates usage metrics

func NewMetricsCollector ¶

func NewMetricsCollector(config *MetricsConfig) *MetricsCollector

NewMetricsCollector creates a new metrics collector

func (*MetricsCollector) GetAllProviderMetrics ¶

func (mc *MetricsCollector) GetAllProviderMetrics() map[string]*ProviderMetrics

GetAllProviderMetrics returns metrics for all providers

func (*MetricsCollector) GetProviderMetrics ¶

func (mc *MetricsCollector) GetProviderMetrics(name string) (*ProviderMetrics, bool)

GetProviderMetrics returns metrics for a specific provider

func (*MetricsCollector) GetRecentRequests ¶

func (mc *MetricsCollector) GetRecentRequests(limit int) []RequestMetrics

GetRecentRequests returns recent request metrics

func (*MetricsCollector) GetTimeSeries ¶

func (mc *MetricsCollector) GetTimeSeries(start, end time.Time) []*MetricsBucket

GetTimeSeries returns time-series data for a time range

func (*MetricsCollector) LatencyStats ¶

func (mc *MetricsCollector) LatencyStats() map[string]interface{}

LatencyStats returns detailed latency statistics

func (*MetricsCollector) Record ¶

func (mc *MetricsCollector) Record(m RequestMetrics)

Record records a request metric

func (*MetricsCollector) Reset ¶

func (mc *MetricsCollector) Reset()

Reset resets all metrics

func (*MetricsCollector) Summary ¶

func (mc *MetricsCollector) Summary() map[string]interface{}

Summary returns a summary of all metrics

type MetricsConfig ¶

// MetricsConfig holds metrics configuration passed to NewMetricsCollector.
//
// time.Duration has no JSON marshaler of its own, so encoding/json reads and
// writes RetentionPeriod and BucketSize as integer nanoseconds.
type MetricsConfig struct {
	Enabled         bool          `json:"enabled"`
	RetentionPeriod time.Duration `json:"retention_period"`
	BucketSize      time.Duration `json:"bucket_size"`
}

MetricsConfig holds metrics configuration

func DefaultMetricsConfig ¶

func DefaultMetricsConfig() *MetricsConfig

DefaultMetricsConfig returns default metrics configuration

type MiddlewareFunc ¶

type MiddlewareFunc func(http.Handler) http.Handler

MiddlewareFunc is a function that wraps an http.Handler

type MockConfig ¶

// MockConfig holds the mock endpoint configuration: a global enabled flag
// and the list of mock endpoints. It is passed to NewMockHandler.
//
// Endpoints has no omitempty, so a nil slice is encoded as null.
type MockConfig struct {
	Enabled   bool            `json:"enabled"`
	Endpoints []*MockEndpoint `json:"endpoints"`
}

MockConfig holds the mock endpoint configuration.

func DefaultMockConfig ¶

func DefaultMockConfig() *MockConfig

DefaultMockConfig returns default mock configuration

type MockEndpoint ¶

// MockEndpoint defines a mock endpoint, identified by path and method (the
// pair used by MockHandler.GetEndpoint, RemoveEndpoint and SetEnabled).
//
// Response is untyped and accepts any JSON-encodable value. Delay is a
// time.Duration, which encoding/json reads and writes as integer
// nanoseconds. Headers is omitted from JSON when empty.
type MockEndpoint struct {
	Path       string        `json:"path"`
	Method     string        `json:"method"`
	Response   interface{}   `json:"response"`
	StatusCode int           `json:"status_code"`
	Delay      time.Duration `json:"delay"`
	Enabled    bool          `json:"enabled"`
	Headers    http.Header   `json:"headers,omitempty"`
}

MockEndpoint defines a mock endpoint

type MockHandler ¶

type MockHandler struct {
	// contains filtered or unexported fields
}

MockHandler handles mock endpoints

func NewMockHandler ¶

func NewMockHandler(config *MockConfig) *MockHandler

NewMockHandler creates a new mock handler

func (*MockHandler) AddEndpoint ¶

func (mh *MockHandler) AddEndpoint(ep *MockEndpoint)

AddEndpoint adds a mock endpoint

func (*MockHandler) GetEndpoint ¶

func (mh *MockHandler) GetEndpoint(path, method string) (*MockEndpoint, bool)

GetEndpoint returns a mock endpoint

func (*MockHandler) Handle ¶

func (mh *MockHandler) Handle(w http.ResponseWriter, r *http.Request) bool

Handle checks if request matches a mock endpoint

func (*MockHandler) IsEnabled ¶

func (mh *MockHandler) IsEnabled() bool

IsEnabled returns whether mock handling is enabled

func (*MockHandler) ListEndpoints ¶

func (mh *MockHandler) ListEndpoints() []*MockEndpoint

ListEndpoints returns all mock endpoints

func (*MockHandler) RemoveEndpoint ¶

func (mh *MockHandler) RemoveEndpoint(path, method string) bool

RemoveEndpoint removes a mock endpoint

func (*MockHandler) ResetStats ¶

func (mh *MockHandler) ResetStats()

ResetStats resets hit counters

func (*MockHandler) SetEnabled ¶

func (mh *MockHandler) SetEnabled(path, method string, enabled bool) bool

SetEnabled enables or disables a mock endpoint

func (*MockHandler) SetGlobalEnabled ¶

func (mh *MockHandler) SetGlobalEnabled(enabled bool)

SetGlobalEnabled enables or disables all mock endpoints

func (*MockHandler) Stats ¶

func (mh *MockHandler) Stats() map[string]interface{}

Stats returns mock handler statistics

type ModelCompatConfig ¶

// ModelCompatConfig holds the model compatibility configuration passed to
// NewModelCompatLayer.
//
// DetectionCache is a time.Duration, which encoding/json reads and writes as
// integer nanoseconds. ToolCallFallback is a plain string; the accepted
// values are listed in its field comment but are not enforced by the type.
// NOTE(review): ModelOverrides is presumably keyed by model ID — confirm.
type ModelCompatConfig struct {
	AutoDetect       bool                      `json:"auto_detect"`
	DetectionCache   time.Duration             `json:"detection_cache"`
	ModelOverrides   map[string]*ModelFeatures `json:"model_overrides"`
	ToolCallFallback string                    `json:"tool_call_fallback"` // error, prompt, skip
}

ModelCompatConfig holds the model compatibility configuration.

func DefaultModelCompatConfig ¶

func DefaultModelCompatConfig() *ModelCompatConfig

DefaultModelCompatConfig returns default model compatibility config

type ModelCompatLayer ¶

type ModelCompatLayer struct {
	// contains filtered or unexported fields
}

ModelCompatLayer handles model compatibility

func NewModelCompatLayer ¶

func NewModelCompatLayer(config *ModelCompatConfig) *ModelCompatLayer

NewModelCompatLayer creates a new compatibility layer

func (*ModelCompatLayer) AdaptRequest ¶

func (mcl *ModelCompatLayer) AdaptRequest(model string, req *ChatRequest) (*ChatRequest, error)

AdaptRequest adapts request for model compatibility

func (*ModelCompatLayer) GetAllFeatures ¶

func (mcl *ModelCompatLayer) GetAllFeatures() map[string]*ModelFeatures

GetAllFeatures returns all model features as a map

func (*ModelCompatLayer) GetFeatures ¶

func (mcl *ModelCompatLayer) GetFeatures(model string) *ModelFeatures

GetFeatures returns features for a model

func (*ModelCompatLayer) ListModels ¶

func (mcl *ModelCompatLayer) ListModels() []*ModelFeatures

ListModels returns all known models

func (*ModelCompatLayer) SetFeatures ¶

func (mcl *ModelCompatLayer) SetFeatures(model string, features *ModelFeatures)

SetFeatures sets features for a model

func (*ModelCompatLayer) Stats ¶

func (mcl *ModelCompatLayer) Stats() map[string]interface{}

Stats returns compatibility layer statistics

type ModelFamily ¶

// ModelFamily represents a model series/family. Each family has a name, a
// list of regex patterns that match model IDs, a target provider, and a
// fallback model for background tasks.
//
// Every field carries both a json and a yaml tag with the same key.
type ModelFamily struct {
	Name     string   `json:"name" yaml:"name"`         // e.g., "claude-3", "gpt-4", "gemini-pro"
	Patterns []string `json:"patterns" yaml:"patterns"` // Regex patterns to match model IDs
	Provider string   `json:"provider" yaml:"provider"` // Target provider for this family
	Fallback string   `json:"fallback" yaml:"fallback"` // Fallback model for background tasks
}

ModelFamily represents a model series/family

type ModelFeatures ¶

// ModelFeatures describes model capabilities: four boolean capability flags
// and two token limits for a named model. It is the value type of
// ModelCompatConfig.ModelOverrides.
//
// No field uses omitempty, so false flags and zero limits are always
// emitted in JSON.
// NOTE(review): whether a zero token limit means "unknown" or "no limit" is
// not visible in this declaration — confirm.
type ModelFeatures struct {
	Model            string `json:"model"`
	ToolCalling      bool   `json:"tool_calling"`
	Vision           bool   `json:"vision"`
	Streaming        bool   `json:"streaming"`
	SystemPrompt     bool   `json:"system_prompt"`
	MaxContextTokens int    `json:"max_context_tokens"`
	MaxOutputTokens  int    `json:"max_output_tokens"`
}

ModelFeatures describes model capabilities

type ModelOrigin ¶

type ModelOrigin string

ModelOrigin indicates where a model runs.

// ModelOrigin values. OriginRegistry.Resolve is documented to default to
// OriginCloud.
const (
	OriginCloud ModelOrigin = "cloud"
	OriginEdge  ModelOrigin = "edge"
	OriginLocal ModelOrigin = "local"
)

type ModelPricing ¶

// ModelPricing holds per-model token pricing (USD per 1M tokens), with
// separate prices for input and output tokens.
//
// Unlike most types in this package, the yaml tag is written before the
// json tag; the keys are identical, so this has no effect on encoding.
type ModelPricing struct {
	Input  float64 `yaml:"input" json:"input"`
	Output float64 `yaml:"output" json:"output"`
}

ModelPricing holds per-model token pricing (USD per 1M tokens).

type ModelRoute ¶

// ModelRoute represents the routing decision returned by
// ModelRouter.RouteModel: the originally requested model, the model and
// provider it was routed to, and optional details of how the decision was
// reached.
//
// Family, RuleApplied and Downgraded are omitted from JSON when they have
// their zero value.
type ModelRoute struct {
	OriginalModel string `json:"original_model"`
	TargetModel   string `json:"target_model"`
	Provider      string `json:"provider"`
	Family        string `json:"family,omitempty"`
	RuleApplied   string `json:"rule_applied,omitempty"`
	Downgraded    bool   `json:"downgraded,omitempty"`
}

ModelRoute represents the routing decision

type ModelRouter ¶

type ModelRouter struct {
	// contains filtered or unexported fields
}

ModelRouter handles intelligent model-based routing

func NewModelRouter ¶

func NewModelRouter(config *ModelRouterConfig) (*ModelRouter, error)

NewModelRouter creates a new model router

func (*ModelRouter) AddRule ¶

func (mr *ModelRouter) AddRule(rule *RegexRule) error

AddRule adds a new regex rule

func (*ModelRouter) GetFamilies ¶

func (mr *ModelRouter) GetFamilies() []*ModelFamily

GetFamilies returns all configured model families

func (*ModelRouter) GetRules ¶

func (mr *ModelRouter) GetRules() []*RegexRule

GetRules returns all configured regex rules

func (*ModelRouter) IsBackgroundRequest ¶

func (mr *ModelRouter) IsBackgroundRequest(r *http.Request) bool

IsBackgroundRequest detects if request is a background task (e.g., title generation)

func (*ModelRouter) RemoveRule ¶

func (mr *ModelRouter) RemoveRule(pattern string) bool

RemoveRule removes a regex rule by pattern

func (*ModelRouter) RouteModel ¶

func (mr *ModelRouter) RouteModel(requestedModel string, isBackground bool) (*ModelRoute, error)

RouteModel determines the target provider and model for a request

func (*ModelRouter) SetTierResolver ¶

func (mr *ModelRouter) SetTierResolver(tr *TierResolver)

SetTierResolver sets the tier resolver for dynamic background downgrade.

func (*ModelRouter) Stats ¶

func (mr *ModelRouter) Stats() map[string]interface{}

Stats returns model router statistics

type ModelRouterConfig ¶

// ModelRouterConfig holds the configuration for model routing passed to
// NewModelRouter.
//
// The RegexCustomRules field is serialized under the key "regex_rules" in
// both JSON and YAML, which differs from the Go field name.
type ModelRouterConfig struct {
	Enabled          bool           `json:"enabled" yaml:"enabled"`
	Families         []*ModelFamily `json:"families" yaml:"families"`
	BackgroundModels []string       `json:"background_models" yaml:"background_models"` // Models for background tasks
	DefaultFamily    string         `json:"default_family" yaml:"default_family"`
	RegexCustomRules []*RegexRule   `json:"regex_rules" yaml:"regex_rules"` // Expert-level regex rules
}

ModelRouterConfig holds the configuration for model routing.

func DefaultModelRouterConfig ¶

func DefaultModelRouterConfig() *ModelRouterConfig

DefaultModelRouterConfig returns default model router configuration. Families and BackgroundModels are intentionally empty — the TierResolver dynamically handles model classification based on actual pricing data. Users can still add custom RegexCustomRules for explicit overrides.

type ModelTier ¶

type ModelTier string

ModelTier indicates the routing tier of a model.

// ModelTier values.
const (
	// Current tiers
	TierLarge ModelTier = "large" // primary LLM tier
	TierSmall ModelTier = "small" // built-in small-model tier
)

type OAuthTokenProvider ¶

// OAuthTokenProvider provides OAuth access tokens for providers.
//
// GetAccessToken and GetCopilotAccessToken each take a provider ID and
// return a token string or an error. GetCopilotEndpoint takes no arguments
// and returns an endpoint string.
// NOTE(review): whether implementations cache or refresh tokens, and whether
// they must be safe for concurrent use, is not visible here — confirm
// against the implementations.
type OAuthTokenProvider interface {
	GetAccessToken(providerID string) (string, error)
	GetCopilotAccessToken(providerID string) (string, error)
	GetCopilotEndpoint() string
}

OAuthTokenProvider provides OAuth access tokens for providers.

type OpenAIChatRequest ¶

// OpenAIChatRequest is the OpenAI-format chat request payload.
//
// Model and Messages are always emitted; every other field is omitempty. As
// a consequence, an explicit zero for Temperature, TopP, PresencePenalty or
// FrequencyPenalty is dropped from the encoded JSON and cannot be told apart
// from "not set".
//
// ToolChoice is untyped and accepts any JSON value. ResponseFormat is a
// free-form JSON object.
type OpenAIChatRequest struct {
	Model            string                 `json:"model"`
	Messages         []OpenAIMessage        `json:"messages"`
	MaxTokens        int                    `json:"max_tokens,omitempty"`
	Temperature      float64                `json:"temperature,omitempty"`
	TopP             float64                `json:"top_p,omitempty"`
	Stream           bool                   `json:"stream,omitempty"`
	Stop             []string               `json:"stop,omitempty"`
	PresencePenalty  float64                `json:"presence_penalty,omitempty"`
	FrequencyPenalty float64                `json:"frequency_penalty,omitempty"`
	Tools            []OpenAITool           `json:"tools,omitempty"`
	ToolChoice       interface{}            `json:"tool_choice,omitempty"`
	ResponseFormat   map[string]interface{} `json:"response_format,omitempty"`
}

OpenAIChatRequest and the other OpenAI* types in this package are the OpenAI request/response types.

type OpenAIChatResponse ¶

// OpenAIChatResponse is the OpenAI-format (non-streaming) chat response
// payload.
//
// Choices and Usage are declared as anonymous structs. No field uses
// omitempty, so zero values (including an all-zero Usage object) are always
// emitted in JSON.
type OpenAIChatResponse struct {
	ID      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	Model   string `json:"model"`
	Choices []struct {
		Index        int           `json:"index"`
		Message      OpenAIMessage `json:"message"`
		FinishReason string        `json:"finish_reason"`
	} `json:"choices"`
	Usage struct {
		PromptTokens     int `json:"prompt_tokens"`
		CompletionTokens int `json:"completion_tokens"`
		TotalTokens      int `json:"total_tokens"`
	} `json:"usage"`
}

type OpenAIMessage ¶

// OpenAIMessage is one chat message in OpenAI format. It is used by
// OpenAIChatRequest.Messages and by the choices of OpenAIChatResponse.
//
// Content is untyped. After json.Unmarshal it holds a string for a JSON
// string, or a []interface{} for a JSON array. It has no omitempty, so a nil
// Content is encoded as "content": null. Name, ToolCalls and ToolCallID are
// omitted when empty.
type OpenAIMessage struct {
	Role       string           `json:"role"`
	Content    interface{}      `json:"content"` // string or []ContentPart
	Name       string           `json:"name,omitempty"`
	ToolCalls  []OpenAIToolCall `json:"tool_calls,omitempty"`
	ToolCallID string           `json:"tool_call_id,omitempty"`
}

type OpenAIStreamChunk ¶

// OpenAIStreamChunk is one chunk of an OpenAI-format streaming response.
//
// ID, Object and Choices are always emitted; Created, Model and Usage are
// omitted from JSON when zero/nil. StreamChunkChoice and StreamChunkUsage
// are declared elsewhere in this package.
type OpenAIStreamChunk struct {
	ID      string              `json:"id"`
	Object  string              `json:"object"`
	Created int64               `json:"created,omitempty"`
	Model   string              `json:"model,omitempty"`
	Choices []StreamChunkChoice `json:"choices"`
	Usage   *StreamChunkUsage   `json:"usage,omitempty"`
}

type OpenAITool ¶

// OpenAITool is one element of OpenAIChatRequest.Tools: a type string plus
// a function definition.
//
// Strict is omitempty, so it appears in JSON only when true.
type OpenAITool struct {
	Type     string             `json:"type"`
	Function OpenAIToolFunction `json:"function"`
	Strict   bool               `json:"strict,omitempty"`
}

type OpenAIToolCall ¶

// OpenAIToolCall is one element of OpenAIMessage.ToolCalls.
//
// It carries both an ID (always emitted) and a CallID (omitted when empty).
// NOTE(review): the relationship between ID and CallID is not visible in
// this declaration — confirm which one callers should match on.
type OpenAIToolCall struct {
	ID       string             `json:"id"`
	CallID   string             `json:"call_id,omitempty"`
	Type     string             `json:"type"`
	Function OpenAIToolCallFunc `json:"function"`
}

type OpenAIToolCallFunc ¶

// OpenAIToolCallFunc is the function part of an OpenAI tool call.
//
// Arguments is stored as a string. The type defines a custom UnmarshalJSON,
// documented as handling Arguments arriving as either a JSON string or a
// JSON object, so decoding does not follow the default struct rules.
type OpenAIToolCallFunc struct {
	Name      string `json:"name"`
	Arguments string `json:"arguments"`
}

OpenAIToolCallFunc is the function part of an OpenAI tool call. Arguments is normally a JSON string, but some providers send it as a JSON object.

func (*OpenAIToolCallFunc) UnmarshalJSON ¶

func (f *OpenAIToolCallFunc) UnmarshalJSON(data []byte) error

UnmarshalJSON handles Arguments being either a JSON string or a JSON object.

type OpenAIToolFunction ¶

// OpenAIToolFunction is the function definition inside an OpenAITool: a
// name, an optional description, and optional parameters.
//
// Parameters is untyped and accepts any JSON-encodable value.
// NOTE(review): presumably a JSON Schema object — confirm.
type OpenAIToolFunction struct {
	Name        string      `json:"name"`
	Description string      `json:"description,omitempty"`
	Parameters  interface{} `json:"parameters,omitempty"`
}

type OriginRegistry ¶

type OriginRegistry struct {
	// contains filtered or unexported fields
}

OriginRegistry resolves model IDs to their origin (cloud/edge/local) using glob patterns with an LRU cache for repeated lookups.

func NewOriginRegistry ¶

func NewOriginRegistry(patterns map[string]string) *OriginRegistry

NewOriginRegistry creates a registry from a map of glob→origin strings.

func (*OriginRegistry) Resolve ¶

func (r *OriginRegistry) Resolve(model string) ModelOrigin

Resolve returns the origin for a model ID. Defaults to OriginCloud. Results are cached after first lookup.

type PerformanceConfig ¶

// PerformanceConfig holds performance optimization settings in three
// groups: buffer pool, response cache, and connection metrics. It is passed
// to NewPerformanceManager.
//
// CacheTTL and MetricsInterval are time.Duration values, which encoding/json
// reads and writes as integer nanoseconds.
type PerformanceConfig struct {
	// Buffer pool settings
	BufferPoolEnabled bool `json:"buffer_pool_enabled"`
	BufferPoolSize    int  `json:"buffer_pool_size"`    // Number of buffers
	BufferInitialSize int  `json:"buffer_initial_size"` // Initial buffer size in bytes
	BufferMaxSize     int  `json:"buffer_max_size"`     // Max buffer size in bytes

	// Response cache settings
	CacheEnabled         bool          `json:"cache_enabled"`
	CacheMaxSize         int           `json:"cache_max_size"`       // Max cache entries
	CacheMaxEntrySize    int           `json:"cache_max_entry_size"` // Max size per entry in bytes
	CacheTTL             time.Duration `json:"cache_ttl"`
	CacheableStatusCodes []int         `json:"cacheable_status_codes"`

	// Connection metrics
	MetricsEnabled  bool          `json:"metrics_enabled"`
	MetricsInterval time.Duration `json:"metrics_interval"`
}

PerformanceConfig holds performance optimization settings

func DefaultPerformanceConfig ¶

func DefaultPerformanceConfig() *PerformanceConfig

DefaultPerformanceConfig returns default performance configuration

type PerformanceManager ¶

type PerformanceManager struct {
	// contains filtered or unexported fields
}

PerformanceManager coordinates all performance optimizations

func NewPerformanceManager ¶

func NewPerformanceManager(config *PerformanceConfig) *PerformanceManager

NewPerformanceManager creates a new performance manager

func (*PerformanceManager) GetBufferPool ¶

func (pm *PerformanceManager) GetBufferPool() *BufferPool

GetBufferPool returns the buffer pool

func (*PerformanceManager) GetCache ¶

func (pm *PerformanceManager) GetCache() *ResponseCache

GetCache returns the response cache

func (*PerformanceManager) GetConnectionMetrics ¶

func (pm *PerformanceManager) GetConnectionMetrics() *ConnectionMetrics

GetConnectionMetrics returns the connection metrics

func (*PerformanceManager) Stats ¶

func (pm *PerformanceManager) Stats() map[string]interface{}

Stats returns all performance statistics

type Pipeline ¶

type Pipeline struct {
	// contains filtered or unexported fields
}

Pipeline represents a request processing pipeline

func NewPipeline ¶

func NewPipeline(config *PipelineConfig) *Pipeline

NewPipeline creates a new request pipeline

func (*Pipeline) GetAuth ¶

func (p *Pipeline) GetAuth() *Authenticator

GetAuth returns the authenticator

func (*Pipeline) GetGuard ¶

func (p *Pipeline) GetGuard() *PromptGuard

GetGuard returns the prompt guard

func (*Pipeline) GetMetrics ¶

func (p *Pipeline) GetMetrics() *MetricsCollector

GetMetrics returns the metrics collector

func (*Pipeline) GetSessionMonitor ¶

func (p *Pipeline) GetSessionMonitor() *SessionMonitor

GetSessionMonitor returns the session monitor

func (*Pipeline) Stats ¶

func (p *Pipeline) Stats() map[string]interface{}

Stats returns pipeline statistics

func (*Pipeline) Use ¶

func (p *Pipeline) Use(mw MiddlewareFunc)

Use adds a middleware to the pipeline

func (*Pipeline) Wrap ¶

func (p *Pipeline) Wrap(handler http.Handler) http.Handler

Wrap wraps a handler with the pipeline middlewares

type PipelineConfig ¶

type PipelineConfig struct {
	AuthConfig      *AuthConfig      `json:"auth"`
	RateLimitConfig *RateLimitConfig `json:"rate_limit"`
	GuardConfig     *GuardConfig     `json:"guard"`
	SessionConfig   *SessionConfig   `json:"session"`
	MetricsConfig   *MetricsConfig   `json:"metrics"`
}

PipelineConfig holds pipeline configuration

func DefaultPipelineConfig ¶

func DefaultPipelineConfig() *PipelineConfig

DefaultPipelineConfig returns default pipeline configuration

type PipelineSnapshot ¶

type PipelineSnapshot struct {
	Routing  RoutingStatsSnapshot `json:"routing"`
	Failover FailoverSnapshot     `json:"failover"`
}

PipelineSnapshot is the combined stats snapshot returned by the API.

type PipelineStatsCollector ¶

type PipelineStatsCollector struct {
	// contains filtered or unexported fields
}

PipelineStatsCollector collects and batch-persists proxy pipeline statistics (routing, failover) asynchronously.

func NewPipelineStatsCollector ¶

func NewPipelineStatsCollector(db *sql.DB, routingStats *RoutingStats) *PipelineStatsCollector

NewPipelineStatsCollector creates a new collector. Call Start() to begin background persistence.

func NewPipelineStatsCollectorWithReadDB ¶

func NewPipelineStatsCollectorWithReadDB(writeDB, readDB *sql.DB, routingStats *RoutingStats) *PipelineStatsCollector

NewPipelineStatsCollectorWithReadDB creates a new collector with separate write and read database handles.

func (*PipelineStatsCollector) Close ¶

func (c *PipelineStatsCollector) Close() error

Close implements io.Closer for lifecycle shutdown hooks.

func (*PipelineStatsCollector) LoadBreakerState ¶

func (c *PipelineStatsCollector) LoadBreakerState()

LoadBreakerState restores circuit breaker state from DB. Must be called after SetFailoverHandler.

func (*PipelineStatsCollector) LoadSmartMetrics ¶

func (c *PipelineStatsCollector) LoadSmartMetrics()

LoadSmartMetrics restores smart failover metrics from DB. Must be called after SetSmartFailoverMetrics.

func (*PipelineStatsCollector) OnFailover ¶

func (c *PipelineStatsCollector) OnFailover(result *providerpool.FailoverResult)

func (*PipelineStatsCollector) SetFailoverHandler ¶

func (c *PipelineStatsCollector) SetFailoverHandler(fh *FailoverHandler)

SetFailoverHandler sets the failover handler reference for circuit breaker persistence.

func (*PipelineStatsCollector) SetSmartFailoverMetrics ¶

func (c *PipelineStatsCollector) SetSmartFailoverMetrics(m *FailoverMetrics)

SetSmartFailoverMetrics sets the smart failover metrics reference for persistence.

func (*PipelineStatsCollector) Snapshot ¶

func (c *PipelineStatsCollector) Snapshot() PipelineSnapshot

Snapshot returns a combined pipeline stats snapshot.

func (*PipelineStatsCollector) Start ¶

func (c *PipelineStatsCollector) Start()

Start begins the background flush goroutine.

func (*PipelineStatsCollector) Stop ¶

func (c *PipelineStatsCollector) Stop()

Stop flushes remaining data and stops the background goroutine.

type PortAllocator ¶

type PortAllocator struct {
	// contains filtered or unexported fields
}

PortAllocator handles dynamic port allocation

func NewPortAllocator ¶

func NewPortAllocator(config *PortConfig) *PortAllocator

NewPortAllocator creates a new port allocator

func (*PortAllocator) Allocate ¶

func (pa *PortAllocator) Allocate() (int, error)

Allocate finds and binds to an available port

func (*PortAllocator) GetBindAddress ¶

func (pa *PortAllocator) GetBindAddress() string

GetBindAddress returns the bind address with port

func (*PortAllocator) GetEndpoint ¶

func (pa *PortAllocator) GetEndpoint() string

GetEndpoint returns the full proxy endpoint URL

func (*PortAllocator) GetPort ¶

func (pa *PortAllocator) GetPort() int

GetPort returns the currently allocated port

func (*PortAllocator) Release ¶

func (pa *PortAllocator) Release() error

Release releases the allocated port

type PortConfig ¶

type PortConfig struct {
	Value       int    `json:"value" yaml:"value"`               // 0 = dynamic allocation
	Range       string `json:"range" yaml:"range"`               // e.g., "9000-9100"
	BindAddress string `json:"bind_address" yaml:"bind_address"` // Default: "127.0.0.1"
	PortFile    string `json:"port_file" yaml:"port_file"`       // Write allocated port to file
}

PortConfig holds the port configuration.

type PromptCacheBreakDetector ¶

type PromptCacheBreakDetector struct {
	// contains filtered or unexported fields
}

func NewPromptCacheBreakDetector ¶

func NewPromptCacheBreakDetector(maxTracked int) *PromptCacheBreakDetector

func (*PromptCacheBreakDetector) Observe ¶

func (d *PromptCacheBreakDetector) Observe(snapshot PromptCacheStateSnapshot, cacheReadTokens int) *PromptCacheBreakObservation

type PromptCacheBreakObservation ¶

type PromptCacheBreakObservation struct {
	Reasons []string
	Summary string
}

type PromptCacheSnapshot ¶

type PromptCacheSnapshot struct {
	Requests             int64   `json:"requests"`
	CacheHits            int64   `json:"cache_hits"`
	CacheMisses          int64   `json:"cache_misses"`
	TotalCacheReadTokens int64   `json:"total_cache_read_tokens"`
	TotalCacheCreation   int64   `json:"total_cache_creation_tokens"`
	TotalInputTokens     int64   `json:"total_input_tokens"`
	HitRate              float64 `json:"hit_rate"`
	ReuseRatio           float64 `json:"reuse_ratio"`
}

PromptCacheSnapshot is a JSON-serializable snapshot of cache stats.

type PromptCacheStateSnapshot ¶

type PromptCacheStateSnapshot struct {
	TrackingKey      string
	Provider         string
	Model            string
	Path             string
	SystemHash       string
	CacheControlHash string
	ToolsHash        string
	ExtraBodyHash    string
	SystemBlockCount int
	ToolCount        int
	ToolNames        []string
	PerToolHashes    map[string]string
}

func BuildPromptCacheStateSnapshot ¶

func BuildPromptCacheStateSnapshot(body []byte, provider, fallbackModel, path string) PromptCacheStateSnapshot

type PromptCacheStats ¶

type PromptCacheStats struct {
	// Requests is the total number of requests processed.
	Requests atomic.Int64
	// CacheHits is the number of requests where cache_read_input_tokens > 0.
	CacheHits atomic.Int64
	// CacheMisses is the number of requests where cache_read_input_tokens == 0.
	CacheMisses atomic.Int64
	// TotalCacheReadTokens is the cumulative cache_read_input_tokens.
	TotalCacheReadTokens atomic.Int64
	// TotalCacheCreationTokens is the cumulative cache_creation_input_tokens.
	TotalCacheCreationTokens atomic.Int64
	// TotalInputTokens is the cumulative input tokens (for computing reuse ratio).
	TotalInputTokens atomic.Int64
}

PromptCacheStats tracks prompt caching effectiveness. All fields are updated atomically and safe for concurrent use.

func (*PromptCacheStats) HitRate ¶

func (s *PromptCacheStats) HitRate() float64

HitRate returns the cache hit rate as a float64 in the range [0, 1].

func (*PromptCacheStats) Record ¶

func (s *PromptCacheStats) Record(inputTokens, cacheReadTokens, cacheCreationTokens int)

Record records a single request's cache usage.

func (*PromptCacheStats) ReuseRatio ¶

func (s *PromptCacheStats) ReuseRatio() float64

ReuseRatio returns the fraction of input tokens served from cache.

func (*PromptCacheStats) Snapshot ¶

func (s *PromptCacheStats) Snapshot() PromptCacheSnapshot

Snapshot returns a copy of the current stats for reporting.

type PromptGuard ¶

type PromptGuard struct {
	// contains filtered or unexported fields
}

PromptGuard detects potential prompt injection attacks

func NewPromptGuard ¶

func NewPromptGuard(config *GuardConfig) *PromptGuard

NewPromptGuard creates a new prompt guard

func (*PromptGuard) AddPattern ¶

func (pg *PromptGuard) AddPattern(pattern string) error

AddPattern adds a custom detection pattern

func (*PromptGuard) AddRule ¶

func (pg *PromptGuard) AddRule(rule GuardRule) error

AddRule adds a custom guard rule

func (*PromptGuard) AddWhitelistPattern ¶

func (pg *PromptGuard) AddWhitelistPattern(pattern string) error

AddWhitelistPattern adds a whitelist pattern

func (*PromptGuard) Check ¶

func (pg *PromptGuard) Check(prompt string) *GuardResult

Check analyzes a prompt for potential injection attacks

func (*PromptGuard) GetRules ¶

func (pg *PromptGuard) GetRules() []GuardRule

GetRules returns all custom guard rules

func (*PromptGuard) Stats ¶

func (pg *PromptGuard) Stats() map[string]interface{}

Stats returns guard statistics

type Provider ¶

type Provider struct {
	Config      *ProviderConfig
	Healthy     bool
	LastCheck   time.Time
	LastError   error
	LastLatency time.Duration
}

Provider represents an upstream provider

type ProviderConfig ¶

type ProviderConfig struct {
	Name          string `json:"name" yaml:"name"`
	Endpoint      string `json:"endpoint" yaml:"endpoint"`
	APIKey        string `json:"api_key" yaml:"api_key"`
	Priority      int    `json:"priority" yaml:"priority"`
	Weight        int    `json:"weight" yaml:"weight"`
	Enabled       bool   `json:"enabled" yaml:"enabled"`
	HealthCheck   string `json:"health_check" yaml:"health_check"`
	SkipTLSVerify bool   `json:"skip_tls_verify" yaml:"skip_tls_verify"`
}

ProviderConfig holds the configuration for a provider.

type ProviderConnectionMetrics ¶

type ProviderConnectionMetrics struct {
	Requests     int64
	Successes    int64
	Failures     int64
	TotalLatency int64
	Reused       int64
}

ProviderConnectionMetrics tracks per-provider connection stats

type ProviderMemory ¶

type ProviderMemory struct {
	// contains filtered or unexported fields
}

ProviderMemory remembers provider capabilities discovered at runtime. Uses a single ecache2 with uint64 keys (FNV-1a hashes) for zero-alloc lookups.

func NewProviderMemory ¶

func NewProviderMemory() *ProviderMemory

NewProviderMemory creates a new provider memory with sensible TTLs.

func (*ProviderMemory) BlacklistModel ¶

func (pm *ProviderMemory) BlacklistModel(providerID, baseURL, model string)

BlacklistModel marks a model as unavailable on this provider (skip it next time).

func (*ProviderMemory) ClearModelBlacklist ¶

func (pm *ProviderMemory) ClearModelBlacklist(providerID, baseURL, model string)

ClearModelBlacklist removes a model from the blacklist (for example, so the user can retry after a quota recharge).

func (*ProviderMemory) ClearThrottle ¶

func (pm *ProviderMemory) ClearThrottle(providerID, baseURL string)

ClearThrottle removes the throttle restriction (for example, so the user can retry after service recovery).

func (*ProviderMemory) ForgetFormat ¶

func (pm *ProviderMemory) ForgetFormat(providerID, baseURL string)

ForgetFormat evicts the cached format.

func (*ProviderMemory) ForgetModelAlias ¶

func (pm *ProviderMemory) ForgetModelAlias(providerID, baseURL, requestedModel string)

ForgetModelAlias evicts a cached model alias.

func (*ProviderMemory) ForgetModelFormat ¶

func (pm *ProviderMemory) ForgetModelFormat(providerID, baseURL, requestedModel string)

ForgetModelFormat evicts the cached model-scoped format.

func (*ProviderMemory) ForgetThrottle ¶

func (pm *ProviderMemory) ForgetThrottle(providerID, baseURL string)

ForgetThrottle clears the throttle for a provider.

func (*ProviderMemory) ForgetToolCap ¶

func (pm *ProviderMemory) ForgetToolCap(providerID, baseURL string)

ForgetToolCap evicts the cached tool capability.

func (*ProviderMemory) GetRestrictions ¶

func (pm *ProviderMemory) GetRestrictions(providerID, baseURL string) map[string]interface{}

GetRestrictions returns all current restrictions (throttles + blacklists) for a provider. Used to show users what's temporarily restricted.

func (*ProviderMemory) IsModelBlacklisted ¶

func (pm *ProviderMemory) IsModelBlacklisted(providerID, baseURL, model string) bool

IsModelBlacklisted returns true if a model is known to be unavailable on this provider.

func (*ProviderMemory) IsThrottled ¶

func (pm *ProviderMemory) IsThrottled(providerID, baseURL string) bool

IsThrottled returns true if the provider is currently in a backoff period.

func (*ProviderMemory) RecallFormat ¶

func (pm *ProviderMemory) RecallFormat(providerID, baseURL string) (string, bool)

RecallFormat returns the remembered API format for a provider (as a raw string).

func (*ProviderMemory) RecallModelAlias ¶

func (pm *ProviderMemory) RecallModelAlias(providerID, baseURL, requestedModel string) (string, bool)

RecallModelAlias returns the actual model name that worked for a requested model.

func (*ProviderMemory) RecallModelFormat ¶

func (pm *ProviderMemory) RecallModelFormat(providerID, baseURL, requestedModel string) (string, bool)

RecallModelFormat returns the remembered API format for a specific provider+baseURL+requested model.

func (*ProviderMemory) RecallToolCap ¶

func (pm *ProviderMemory) RecallToolCap(providerID, baseURL string) (ToolCapLevel, bool)

RecallToolCap returns the remembered tool capability level.

func (*ProviderMemory) RememberFormat ¶

func (pm *ProviderMemory) RememberFormat(providerID, baseURL string, format string)

RememberFormat caches the API format that worked for a provider.

func (*ProviderMemory) RememberModelAlias ¶

func (pm *ProviderMemory) RememberModelAlias(providerID, baseURL, requestedModel, actualModel string)

RememberModelAlias caches a model name mapping that worked.

func (*ProviderMemory) RememberModelFormat ¶

func (pm *ProviderMemory) RememberModelFormat(providerID, baseURL, requestedModel, format string)

RememberModelFormat caches the API format that worked for a specific requested model.

func (*ProviderMemory) RememberThrottle ¶

func (pm *ProviderMemory) RememberThrottle(providerID, baseURL string, retryAfter time.Duration)

RememberThrottle records a 429 backoff for a provider.

func (*ProviderMemory) RememberToolCap ¶

func (pm *ProviderMemory) RememberToolCap(providerID, baseURL string, level ToolCapLevel)

RememberToolCap caches the tool capability level that worked.

func (*ProviderMemory) ThrottleRemaining ¶

func (pm *ProviderMemory) ThrottleRemaining(providerID, baseURL string) time.Duration

ThrottleRemaining returns how long the provider should continue backing off.

func (*ProviderMemory) ThrottleUntil ¶

func (pm *ProviderMemory) ThrottleUntil(providerID, baseURL string) (time.Time, bool)

ThrottleUntil returns the time until which a provider remains throttled.

type ProviderMetrics ¶

type ProviderMetrics struct {
	Name           string  `json:"name"`
	RequestCount   int64   `json:"request_count"`
	SuccessCount   int64   `json:"success_count"`
	ErrorCount     int64   `json:"error_count"`
	TotalLatency   int64   `json:"total_latency_ms"`
	AvgLatency     float64 `json:"avg_latency_ms"`
	TotalTTFT      int64   `json:"total_ttft_ms"`
	AvgTTFT        float64 `json:"avg_ttft_ms"`
	TotalTokensIn  int64   `json:"total_tokens_in"`
	TotalTokensOut int64   `json:"total_tokens_out"`
	TokensPerSec   float64 `json:"tokens_per_sec"` // Output tokens per second
}

ProviderMetrics holds metrics for a specific provider

type ProviderRaceConfig ¶

type ProviderRaceConfig struct {
	Enabled                    bool          `json:"enabled" yaml:"enabled"`
	MaxParallel                int           `json:"max_parallel" yaml:"max_parallel"`                                   // Max providers to race concurrently
	MinProviders               int           `json:"min_providers" yaml:"min_providers"`                                 // Require at least N candidates to start race
	EmptyRateMinSamples        int           `json:"empty_rate_min_samples" yaml:"empty_rate_min_samples"`               // Min attempts before applying empty-rate policy
	EmptyRateCooldownThreshold float64       `json:"empty_rate_cooldown_threshold" yaml:"empty_rate_cooldown_threshold"` // >= threshold enters temporary cooldown
	EmptyRateSinkThreshold     float64       `json:"empty_rate_sink_threshold" yaml:"empty_rate_sink_threshold"`         // >= threshold sinks to tail
	EmptyRateExcludeThreshold  float64       `json:"empty_rate_exclude_threshold" yaml:"empty_rate_exclude_threshold"`   // >= threshold excluded from race
	EmptyRateCooldown          time.Duration `json:"empty_rate_cooldown" yaml:"empty_rate_cooldown"`                     // Cooldown duration after threshold hit
}

ProviderRaceConfig controls concurrent provider racing behavior.

func DefaultProviderRaceConfig ¶

func DefaultProviderRaceConfig() ProviderRaceConfig

DefaultProviderRaceConfig returns default provider race configuration.

type ProviderRaceStats ¶

type ProviderRaceStats struct {
	// contains filtered or unexported fields
}

ProviderRaceStats tracks runtime effectiveness of provider racing.

func (*ProviderRaceStats) RecordRequest ¶

func (s *ProviderRaceStats) RecordRequest()

func (*ProviderRaceStats) RecordSuccess ¶

func (s *ProviderRaceStats) RecordSuccess(primaryProviderID, winnerProviderID string, winnerLatency, estimatedSaved time.Duration)

func (*ProviderRaceStats) Snapshot ¶

func (s *ProviderRaceStats) Snapshot() ProviderRaceStatsSnapshot

type ProviderRaceStatsSnapshot ¶

type ProviderRaceStatsSnapshot struct {
	RequestsTotal           int64   `json:"requests_total"`
	SuccessfulRaces         int64   `json:"successful_races"`
	Hits                    int64   `json:"hits"`
	HitRate                 float64 `json:"hit_rate"`
	AvgWinnerLatencyMs      float64 `json:"avg_winner_latency_ms"`
	EstimatedLatencySavedMs float64 `json:"estimated_latency_saved_ms"`
	EstimatedSavingsSamples int64   `json:"estimated_savings_samples"`
}

ProviderRaceStatsSnapshot is a point-in-time copy for APIs/UI.

type ProviderType ¶

type ProviderType string

ProviderType identifies the API format type

const (
	ProviderTypeOpenAI    ProviderType = "openai"
	ProviderTypeAnthropic ProviderType = "anthropic"
	ProviderTypeGemini    ProviderType = "gemini"
)

const ProviderTypeCloudCode ProviderType = "cloudcode"

ProviderTypeCloudCode identifies Google Cloud Code Assist API format

const ProviderTypeCopilot ProviderType = "copilot"

ProviderTypeCopilot identifies GitHub Copilot API format

type ProxyAPIHandler ¶

type ProxyAPIHandler struct {
	// contains filtered or unexported fields
}

ProxyAPIHandler provides HTTP handlers for proxy management APIs

func NewProxyAPIHandler ¶

func NewProxyAPIHandler(
	sessionMonitor *SessionMonitor,
	metricsCollector *MetricsCollector,
	promptGuard *PromptGuard,
	configWatcher *ConfigWatcher,
	modelCompat *ModelCompatLayer,
	mockHandler *MockHandler,
	authenticator *Authenticator,
) *ProxyAPIHandler

NewProxyAPIHandler creates a new proxy API handler

func (*ProxyAPIHandler) RegisterRoutes ¶

func (h *ProxyAPIHandler) RegisterRoutes(mux *http.ServeMux)

RegisterRoutes registers all proxy API routes

type ProxyConfig ¶

type ProxyConfig struct {
	Enabled      bool                `json:"enabled" yaml:"enabled"`
	Port         PortConfig          `json:"port" yaml:"port"`
	Route        *RouteConfig        `json:"route" yaml:"route"`
	Routing      RouteConfig         `json:"routing" yaml:"routing"` // Deprecated: use Route
	Connection   ConnectionConfig    `json:"connection" yaml:"connection"`
	HealthCheck  HealthCheckConfig   `json:"health_check" yaml:"health_check"`
	Mock         *MockConfig         `json:"mock" yaml:"mock"`
	ModelCompat  *ModelCompatConfig  `json:"model_compat" yaml:"model_compat"`
	Watcher      *WatcherConfig      `json:"watcher" yaml:"watcher"`
	Masking      *MaskingConfig      `json:"masking" yaml:"masking"`
	ModelRouter  *ModelRouterConfig  `json:"model_router" yaml:"model_router"`   // NEW: Model Router
	QuotaMonitor *QuotaMonitorConfig `json:"quota_monitor" yaml:"quota_monitor"` // NEW: Quota Monitor
	RuleRouting  *RoutingConfig      `json:"rule_routing" yaml:"rule_routing"`   // Condition-based routing rules
}

ProxyConfig is the main proxy configuration

func DefaultProxyConfig ¶

func DefaultProxyConfig() *ProxyConfig

DefaultProxyConfig returns default proxy configuration

type ProxyHandler ¶

type ProxyHandler struct {
	// contains filtered or unexported fields
}

ProxyHandler handles incoming proxy requests.

Architecture:

Client --[Proxy API Key]--> Proxy --[Provider API Key]--> Upstream (Anthropic/OpenAI)

- Proxy API Key: Used by Authenticator to validate client requests (optional)
- Provider API Key: Retrieved from Provider Pool to call upstream APIs

The Proxy uses Provider Pool's Router to:
1. Select the best provider based on model and routing strategy
2. Get the API key for the selected provider
3. Forward the request to the upstream provider

Routing modes (determined by API Key scope):
- route:auto - Auto select best provider (default)
- route:cloud - Force cloud provider (zimaos-blue-trial)
- route:local - Force local provider

Layout: hot-path fields first (same cache line), cold fields after.

func NewProxyHandler ¶

func NewProxyHandler(router *Router, connPool *ConnectionPool, failover *FailoverHandler) *ProxyHandler

NewProxyHandler creates a new proxy handler

func (*ProxyHandler) GetPipelineStats ¶

func (ph *ProxyHandler) GetPipelineStats() *PipelineStatsCollector

GetPipelineStats returns the pipeline stats collector (may be nil).

func (*ProxyHandler) GetPromptCacheStats ¶

func (ph *ProxyHandler) GetPromptCacheStats() PromptCacheSnapshot

GetPromptCacheStats returns prompt cache stats snapshot.

func (*ProxyHandler) GetProviderMemory ¶

func (ph *ProxyHandler) GetProviderMemory() *ProviderMemory

GetProviderMemory returns the provider memory instance for external access.

func (*ProxyHandler) GetProviderRaceStats ¶

func (ph *ProxyHandler) GetProviderRaceStats() ProviderRaceStatsSnapshot

GetProviderRaceStats returns a snapshot of provider-race effectiveness.

func (*ProxyHandler) GetRoutingRules ¶

func (ph *ProxyHandler) GetRoutingRules() []RoutingRule

GetRoutingRules returns the current routing rules with their enabled state.

func (*ProxyHandler) GetRoutingStats ¶

func (ph *ProxyHandler) GetRoutingStats() RoutingStatsSnapshot

GetRoutingStats returns a snapshot of routing cost savings.

func (*ProxyHandler) GetRoutingStatsRef ¶

func (ph *ProxyHandler) GetRoutingStatsRef() *RoutingStats

GetRoutingStatsRef returns the RoutingStats reference for external wiring.

func (*ProxyHandler) IsPromptCacheEnabled ¶

func (ph *ProxyHandler) IsPromptCacheEnabled() bool

IsPromptCacheEnabled returns whether Anthropic prompt caching is enabled.

func (*ProxyHandler) IsRoutingEnabled ¶

func (ph *ProxyHandler) IsRoutingEnabled() bool

IsRoutingEnabled returns whether model routing is enabled.

func (*ProxyHandler) ServeHTTP ¶

func (ph *ProxyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)

ServeHTTP implements http.Handler.

func (*ProxyHandler) SetAPIKeyValidator ¶

func (ph *ProxyHandler) SetAPIKeyValidator(validator func(key string) ([]string, error))

SetAPIKeyValidator sets the API key validator function.

func (*ProxyHandler) SetAuthProber ¶

func (ph *ProxyHandler) SetAuthProber(ap *AuthProber)

SetAuthProber sets the auth strategy prober.

func (*ProxyHandler) SetDataMasker ¶

func (ph *ProxyHandler) SetDataMasker(dm *DataMasker)

SetDataMasker sets the data masker for request/response content masking.

func (*ProxyHandler) SetModelRouter ¶

func (ph *ProxyHandler) SetModelRouter(mr *ModelRouter)

SetModelRouter sets the model router for family-based routing and background downgrade.

func (*ProxyHandler) SetOAuthManager ¶

func (ph *ProxyHandler) SetOAuthManager(m OAuthTokenProvider)

SetOAuthManager sets the OAuth token provider for OAuth-authenticated providers.

func (*ProxyHandler) SetPipelineStats ¶

func (ph *ProxyHandler) SetPipelineStats(ps *PipelineStatsCollector)

SetPipelineStats sets the unified pipeline stats collector.

func (*ProxyHandler) SetPromptCacheEnabled ¶

func (ph *ProxyHandler) SetPromptCacheEnabled(enabled bool)

SetPromptCacheEnabled toggles Anthropic prompt caching on/off at runtime.

func (*ProxyHandler) SetProviderPool ¶

func (ph *ProxyHandler) SetProviderPool(pool *providerpool.Pool)

SetProviderPool sets the Provider Pool for routing and API key lookup. Triggers background auth probing for all enabled providers so the first real request hits the cached-strategy fast path instead of probing live.

func (*ProxyHandler) SetProviderRaceConfig ¶

func (ph *ProxyHandler) SetProviderRaceConfig(cfg ProviderRaceConfig)

SetProviderRaceConfig sets concurrent provider race behavior.

func (*ProxyHandler) SetPruner ¶

func (ph *ProxyHandler) SetPruner(mw *pruner.Middleware)

SetPruner sets the context pruner middleware for the proxy handler.

func (*ProxyHandler) SetPrunerFactory ¶

func (ph *ProxyHandler) SetPrunerFactory(factory func() *pruner.Middleware)

SetPrunerFactory sets a lazy factory that creates the pruner middleware on first use. The factory is called at most once, on the first request that needs pruning.

func (*ProxyHandler) SetResponsesContextCompressor ¶

func (ph *ProxyHandler) SetResponsesContextCompressor(compressor ResponsesContextCompressor)

SetResponsesContextCompressor overrides continuation assistant compression. Pass nil to restore the default heuristic compressor.

func (*ProxyHandler) SetRoutingEnabled ¶

func (ph *ProxyHandler) SetRoutingEnabled(enabled bool)

SetRoutingEnabled toggles model routing on/off at runtime.

func (*ProxyHandler) SetRoutingRuleEnabled ¶

func (ph *ProxyHandler) SetRoutingRuleEnabled(name string, enabled bool) bool

SetRoutingRuleEnabled enables or disables a single routing rule by name.

func (*ProxyHandler) SetRuleEngine ¶

func (ph *ProxyHandler) SetRuleEngine(re *RuleEngine)

SetRuleEngine sets the condition-based rule engine for tier routing.

func (*ProxyHandler) SetSTTService ¶

func (ph *ProxyHandler) SetSTTService(service stt.Service)

SetSTTService sets the STT service used for converting input_audio into text before forwarding to /responses endpoints.

func (*ProxyHandler) SetSessionMonitor ¶

func (ph *ProxyHandler) SetSessionMonitor(sm *SessionMonitor)

SetSessionMonitor wires session monitoring into the real request chain.

func (*ProxyHandler) SetTierResolver ¶

func (ph *ProxyHandler) SetTierResolver(tr *TierResolver)

SetTierResolver sets the dynamic tier resolver and wires it into the rule engine and model router for tier-based model resolution.

type ProxyServer ¶

type ProxyServer struct {
	// contains filtered or unexported fields
}

ProxyServer is the main proxy server

func NewProxyServer ¶

func NewProxyServer(config *ProxyConfig) (*ProxyServer, error)

NewProxyServer creates a new proxy server

func (*ProxyServer) GetEndpoint ¶

func (ps *ProxyServer) GetEndpoint() string

GetEndpoint returns the proxy endpoint URL

func (*ProxyServer) GetModelRouter ¶

func (ps *ProxyServer) GetModelRouter() *ModelRouter

GetModelRouter returns the model router

func (*ProxyServer) GetPort ¶

func (ps *ProxyServer) GetPort() int

GetPort returns the proxy port

func (*ProxyServer) GetQuotaMonitor ¶

func (ps *ProxyServer) GetQuotaMonitor() *QuotaMonitor

GetQuotaMonitor returns the quota monitor

func (*ProxyServer) Start ¶

func (ps *ProxyServer) Start() error

Start starts the proxy server

func (*ProxyServer) Stats ¶

func (ps *ProxyServer) Stats() map[string]interface{}

Stats returns proxy server statistics

func (*ProxyServer) Stop ¶

func (ps *ProxyServer) Stop(ctx context.Context) error

Stop gracefully stops the proxy server

type QuotaInfo ¶

type QuotaInfo struct {
	Provider     string    `json:"provider"`
	AccountID    string    `json:"account_id,omitempty"`
	TotalQuota   int64     `json:"total_quota"`   // Total allowed requests/tokens
	UsedQuota    int64     `json:"used_quota"`    // Used requests/tokens
	RemainingPct float64   `json:"remaining_pct"` // Remaining percentage (0-100)
	ResetTime    time.Time `json:"reset_time"`    // When quota resets
	LastSync     time.Time `json:"last_sync"`     // Last sync time
	Status       string    `json:"status"`        // "healthy", "warning", "critical", "banned", "rate_limited"
	RateLimited  bool      `json:"rate_limited"`  // Currently rate limited (429)
	BanDetected  bool      `json:"ban_detected"`  // 403 ban detected
}

QuotaInfo represents quota information for a provider/account

type QuotaMonitor ¶

type QuotaMonitor struct {
	// contains filtered or unexported fields
}

QuotaMonitor tracks and manages quota information

func NewQuotaMonitor ¶

func NewQuotaMonitor(config *QuotaMonitorConfig) *QuotaMonitor

NewQuotaMonitor creates a new quota monitor

func (*QuotaMonitor) ClearBan ¶

func (qm *QuotaMonitor) ClearBan(provider string)

ClearBan clears ban status for a provider

func (*QuotaMonitor) ClearRateLimit ¶

func (qm *QuotaMonitor) ClearRateLimit(provider string)

ClearRateLimit clears rate limit status for a provider

func (*QuotaMonitor) GetBestProvider ¶

func (qm *QuotaMonitor) GetBestProvider() string

GetBestProvider returns the provider with highest remaining quota

func (*QuotaMonitor) GetQuota ¶

func (qm *QuotaMonitor) GetQuota(provider string) *QuotaInfo

GetQuota returns quota info for a specific provider

func (*QuotaMonitor) GetQuotaSummary ¶

func (qm *QuotaMonitor) GetQuotaSummary() *QuotaSummary

GetQuotaSummary returns a summary of all quotas for dashboard display

func (*QuotaMonitor) IsProviderAvailable ¶

func (qm *QuotaMonitor) IsProviderAvailable(provider string) bool

IsProviderAvailable checks if a provider is available (not banned or rate limited)

func (*QuotaMonitor) RecordError ¶

func (qm *QuotaMonitor) RecordError(provider string, statusCode int)

RecordError records an error response

func (*QuotaMonitor) RecordRequest ¶

func (qm *QuotaMonitor) RecordRequest(provider string)

RecordRequest records a request (without response details)

func (*QuotaMonitor) RecordUsage ¶

func (qm *QuotaMonitor) RecordUsage(provider string, resp *http.Response, tokensUsed int64)

RecordUsage records usage from a response

func (*QuotaMonitor) Reset ¶

func (qm *QuotaMonitor) Reset()

Reset resets all quota tracking

func (*QuotaMonitor) ResetProvider ¶

func (qm *QuotaMonitor) ResetProvider(provider string)

ResetProvider resets quota tracking for a specific provider

func (*QuotaMonitor) Stats ¶

func (qm *QuotaMonitor) Stats() map[string]interface{}

Stats returns quota monitor statistics

func (*QuotaMonitor) Stop ¶

func (qm *QuotaMonitor) Stop()

Stop stops the quota monitor

type QuotaMonitorConfig ¶

type QuotaMonitorConfig struct {
	Enabled           bool          `json:"enabled" yaml:"enabled"`
	SyncInterval      time.Duration `json:"sync_interval" yaml:"sync_interval"`           // How often to sync quotas
	WarningThreshold  float64       `json:"warning_threshold" yaml:"warning_threshold"`   // Warn when below this % (default: 20)
	CriticalThreshold float64       `json:"critical_threshold" yaml:"critical_threshold"` // Critical when below this % (default: 5)
	TrackTokens       bool          `json:"track_tokens" yaml:"track_tokens"`             // Track token usage
	TrackRequests     bool          `json:"track_requests" yaml:"track_requests"`         // Track request count
}

QuotaMonitorConfig holds the configuration for quota monitoring

func DefaultQuotaMonitorConfig ¶

func DefaultQuotaMonitorConfig() *QuotaMonitorConfig

DefaultQuotaMonitorConfig returns default configuration

type QuotaSummary ¶

type QuotaSummary struct {
	Providers        []*QuotaInfo `json:"providers"`
	AverageRemaining float64      `json:"average_remaining_pct"`
	TotalProviders   int          `json:"total_providers"`
	HealthyCount     int          `json:"healthy_count"`
	WarningCount     int          `json:"warning_count"`
	CriticalCount    int          `json:"critical_count"`
	BannedCount      int          `json:"banned_count"`
	BestProvider     string       `json:"best_provider"`
}

QuotaSummary provides dashboard-ready quota information

type RateLimitConfig ¶

type RateLimitConfig struct {
	Enabled         bool          `json:"enabled"`
	RequestsPerMin  int           `json:"requests_per_min"`
	BurstSize       int           `json:"burst_size"`
	PerIP           bool          `json:"per_ip"`
	PerAPIKey       bool          `json:"per_api_key"`
	CleanupInterval time.Duration `json:"cleanup_interval"`
}

RateLimitConfig holds rate limiting configuration

func DefaultRateLimitConfig ¶

func DefaultRateLimitConfig() *RateLimitConfig

DefaultRateLimitConfig returns default rate limit configuration

type RegexRule ¶

type RegexRule struct {
	Pattern     string `json:"pattern" yaml:"pattern"`         // Regex pattern to match
	Target      string `json:"target" yaml:"target"`           // Target model ID
	Provider    string `json:"provider" yaml:"provider"`       // Target provider
	Priority    int    `json:"priority" yaml:"priority"`       // Rule priority (lower = higher)
	Description string `json:"description" yaml:"description"` // Human-readable description
}

RegexRule allows expert-level model redirection

type RequestMetrics ¶

type RequestMetrics struct {
	Timestamp     time.Time     `json:"timestamp"`
	Provider      string        `json:"provider"`
	Model         string        `json:"model"`
	StatusCode    int           `json:"status_code"`
	Latency       time.Duration `json:"latency"` // Total response time
	TTFT          time.Duration `json:"ttft"`    // Time to First Token
	TokensIn      int64         `json:"tokens_in"`
	TokensOut     int64         `json:"tokens_out"`
	RequestSize   int64         `json:"request_size"`
	ResponseSize  int64         `json:"response_size"`
	Success       bool          `json:"success"`
	Cached        bool          `json:"cached"`
	Streaming     bool          `json:"streaming"`
	ProxyOverhead time.Duration `json:"proxy_overhead"` // Time spent in proxy processing
}

RequestMetrics holds metrics for a single request

type ResolvedRoute ¶

type ResolvedRoute struct {
	Provider   string // display name
	ProviderID string // internal ID (for sticky routing)
	Model      string
}

ResolvedRoute carries the actual provider/model chosen by the router. Callers embed a pointer in the request context; the proxy handler populates it.

func GetResolvedRouteFromContext ¶

func GetResolvedRouteFromContext(ctx context.Context) *ResolvedRoute

GetResolvedRouteFromContext is the exported version of getResolvedRoute.

type ResponseCache ¶

type ResponseCache struct {
	// contains filtered or unexported fields
}

ResponseCache provides caching for API responses

func NewResponseCache ¶

func NewResponseCache(config *PerformanceConfig) *ResponseCache

NewResponseCache creates a new response cache

func (*ResponseCache) Clear ¶

func (rc *ResponseCache) Clear()

Clear clears all cache entries

func (*ResponseCache) GenerateCacheKey ¶

func (rc *ResponseCache) GenerateCacheKey(provider, model, prompt string) string

GenerateCacheKey generates a cache key from request parameters

func (*ResponseCache) Get ¶

func (rc *ResponseCache) Get(key string) (*CacheEntry, bool)

Get retrieves a cached response

func (*ResponseCache) Set ¶

func (rc *ResponseCache) Set(key string, value []byte, statusCode int, headers map[string]string)

Set stores a response in the cache

func (*ResponseCache) Stats ¶

func (rc *ResponseCache) Stats() map[string]interface{}

Stats returns cache statistics

type ResponsesAssistantCompressionInput ¶

type ResponsesAssistantCompressionInput struct {
	AssistantText string
	UserText      string
	Mode          ResponsesAssistantCompressionMode
	MaxRunes      int
}

type ResponsesAssistantCompressionMode ¶

type ResponsesAssistantCompressionMode string

const (
	ResponsesAssistantCompressionModeGeneral ResponsesAssistantCompressionMode = "general"
	ResponsesAssistantCompressionModeChoice  ResponsesAssistantCompressionMode = "choice"
)

type ResponsesContextCompressor ¶

type ResponsesContextCompressor interface {
	CompressAssistantContext(input ResponsesAssistantCompressionInput) (string, error)
}

ResponsesContextCompressor is an abstraction layer for continuation context compression. The default implementation is heuristic; callers can plug in a local small model compressor (for example qwen 0.8B) later.

type RestrictionsHandler ¶

type RestrictionsHandler struct {
	// contains filtered or unexported fields
}

RestrictionsHandler handles provider restriction management endpoints.

func NewRestrictionsHandler ¶

func NewRestrictionsHandler(pm *ProviderMemory) *RestrictionsHandler

NewRestrictionsHandler creates a new restrictions handler.

func (*RestrictionsHandler) ClearModelBlacklist ¶

func (h *RestrictionsHandler) ClearModelBlacklist(c echo.Context) error

ClearModelBlacklist clears blacklist for a specific model on a provider. POST /api/v1/restrictions/providers/{provider}/clear-model/{model}

func (*RestrictionsHandler) ClearProviderThrottle ¶

func (h *RestrictionsHandler) ClearProviderThrottle(c echo.Context) error

ClearProviderThrottle clears throttle restriction for a provider. POST /api/v1/restrictions/providers/{provider}/clear-throttle

func (*RestrictionsHandler) GetProviderRestrictions ¶

func (h *RestrictionsHandler) GetProviderRestrictions(c echo.Context) error

GetProviderRestrictions returns current restrictions for a provider. GET /api/v1/restrictions/providers/{provider}

func (*RestrictionsHandler) RegisterRoutes ¶

func (h *RestrictionsHandler) RegisterRoutes(g *echo.Group)

RegisterRoutes registers restriction management routes.

type RetryableErrorType ¶

type RetryableErrorType string

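RetryableErrorType classifies an error into a specific category: context/token limits, rate/quota limits, provider issues, streaming anomalies, model not configured, or non-retryable errors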

const (
	// Context/Token Limits - MUST failover to provider with larger context
	ErrorTypeContextTooLong    RetryableErrorType = "context_too_long"
	ErrorTypeMaxTokensExceeded RetryableErrorType = "max_tokens_exceeded"

	// Rate/Quota Limits - failover to different provider or API key
	ErrorTypeRateLimited      RetryableErrorType = "rate_limited"
	ErrorTypeQuotaExceeded    RetryableErrorType = "quota_exceeded"
	ErrorTypeConcurrencyLimit RetryableErrorType = "concurrency_limit"

	// Provider Issues - failover to different provider
	ErrorTypeModelOverloaded    RetryableErrorType = "model_overloaded"
	ErrorTypeServiceUnavailable RetryableErrorType = "service_unavailable"
	ErrorTypeTimeout            RetryableErrorType = "timeout"

	// Streaming Anomalies - force stop and attempt recovery
	ErrorTypeRepetitiveOutput RetryableErrorType = "repetitive_output"
	ErrorTypeInfiniteLoop     RetryableErrorType = "infinite_loop"

	// Model not configured — model appears in list but isn't usable on this provider.
	// Should failover to next provider (model may work elsewhere).
	ErrorTypeModelNotConfigured RetryableErrorType = "model_not_configured"

	// Non-retryable
	ErrorTypeInvalidRequest RetryableErrorType = "invalid_request"
	ErrorTypeAuthFailed     RetryableErrorType = "auth_failed"
	ErrorTypeModelNotFound  RetryableErrorType = "model_not_found"
	ErrorTypeNetworkError   RetryableErrorType = "network_error"
	ErrorTypeUnknown        RetryableErrorType = "unknown"
)

type RouteCondition ¶

type RouteCondition struct {
	Header       string `yaml:"header,omitempty" json:"header,omitempty"`
	HeaderValue  string `yaml:"header_value,omitempty" json:"header_value,omitempty"`
	MaxBodyBytes int    `yaml:"max_body_bytes,omitempty" json:"max_body_bytes,omitempty"`
	ToolPattern  string `yaml:"tool_pattern,omitempty" json:"tool_pattern,omitempty"`
	SystemTag    string `yaml:"system_tag,omitempty" json:"system_tag,omitempty"`
}

RouteCondition defines when a rule matches. All non-zero fields must match (AND logic).

type RouteConfig ¶

type RouteConfig struct {
	DefaultProvider string            `json:"default_provider" yaml:"default_provider"`
	LoadBalancing   string            `json:"load_balancing" yaml:"load_balancing"` // priority, round-robin, weighted
	Providers       []*ProviderConfig `json:"providers" yaml:"providers"`
	Failover        FailoverConfig    `json:"failover" yaml:"failover"`
}

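RouteConfig holds the route configuration: the default provider, the load-balancing strategy, the provider list, and the failover settings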

type RouteDecision ¶

type RouteDecision struct {
	Matched  bool        `json:"-"`
	Model    string      `json:"model"`
	Origin   ModelOrigin `json:"origin"`
	Tier     ModelTier   `json:"tier,omitempty"`
	Fallback string      `json:"fallback,omitempty"`
	Rule     string      `json:"rule"`
	Reason   string      `json:"reason"`
}

RouteDecision is the output of rule evaluation.

type RouteRequest ¶

type RouteRequest struct {
	Headers       http.Header
	BodySize      int
	ToolNames     []string
	SystemMessage string
}

RouteRequest is the input to rule evaluation.

type Router ¶

type Router struct {
	// contains filtered or unexported fields
}

Router handles provider selection

func NewRouter ¶

func NewRouter(config *RouteConfig) *Router

NewRouter creates a new router

func (*Router) AddProvider ¶

func (r *Router) AddProvider(config *ProviderConfig)

AddProvider adds a new provider

func (*Router) GetAllProviders ¶

func (r *Router) GetAllProviders() []*Provider

GetAllProviders returns all providers

func (*Router) GetAvailableProviders ¶

func (r *Router) GetAvailableProviders() []*Provider

GetAvailableProviders returns enabled and healthy providers

func (*Router) GetProvider ¶

func (r *Router) GetProvider(name string) (*Provider, bool)

GetProvider returns a provider by name

func (*Router) RemoveProvider ¶

func (r *Router) RemoveProvider(name string) error

RemoveProvider removes a provider

func (*Router) SelectProvider ¶

func (r *Router) SelectProvider(req *http.Request) (*Provider, error)

SelectProvider selects the best available provider

func (*Router) SetProviderEnabled ¶

func (r *Router) SetProviderEnabled(name string, enabled bool) error

SetProviderEnabled enables or disables a provider

func (*Router) Stats ¶

func (r *Router) Stats() map[string]interface{}

Stats returns router statistics

func (*Router) UpdateHealth ¶

func (r *Router) UpdateHealth(name string, healthy bool, err error)

UpdateHealth updates provider health status

func (*Router) UpdateHealthWithLatency ¶

func (r *Router) UpdateHealthWithLatency(name string, healthy bool, err error, latency time.Duration)

UpdateHealthWithLatency updates provider health status with latency

type RoutingConfig ¶

type RoutingConfig struct {
	Enabled      bool                    `yaml:"enabled" json:"enabled"`
	Rules        []RoutingRule           `yaml:"rules" json:"rules"`
	ModelOrigins map[string]string       `yaml:"model_origins" json:"model_origins"`
	Pricing      map[string]ModelPricing `yaml:"pricing" json:"pricing"`
}

RoutingConfig is the top-level routing configuration.

func DefaultRoutingConfig ¶

func DefaultRoutingConfig() *RoutingConfig

DefaultRoutingConfig returns a RoutingConfig with built-in tier-based rules. Rules reference abstract large/small tiers instead of specific model names. The TierResolver dynamically maps tiers to actual models based on user's available model list and built-in small-model allowlist.

func ParseRoutingConfig ¶

func ParseRoutingConfig(data []byte) (*RoutingConfig, error)

ParseRoutingConfig parses YAML bytes into a RoutingConfig.

func (*RoutingConfig) ToRuleEngine ¶

func (c *RoutingConfig) ToRuleEngine(tierResolver ...*TierResolver) *RuleEngine

ToRuleEngine creates a RuleEngine from this config. The optional TierResolver enables dynamic tier → model resolution.

type RoutingRule ¶

type RoutingRule struct {
	Name        string         `yaml:"name" json:"name"`
	Priority    int            `yaml:"priority" json:"priority"`
	Condition   RouteCondition `yaml:"condition" json:"condition"`
	TargetModel string         `yaml:"target_model" json:"target_model"`
	Origin      ModelOrigin    `yaml:"origin" json:"origin"`
	Tier        ModelTier      `yaml:"tier" json:"tier,omitempty"`
	Fallback    string         `yaml:"fallback" json:"fallback,omitempty"`
	Enabled     *bool          `yaml:"enabled,omitempty" json:"enabled,omitempty"` // nil = enabled (default true)
}

RoutingRule defines a single condition-based routing rule. Routes requests to cheaper/smaller models based on task complexity.

func (*RoutingRule) Evaluate ¶

func (r *RoutingRule) Evaluate(req *RouteRequest) *RouteDecision

Evaluate checks if a single rule matches the request (no pre-compiled regex). For hot-path usage, prefer RuleEngine which pre-compiles regexes and reason strings.

func (*RoutingRule) IsEnabled ¶

func (r *RoutingRule) IsEnabled() bool

IsEnabled returns whether this rule is enabled (nil defaults to true).

type RoutingStats ¶

type RoutingStats struct {
	// contains filtered or unexported fields
}

RoutingStats tracks cost savings from model routing (rule engine + background downgrade).

func NewRoutingStats ¶

func NewRoutingStats() *RoutingStats

NewRoutingStats creates a new RoutingStats instance.

func (*RoutingStats) Load ¶

func (s *RoutingStats) Load(routedRequests, tokensRouted, costSavedMicro int64)

Load restores persisted counters (called on startup).

func (*RoutingStats) Record ¶

func (s *RoutingStats) Record(originalModel, actualModel string, respBody []byte)

Record calculates and accumulates cost savings for a routed request. originalModel is the model the client requested; actualModel is what was used.

func (*RoutingStats) RecordTokens ¶

func (s *RoutingStats) RecordTokens(originalModel, actualModel string, inputTokens, outputTokens int)

RecordTokens accumulates cost savings from pre-parsed token counts. Used by streaming paths where tokens are already extracted.

func (*RoutingStats) Snapshot ¶

func (s *RoutingStats) Snapshot() RoutingStatsSnapshot

Snapshot returns a point-in-time copy.

type RoutingStatsSnapshot ¶

type RoutingStatsSnapshot struct {
	RoutedRequests int64   `json:"routed_requests"`
	TokensRouted   int64   `json:"tokens_routed"`
	CostSavedUSD   float64 `json:"cost_saved_usd"`
}

RoutingStatsSnapshot is a point-in-time copy of routing stats.

type RuleEngine ¶

type RuleEngine struct {
	// contains filtered or unexported fields
}

RuleEngine evaluates routing rules in priority order with pre-compiled regexes.

func NewRuleEngine ¶

func NewRuleEngine(rules []RoutingRule, tierResolver ...*TierResolver) *RuleEngine

NewRuleEngine creates a rule engine, sorting rules by priority (a lower number means higher precedence). The optional TierResolver is used to resolve tier-based rules (where TargetModel is empty).

func (*RuleEngine) Evaluate ¶

func (e *RuleEngine) Evaluate(req *RouteRequest) *RouteDecision

Evaluate returns the first matching rule's pre-built decision, or nil if none match. Condition checks are inlined to avoid function call overhead and heap allocation. Rules without conditions are filtered at init time — no need to re-check here.

func (*RuleEngine) GetRules ¶

func (e *RuleEngine) GetRules() []RoutingRule

GetRules returns a snapshot of all rules with their enabled state.

func (*RuleEngine) SetRuleEnabled ¶

func (e *RuleEngine) SetRuleEnabled(name string, enabled bool) bool

SetRuleEnabled enables or disables a rule by name. Returns false if not found.

func (*RuleEngine) SetTierResolver ¶

func (e *RuleEngine) SetTierResolver(tr *TierResolver)

SetTierResolver sets or replaces the tier resolver for dynamic model resolution.

type Session ¶

type Session struct {
	ID           string        `json:"id"`
	StartTime    time.Time     `json:"start_time"`
	LastActivity time.Time     `json:"last_activity"`
	Provider     string        `json:"provider"`
	Model        string        `json:"model"`
	TokensIn     int64         `json:"tokens_in"`
	TokensOut    int64         `json:"tokens_out"`
	Status       SessionStatus `json:"status"`
	RequestCount int           `json:"request_count"`
	ErrorCount   int           `json:"error_count"`
	ClientIP     string        `json:"client_ip"`
	UserAgent    string        `json:"user_agent"`
}

Session represents an API session

type SessionConfig ¶

type SessionConfig struct {
	Enabled        bool          `json:"enabled"`
	IdleTimeout    time.Duration `json:"idle_timeout"`
	MaxSessions    int           `json:"max_sessions"`
	CleanupPeriod  time.Duration `json:"cleanup_period"`
	RetainComplete time.Duration `json:"retain_complete"`
}

SessionConfig holds session monitoring configuration

func DefaultSessionConfig ¶

func DefaultSessionConfig() *SessionConfig

DefaultSessionConfig returns default session configuration

type SessionMonitor ¶

type SessionMonitor struct {
	// contains filtered or unexported fields
}

SessionMonitor manages API sessions

func NewSessionMonitor ¶

func NewSessionMonitor(config *SessionConfig) *SessionMonitor

NewSessionMonitor creates a new session monitor

func (*SessionMonitor) CleanupIdleSessions ¶

func (sm *SessionMonitor) CleanupIdleSessions() int

CleanupIdleSessions removes idle sessions

func (*SessionMonitor) CompleteSession ¶

func (sm *SessionMonitor) CompleteSession(id string, status SessionStatus)

CompleteSession marks a session as completed

func (*SessionMonitor) GetSession ¶

func (sm *SessionMonitor) GetSession(id string) (*Session, bool)

GetSession returns a session by ID

func (*SessionMonitor) ListActiveSessions ¶

func (sm *SessionMonitor) ListActiveSessions() []*Session

ListActiveSessions returns only active sessions

func (*SessionMonitor) ListSessions ¶

func (sm *SessionMonitor) ListSessions() []*Session

ListSessions returns all sessions

func (*SessionMonitor) RecordError ¶

func (sm *SessionMonitor) RecordError(id string)

RecordError records an error for a session

func (*SessionMonitor) StartSession ¶

func (sm *SessionMonitor) StartSession(clientIP, userAgent string) *Session

StartSession creates a new session

func (*SessionMonitor) Stats ¶

func (sm *SessionMonitor) Stats() map[string]interface{}

Stats returns session statistics

func (*SessionMonitor) UpdateSession ¶

func (sm *SessionMonitor) UpdateSession(id string, provider, model string, tokensIn, tokensOut int64)

UpdateSession updates session with request data

type SessionStatus ¶

type SessionStatus string

SessionStatus represents the status of a session

const (
	SessionStatusActive    SessionStatus = "active"
	SessionStatusCompleted SessionStatus = "completed"
	SessionStatusFailed    SessionStatus = "failed"
	SessionStatusTimeout   SessionStatus = "timeout"
)

type SmartFailoverHandler ¶

type SmartFailoverHandler struct {
	*FailoverHandler
	// contains filtered or unexported fields
}

SmartFailoverHandler extends FailoverHandler with intelligent error classification

func NewSmartFailoverHandler ¶

func NewSmartFailoverHandler(config *FailoverConfig, router *Router) *SmartFailoverHandler

NewSmartFailoverHandler creates a new smart failover handler

func (*SmartFailoverHandler) ExecuteStreamingWithAnomalyDetection ¶

func (sfh *SmartFailoverHandler) ExecuteStreamingWithAnomalyDetection(
	ctx context.Context,
	provider *Provider,
	fn func(*Provider) (*http.Response, error),
	onAnomaly func(*StreamAnomaly, *StreamRecoveryStrategy),
) (*http.Response, error)

ExecuteStreamingWithAnomalyDetection executes streaming request with anomaly detection

func (*SmartFailoverHandler) ExecuteWithSmartFailover ¶

func (sfh *SmartFailoverHandler) ExecuteWithSmartFailover(
	ctx context.Context,
	provider *Provider,
	fn func(*Provider) (*http.Response, error),
) (*http.Response, error)

ExecuteWithSmartFailover executes request with intelligent failover

func (*SmartFailoverHandler) GetClassifier ¶

func (sfh *SmartFailoverHandler) GetClassifier() *APIErrorClassifier

GetClassifier returns the error classifier

func (*SmartFailoverHandler) GetMetrics ¶

func (sfh *SmartFailoverHandler) GetMetrics() *FailoverMetrics

GetMetrics returns failover metrics

type StreamAnomaly ¶

type StreamAnomaly struct {
	Type        RetryableErrorType `json:"type"`
	Pattern     string             `json:"pattern"`
	RepeatCount int                `json:"repeat_count"`
	Message     string             `json:"message"`
	Position    int                `json:"position"` // Position in buffer where anomaly was detected
}

StreamAnomaly represents a detected streaming anomaly

type StreamBuffer ¶

type StreamBuffer struct {
	// contains filtered or unexported fields
}

StreamBuffer maintains a sliding window of streamed content

func NewStreamBuffer ¶

func NewStreamBuffer(detector *StreamingAnomalyDetector) *StreamBuffer

NewStreamBuffer creates a new stream buffer

func (*StreamBuffer) GetLastAnomaly ¶

func (sb *StreamBuffer) GetLastAnomaly() *StreamAnomaly

GetLastAnomaly returns the last detected anomaly

func (*StreamBuffer) GetTotalBytes ¶

func (sb *StreamBuffer) GetTotalBytes() int64

GetTotalBytes returns total bytes processed

func (*StreamBuffer) GetValidContent ¶

func (sb *StreamBuffer) GetValidContent() []byte

GetValidContent returns content before the anomaly (if any)

func (*StreamBuffer) HasAnomaly ¶

func (sb *StreamBuffer) HasAnomaly() bool

HasAnomaly returns whether an anomaly was detected

func (*StreamBuffer) Reset ¶

func (sb *StreamBuffer) Reset()

Reset clears the buffer

func (*StreamBuffer) Write ¶

func (sb *StreamBuffer) Write(chunk []byte) (*StreamAnomaly, error)

Write adds content to buffer and checks for anomalies

type StreamChunkChoice ¶

type StreamChunkChoice struct {
	Index        int              `json:"index"`
	Delta        StreamChunkDelta `json:"delta"`
	FinishReason *string          `json:"finish_reason"`
}

StreamChunkChoice is a single choice in an OpenAI streaming chunk.

type StreamChunkDelta ¶

type StreamChunkDelta struct {
	Role      string                `json:"role,omitempty"`
	Content   string                `json:"content,omitempty"`
	ToolCalls []StreamChunkToolCall `json:"tool_calls,omitempty"`
}

StreamChunkDelta is the delta object inside an OpenAI streaming chunk choice.

type StreamChunkToolCall ¶

type StreamChunkToolCall struct {
	Index    int                     `json:"index"`
	ID       string                  `json:"id,omitempty"`
	Type     string                  `json:"type,omitempty"`
	Function StreamChunkToolCallFunc `json:"function,omitempty"`
}

StreamChunkToolCall is a single tool call inside a streaming delta.

type StreamChunkToolCallFunc ¶

type StreamChunkToolCallFunc struct {
	Name      string `json:"name,omitempty"`
	Arguments string `json:"arguments,omitempty"`
}

StreamChunkToolCallFunc is the function part of a streaming tool call.

type StreamChunkUsage ¶

type StreamChunkUsage struct {
	PromptTokens             int `json:"prompt_tokens"`
	CompletionTokens         int `json:"completion_tokens"`
	TotalTokens              int `json:"total_tokens"`
	CacheReadInputTokens     int `json:"cache_read_input_tokens,omitempty"`
	CacheCreationInputTokens int `json:"cache_creation_input_tokens,omitempty"`
}

StreamChunkUsage is the usage object in an OpenAI streaming chunk.

type StreamRecoveryStrategy ¶

type StreamRecoveryStrategy struct {
	ForceStop           bool     `json:"force_stop"`
	RetryWithTruncation bool     `json:"retry_with_truncation"`
	TruncateToTokens    int      `json:"truncate_to_tokens"`
	FailoverToProvider  string   `json:"failover_to_provider"`
	AddStopSequences    []string `json:"add_stop_sequences"`
}

StreamRecoveryStrategy defines how to recover from streaming anomalies

func GetRecoveryStrategy ¶

func GetRecoveryStrategy(anomaly *StreamAnomaly, config StreamingAnomalyConfig) *StreamRecoveryStrategy

GetRecoveryStrategy returns appropriate recovery strategy for anomaly

type StreamingAnomalyConfig ¶

type StreamingAnomalyConfig struct {
	Enabled          bool   `json:"enabled" yaml:"enabled"`
	WindowSize       int    `json:"window_size" yaml:"window_size"`
	RepeatThreshold  int    `json:"repeat_threshold" yaml:"repeat_threshold"`
	MinPatternLength int    `json:"min_pattern_length" yaml:"min_pattern_length"`
	MaxPatternLength int    `json:"max_pattern_length" yaml:"max_pattern_length"`
	RecoveryStrategy string `json:"recovery_strategy" yaml:"recovery_strategy"` // truncate_and_retry, failover, stop
}

StreamingAnomalyConfig holds the configuration for streaming anomaly detection

func DefaultStreamingAnomalyConfig ¶

func DefaultStreamingAnomalyConfig() StreamingAnomalyConfig

DefaultStreamingAnomalyConfig returns default configuration

type StreamingAnomalyDetector ¶

type StreamingAnomalyDetector struct {
	// contains filtered or unexported fields
}

StreamingAnomalyDetector detects repetitive/looping output in streaming responses

func NewStreamingAnomalyDetector ¶

func NewStreamingAnomalyDetector(config StreamingAnomalyConfig) *StreamingAnomalyDetector

NewStreamingAnomalyDetector creates a new detector

type TierResolver ¶

type TierResolver struct {
	// contains filtered or unexported fields
}

TierResolver assigns models into two stable tiers:

  - TierLarge: all non-small models (typically big LLMs)
  - TierSmall: fixed built-in small-model allowlist

Smart routing is enabled only when both tiers are present.

func NewTierResolver ¶

func NewTierResolver() *TierResolver

NewTierResolver creates an empty TierResolver.

func (*TierResolver) BestModelForTier ¶

func (tr *TierResolver) BestModelForTier(tier ModelTier) string

BestModelForTier returns the top-ranked model in the requested tier. Returns "" if no model is available.

func (*TierResolver) IsEnabled ¶

func (tr *TierResolver) IsEnabled() bool

IsEnabled returns true when both large and small tiers are available.

func (*TierResolver) ModelTierOf ¶

func (tr *TierResolver) ModelTierOf(modelID string) ModelTier

ModelTierOf returns the tier for a specific model ID. Returns "" if the model is not known.

func (*TierResolver) Resolve ¶

func (tr *TierResolver) Resolve(models []*providerpool.Model) bool

Resolve analyzes available models and assigns large/small tiers deterministically. Returns true when both tiers are present (smart routing viable). Called on provider change (same hook as candidateSnapshot rebuild).

func (*TierResolver) Stats ¶

func (tr *TierResolver) Stats() map[string]interface{}

Stats returns a summary of resolved tiers for diagnostics.

type TieredModel ¶

type TieredModel struct {
	ModelID    string    `json:"model_id"`
	ProviderID string    `json:"provider_id"`
	Tier       ModelTier `json:"tier"`
	TotalCost  float64   `json:"total_cost"` // InputPrice + OutputPrice per 1M tokens
}

TieredModel holds a model with its resolved tier and cost.

type ToggleState ¶

type ToggleState struct {
	PrunerEnabled      bool            `json:"pruner_enabled"`
	RoutingEnabled     bool            `json:"routing_enabled"`
	MaskingEnabled     bool            `json:"masking_enabled"`
	MaskingRules       map[string]bool `json:"masking_rules,omitempty"`
	PrunerBackend      string          `json:"pruner_backend,omitempty"`
	RoutingRules       map[string]bool `json:"routing_rules,omitempty"`
	PromptCacheEnabled bool            `json:"prompt_cache_enabled"`
	FailoverConfig     *FailoverConfig `json:"failover_config,omitempty"`
	Version            int             `json:"version,omitempty"` // migration marker
}

ToggleState holds persisted feature toggle states.

type ToggleStore ¶

type ToggleStore struct {
	// contains filtered or unexported fields
}

ToggleStore persists feature toggle states via kvstore.

func NewToggleStore ¶

func NewToggleStore(kv kvstore.Store) *ToggleStore

NewToggleStore creates a new toggle store.

func (*ToggleStore) Load ¶

func (ts *ToggleStore) Load(ctx context.Context) (*ToggleState, error)

Load retrieves persisted toggle state. Returns nil if no state has been saved yet.

func (*ToggleStore) Save ¶

func (ts *ToggleStore) Save(ctx context.Context, state *ToggleState) error

Save persists the current toggle state.

type Tool ¶

type Tool struct {
	Name        string      `json:"name"`
	Description string      `json:"description"`
	Parameters  interface{} `json:"parameters"`
}

Tool represents a tool definition

type ToolCapLevel ¶

type ToolCapLevel int

ToolCapLevel represents the tool capability level of a provider endpoint.

const (
	ToolCapNative  ToolCapLevel = iota // Native tool_use supported
	ToolCapPrompt                      // Tools via system prompt injection
	ToolCapNone                        // No tools at all
	ToolCapUnknown ToolCapLevel = -1   // Not yet probed
)

type UpstreamRequestBridge ¶

type UpstreamRequestBridge interface {
	Name() string
	Match(*UpstreamRequestBridgeContext) bool
	Build(*UpstreamRequestBridgeContext) error
}

UpstreamRequestBridge adapts an OpenAI-edge request into a provider-specific upstream request.

type UpstreamRequestBridgeContext ¶

type UpstreamRequestBridgeContext struct {
	Route              *providerpool.RouteResult
	Provider           *providerpool.Provider
	EffectiveFormat    providerpool.APIFormat
	TargetURL          *url.URL
	RequestPath        string
	Body               []byte
	PromptCacheEnabled bool
	AudioTranscriber   chatAudioTranscriber
}

UpstreamRequestBridgeContext carries mutable request-building state. Bridges can rewrite path/body according to endpoint or API format requirements.

type WatcherConfig ¶

type WatcherConfig struct {
	Enabled      bool          `json:"enabled"`
	PollInterval time.Duration `json:"poll_interval"`
}

WatcherConfig holds the settings for the configuration watcher

func DefaultWatcherConfig ¶

func DefaultWatcherConfig() *WatcherConfig

DefaultWatcherConfig returns default watcher configuration

Directories ¶

Path	Synopsis
testutil	Package testutil provides testing utilities for the proxy package

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Documentation ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func ApplyPromptCaching ¶

func BackgroundTaskFromContext ¶

func DisableModelRoutingFromContext ¶

func DisableResponsesContinuationFromContext ¶

func GetExcludedProviders ¶

func GetPinnedProvider ¶

func InjectPromptCaching ¶

func IsContextWindowExceededMessage ¶

func LocaleFromContext ¶

func LogTokenChurn ¶

func ReadPortFromFile ¶

func SanitizeError ¶

func SessionIDFromContext ¶

func ValidateRoutingConfig ¶

func WithBackgroundTask ¶

func WithDisableModelRouting ¶

func WithDisableResponsesContinuation ¶

func WithExcludedProviders ¶

func WithLocale ¶

func WithPinnedProvider ¶

func WithResolvedRoute ¶

func WithSessionID ¶

Types ¶

type APIErrorClassifier ¶

func NewAPIErrorClassifier ¶

func (*APIErrorClassifier) AddProviderPatterns ¶

func (*APIErrorClassifier) ClassifyError ¶

type AnthropicCacheControl ¶

type AnthropicContentBlock ¶

type AnthropicMessage ¶

type AnthropicRequest ¶

type AnthropicResponse ¶

type AnthropicStreamContentBlock ¶

type AnthropicStreamDelta ¶

type AnthropicStreamEvent ¶

type AnthropicStreamUsage ¶

type AnthropicSystemBlock ¶

type AnthropicTool ¶

type AuthConfig ¶

func DefaultAuthConfig ¶

type AuthExhaustedError ¶

func (*AuthExhaustedError) Error ¶

type AuthProber ¶

func NewAuthProber ¶

func (*AuthProber) Apply ¶

func (*AuthProber) Forget ¶

func (*AuthProber) ProbeAndForward ¶

func (*AuthProber) Recall ¶

func (*AuthProber) Remember ¶

func (*AuthProber) Strategies ¶

type AuthStrategy ¶

func (AuthStrategy) String ¶

type Authenticator ¶

func NewAuthenticator ¶

func (*Authenticator) AddAPIKey ¶

func (*Authenticator) AddAllowedIP ¶

func (*Authenticator) Authenticate ¶

func (*Authenticator) CheckRateLimit ¶

func (*Authenticator) CleanupRateLimits ¶

func (*Authenticator) ListAPIKeys ¶

func (*Authenticator) RemoveAPIKey ¶

func (*Authenticator) Stats ¶

func (*Authenticator) ValidateAPIKey ¶

type BreakerSnapshot ¶

type BufferPool ¶

func NewBufferPool ¶

func (*BufferPool) Get ¶

func (*BufferPool) Put ¶

func (*BufferPool) Stats ¶

type CacheEntry ¶

type ChatMessage ¶

type ChatRequest ¶

func (*ChatRequest) HasImages ¶

func (*ChatRequest) TokenCount ¶

type ConfigReloader ¶