Documentation
¶
Overview ¶
Package observability provides health checks, metrics, and tracing capabilities.
Index ¶
- Constants
- type ComponentHealthChecker
- type Config
- type DatabaseHealthChecker
- type HealthChecker
- type HealthConfig
- type HealthManager
- func (hm *HealthManager) AddHealthChecker(checker HealthChecker)
- func (hm *HealthManager) AddReadinessChecker(checker ReadinessChecker)
- func (hm *HealthManager) GetHealthStatus() HealthResponse
- func (hm *HealthManager) GetReadinessStatus() ReadinessResponse
- func (hm *HealthManager) HealthzHandler() http.HandlerFunc
- func (hm *HealthManager) IsHealthy() bool
- func (hm *HealthManager) IsReady() bool
- func (hm *HealthManager) ReadyzHandler() http.HandlerFunc
- func (hm *HealthManager) SetTimeout(timeout time.Duration)
- type HealthResponse
- type HealthStatus
- type IndexHealthChecker
- type Manager
- func (m *Manager) Close(ctx context.Context) error
- func (m *Manager) HTTPMiddleware() func(http.Handler) http.Handler
- func (m *Manager) Health() *HealthManager
- func (m *Manager) IsHealthy() bool
- func (m *Manager) IsReady() bool
- func (m *Manager) Metrics() *MetricsManager
- func (m *Manager) RecordStorageOperation(operation string, err error)
- func (m *Manager) RecordToolCall(ctx context.Context, serverName, toolName string, duration time.Duration, err error)
- func (m *Manager) RegisterHealthChecker(checker HealthChecker)
- func (m *Manager) RegisterReadinessChecker(checker ReadinessChecker)
- func (m *Manager) SetupHTTPHandlers(mux *http.ServeMux)
- func (m *Manager) Tracing() *TracingManager
- func (m *Manager) UpdateMetrics()
- type MetricsConfig
- type MetricsManager
- func (mm *MetricsManager) HTTPMiddleware() func(http.Handler) http.Handler
- func (mm *MetricsManager) Handler() http.Handler
- func (mm *MetricsManager) RecordActorConnect(server, result string, duration time.Duration)
- func (mm *MetricsManager) RecordActorFailure(server, errorType string)
- func (mm *MetricsManager) RecordActorRetry(server string)
- func (mm *MetricsManager) RecordActorStateTransition(server, fromState, toState string)
- func (mm *MetricsManager) RecordHTTPRequest(method, path, status string, duration time.Duration)
- func (mm *MetricsManager) RecordOAuthRefresh(server, result string)
- func (mm *MetricsManager) RecordOAuthRefreshDuration(server, result string, duration time.Duration)
- func (mm *MetricsManager) RecordReconciliation(result string, duration time.Duration)
- func (mm *MetricsManager) RecordServerStateChange(server, fromState, toState string)
- func (mm *MetricsManager) RecordStorageOperation(operation, status string)
- func (mm *MetricsManager) RecordToolCall(server, tool, status string, duration time.Duration)
- func (mm *MetricsManager) Registry() *prometheus.Registry
- func (mm *MetricsManager) SetDockerContainers(count int)
- func (mm *MetricsManager) SetIndexSize(size uint64)
- func (mm *MetricsManager) SetServerStats(total, connected, quarantined int)
- func (mm *MetricsManager) SetToolsTotal(total int)
- func (mm *MetricsManager) SetUptime(startTime time.Time)
- func (mm *MetricsManager) UpdateFromStatsProvider(provider StatsUpdater)
- type ReadinessChecker
- type ReadinessResponse
- type StatsUpdater
- type TracingConfig
- type TracingManager
- func (tm *TracingManager) AddSpanAttributes(ctx context.Context, attrs ...attribute.KeyValue)
- func (tm *TracingManager) Close(ctx context.Context) error
- func (tm *TracingManager) HTTPMiddleware() func(http.Handler) http.Handler
- func (tm *TracingManager) IsEnabled() bool
- func (tm *TracingManager) SetSpanError(ctx context.Context, err error)
- func (tm *TracingManager) StartSpan(ctx context.Context, name string, attrs ...attribute.KeyValue) (context.Context, oteltrace.Span)
- func (tm *TracingManager) TraceIndexOperation(ctx context.Context, operation string, toolCount int) (context.Context, oteltrace.Span)
- func (tm *TracingManager) TraceStorageOperation(ctx context.Context, operation string) (context.Context, oteltrace.Span)
- func (tm *TracingManager) TraceToolCall(ctx context.Context, serverName, toolName string) (context.Context, oteltrace.Span)
- func (tm *TracingManager) TraceUpstreamConnection(ctx context.Context, serverName, operation string) (context.Context, oteltrace.Span)
- type UpstreamHealthChecker
Constants ¶
const (
	StatusHealthy   = "healthy"
	StatusUnhealthy = "unhealthy"
	StatusReady     = "ready"
	StatusNotReady  = "not_ready"
)
Health status constants
const (
	StatusSuccess = "success"
	StatusError   = "error"
)
Tool call status constants
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ComponentHealthChecker ¶
type ComponentHealthChecker struct {
// contains filtered or unexported fields
}
ComponentHealthChecker is a generic health checker for components with a simple status
func NewComponentHealthChecker ¶
func NewComponentHealthChecker(name string, isHealthy, isReady func() bool) *ComponentHealthChecker
NewComponentHealthChecker creates a new component health checker
func (*ComponentHealthChecker) HealthCheck ¶
func (chc *ComponentHealthChecker) HealthCheck(_ context.Context) error
HealthCheck performs a component health check
func (*ComponentHealthChecker) Name ¶
func (chc *ComponentHealthChecker) Name() string
Name returns the name of the health checker
func (*ComponentHealthChecker) ReadinessCheck ¶
func (chc *ComponentHealthChecker) ReadinessCheck(_ context.Context) error
ReadinessCheck performs a component readiness check
type Config ¶
type Config struct {
Health HealthConfig `json:"health"`
Metrics MetricsConfig `json:"metrics"`
Tracing TracingConfig `json:"tracing"`
}
Config holds configuration for observability features
func DefaultConfig ¶
DefaultConfig returns a default observability configuration
type DatabaseHealthChecker ¶
type DatabaseHealthChecker struct {
// contains filtered or unexported fields
}
DatabaseHealthChecker checks the health of a BoltDB database
func NewDatabaseHealthChecker ¶
func NewDatabaseHealthChecker(name string, db *bbolt.DB) *DatabaseHealthChecker
NewDatabaseHealthChecker creates a new database health checker
func (*DatabaseHealthChecker) HealthCheck ¶
func (dhc *DatabaseHealthChecker) HealthCheck(_ context.Context) error
HealthCheck performs a database health check
func (*DatabaseHealthChecker) Name ¶
func (dhc *DatabaseHealthChecker) Name() string
Name returns the name of the health checker
func (*DatabaseHealthChecker) ReadinessCheck ¶
func (dhc *DatabaseHealthChecker) ReadinessCheck(ctx context.Context) error
ReadinessCheck performs a database readiness check
type HealthChecker ¶
type HealthChecker interface {
// HealthCheck returns nil if healthy, error if unhealthy
HealthCheck(ctx context.Context) error
// Name returns the name of the component being checked
Name() string
}
HealthChecker defines an interface for components that can report their health status
type HealthConfig ¶
HealthConfig holds configuration for health checks
type HealthManager ¶
type HealthManager struct {
// contains filtered or unexported fields
}
HealthManager manages health and readiness checks
func NewHealthManager ¶
func NewHealthManager(logger *zap.SugaredLogger) *HealthManager
NewHealthManager creates a new health manager
func (*HealthManager) AddHealthChecker ¶
func (hm *HealthManager) AddHealthChecker(checker HealthChecker)
AddHealthChecker registers a health checker
func (*HealthManager) AddReadinessChecker ¶
func (hm *HealthManager) AddReadinessChecker(checker ReadinessChecker)
AddReadinessChecker registers a readiness checker
func (*HealthManager) GetHealthStatus ¶
func (hm *HealthManager) GetHealthStatus() HealthResponse
GetHealthStatus returns the current health status without HTTP context
func (*HealthManager) GetReadinessStatus ¶
func (hm *HealthManager) GetReadinessStatus() ReadinessResponse
GetReadinessStatus returns the current readiness status without HTTP context
func (*HealthManager) HealthzHandler ¶
func (hm *HealthManager) HealthzHandler() http.HandlerFunc
HealthzHandler returns an HTTP handler for the /healthz endpoint
func (*HealthManager) IsHealthy ¶
func (hm *HealthManager) IsHealthy() bool
IsHealthy returns true if all health checks pass
func (*HealthManager) IsReady ¶
func (hm *HealthManager) IsReady() bool
IsReady returns true if all readiness checks pass
func (*HealthManager) ReadyzHandler ¶
func (hm *HealthManager) ReadyzHandler() http.HandlerFunc
ReadyzHandler returns an HTTP handler for the /readyz endpoint
func (*HealthManager) SetTimeout ¶
func (hm *HealthManager) SetTimeout(timeout time.Duration)
SetTimeout sets the timeout for health checks
type HealthResponse ¶
type HealthResponse struct {
Status string `json:"status"` // "healthy" or "unhealthy"
Timestamp time.Time `json:"timestamp"`
Components []HealthStatus `json:"components"`
}
HealthResponse represents the overall health response
type HealthStatus ¶
type HealthStatus struct {
Name string `json:"name"`
Status string `json:"status"` // "healthy" or "unhealthy"
Error string `json:"error,omitempty"`
Latency string `json:"latency,omitempty"`
}
HealthStatus represents the health status of a component
type IndexHealthChecker ¶
type IndexHealthChecker struct {
// contains filtered or unexported fields
}
IndexHealthChecker checks the health of the search index
func NewIndexHealthChecker ¶
func NewIndexHealthChecker(name string, getDocCount func() (uint64, error)) *IndexHealthChecker
NewIndexHealthChecker creates a new index health checker
func (*IndexHealthChecker) HealthCheck ¶
func (ihc *IndexHealthChecker) HealthCheck(_ context.Context) error
HealthCheck performs an index health check
func (*IndexHealthChecker) Name ¶
func (ihc *IndexHealthChecker) Name() string
Name returns the name of the health checker
func (*IndexHealthChecker) ReadinessCheck ¶
func (ihc *IndexHealthChecker) ReadinessCheck(ctx context.Context) error
ReadinessCheck performs an index readiness check
type Manager ¶
type Manager struct {
// contains filtered or unexported fields
}
Manager coordinates all observability features
func NewManager ¶
func NewManager(logger *zap.SugaredLogger, config *Config) (*Manager, error)
NewManager creates a new observability manager
func (*Manager) HTTPMiddleware ¶
func (m *Manager) HTTPMiddleware() func(http.Handler) http.Handler
HTTPMiddleware returns combined HTTP middleware for observability
func (*Manager) Health ¶
func (m *Manager) Health() *HealthManager
Health returns the health manager
func (*Manager) Metrics ¶
func (m *Manager) Metrics() *MetricsManager
Metrics returns the metrics manager
func (*Manager) RecordStorageOperation ¶
func (m *Manager) RecordStorageOperation(operation string, err error)
RecordStorageOperation is a convenience method to record storage operations
func (*Manager) RecordToolCall ¶
func (m *Manager) RecordToolCall(ctx context.Context, serverName, toolName string, duration time.Duration, err error)
RecordToolCall is a convenience method to record tool call metrics and tracing
func (*Manager) RegisterHealthChecker ¶
func (m *Manager) RegisterHealthChecker(checker HealthChecker)
RegisterHealthChecker registers a health checker
func (*Manager) RegisterReadinessChecker ¶
func (m *Manager) RegisterReadinessChecker(checker ReadinessChecker)
RegisterReadinessChecker registers a readiness checker
func (*Manager) SetupHTTPHandlers ¶
func (m *Manager) SetupHTTPHandlers(mux *http.ServeMux)
SetupHTTPHandlers sets up observability HTTP handlers
func (*Manager) Tracing ¶
func (m *Manager) Tracing() *TracingManager
Tracing returns the tracing manager
func (*Manager) UpdateMetrics ¶
func (m *Manager) UpdateMetrics()
UpdateMetrics updates various metrics with current system state
type MetricsConfig ¶
type MetricsConfig struct {
Enabled bool `json:"enabled"`
}
MetricsConfig holds configuration for metrics
type MetricsManager ¶
type MetricsManager struct {
// contains filtered or unexported fields
}
MetricsManager manages Prometheus metrics
func NewMetricsManager ¶
func NewMetricsManager(logger *zap.SugaredLogger) *MetricsManager
NewMetricsManager creates a new metrics manager
func (*MetricsManager) HTTPMiddleware ¶
func (mm *MetricsManager) HTTPMiddleware() func(http.Handler) http.Handler
HTTPMiddleware returns middleware that records HTTP metrics
func (*MetricsManager) Handler ¶
func (mm *MetricsManager) Handler() http.Handler
Handler returns an HTTP handler for the /metrics endpoint
func (*MetricsManager) RecordActorConnect ¶
func (mm *MetricsManager) RecordActorConnect(server, result string, duration time.Duration)
RecordActorConnect records an actor connection attempt
func (*MetricsManager) RecordActorFailure ¶
func (mm *MetricsManager) RecordActorFailure(server, errorType string)
RecordActorFailure records an actor failure
func (*MetricsManager) RecordActorRetry ¶
func (mm *MetricsManager) RecordActorRetry(server string)
RecordActorRetry records an actor retry attempt
func (*MetricsManager) RecordActorStateTransition ¶
func (mm *MetricsManager) RecordActorStateTransition(server, fromState, toState string)
RecordActorStateTransition records an actor state transition
func (*MetricsManager) RecordHTTPRequest ¶
func (mm *MetricsManager) RecordHTTPRequest(method, path, status string, duration time.Duration)
RecordHTTPRequest records an HTTP request
func (*MetricsManager) RecordOAuthRefresh ¶ added in v0.16.0
func (mm *MetricsManager) RecordOAuthRefresh(server, result string)
RecordOAuthRefresh records an OAuth token refresh attempt. Result should be one of: "success", "failed_network", "failed_invalid_grant", "failed_other".
func (*MetricsManager) RecordOAuthRefreshDuration ¶ added in v0.16.0
func (mm *MetricsManager) RecordOAuthRefreshDuration(server, result string, duration time.Duration)
RecordOAuthRefreshDuration records the duration of an OAuth token refresh attempt. Result should be one of: "success", "failed_network", "failed_invalid_grant", "failed_other".
func (*MetricsManager) RecordReconciliation ¶
func (mm *MetricsManager) RecordReconciliation(result string, duration time.Duration)
RecordReconciliation records a reconciliation cycle
func (*MetricsManager) RecordServerStateChange ¶
func (mm *MetricsManager) RecordServerStateChange(server, fromState, toState string)
RecordServerStateChange records a server state change
func (*MetricsManager) RecordStorageOperation ¶
func (mm *MetricsManager) RecordStorageOperation(operation, status string)
RecordStorageOperation records a storage operation
func (*MetricsManager) RecordToolCall ¶
func (mm *MetricsManager) RecordToolCall(server, tool, status string, duration time.Duration)
RecordToolCall records a tool call
func (*MetricsManager) Registry ¶
func (mm *MetricsManager) Registry() *prometheus.Registry
Registry returns the Prometheus registry for custom metrics
func (*MetricsManager) SetDockerContainers ¶
func (mm *MetricsManager) SetDockerContainers(count int)
SetDockerContainers sets the number of active Docker containers
func (*MetricsManager) SetIndexSize ¶
func (mm *MetricsManager) SetIndexSize(size uint64)
SetIndexSize sets the search index size
func (*MetricsManager) SetServerStats ¶
func (mm *MetricsManager) SetServerStats(total, connected, quarantined int)
SetServerStats updates server-related metrics
func (*MetricsManager) SetToolsTotal ¶
func (mm *MetricsManager) SetToolsTotal(total int)
SetToolsTotal sets the total number of tools
func (*MetricsManager) SetUptime ¶
func (mm *MetricsManager) SetUptime(startTime time.Time)
SetUptime sets the uptime metric
func (*MetricsManager) UpdateFromStatsProvider ¶
func (mm *MetricsManager) UpdateFromStatsProvider(provider StatsUpdater)
UpdateFromStatsProvider updates metrics from a stats provider
type ReadinessChecker ¶
type ReadinessChecker interface {
// ReadinessCheck returns nil if ready, error if not ready
ReadinessCheck(ctx context.Context) error
// Name returns the name of the component being checked
Name() string
}
ReadinessChecker defines an interface for components that can report their readiness status
type ReadinessResponse ¶
type ReadinessResponse struct {
Status string `json:"status"` // "ready" or "not_ready"
Timestamp time.Time `json:"timestamp"`
Components []HealthStatus `json:"components"`
}
ReadinessResponse represents the overall readiness response
type StatsUpdater ¶
type StatsUpdater interface {
UpdateMetrics(mm *MetricsManager)
}
StatsUpdater defines an interface for components that can provide metrics
type TracingConfig ¶
type TracingConfig struct {
Enabled bool `json:"enabled"`
ServiceName string `json:"service_name"`
ServiceVersion string `json:"service_version"`
OTLPEndpoint string `json:"otlp_endpoint"`
SampleRate float64 `json:"sample_rate"`
}
TracingConfig holds configuration for OpenTelemetry tracing
type TracingManager ¶
type TracingManager struct {
// contains filtered or unexported fields
}
TracingManager manages OpenTelemetry tracing
func NewTracingManager ¶
func NewTracingManager(logger *zap.SugaredLogger, config TracingConfig) (*TracingManager, error)
NewTracingManager creates a new tracing manager
func (*TracingManager) AddSpanAttributes ¶
func (tm *TracingManager) AddSpanAttributes(ctx context.Context, attrs ...attribute.KeyValue)
AddSpanAttributes adds attributes to the current span
func (*TracingManager) Close ¶
func (tm *TracingManager) Close(ctx context.Context) error
Close shuts down the tracing provider
func (*TracingManager) HTTPMiddleware ¶
func (tm *TracingManager) HTTPMiddleware() func(http.Handler) http.Handler
HTTPMiddleware returns middleware that adds tracing to HTTP requests
func (*TracingManager) IsEnabled ¶
func (tm *TracingManager) IsEnabled() bool
IsEnabled returns whether tracing is enabled
func (*TracingManager) SetSpanError ¶
func (tm *TracingManager) SetSpanError(ctx context.Context, err error)
SetSpanError marks the current span as having an error
func (*TracingManager) StartSpan ¶
func (tm *TracingManager) StartSpan(ctx context.Context, name string, attrs ...attribute.KeyValue) (context.Context, oteltrace.Span)
StartSpan starts a new trace span
func (*TracingManager) TraceIndexOperation ¶
func (tm *TracingManager) TraceIndexOperation(ctx context.Context, operation string, toolCount int) (context.Context, oteltrace.Span)
TraceIndexOperation creates a span for index operations
func (*TracingManager) TraceStorageOperation ¶
func (tm *TracingManager) TraceStorageOperation(ctx context.Context, operation string) (context.Context, oteltrace.Span)
TraceStorageOperation creates a span for storage operations
func (*TracingManager) TraceToolCall ¶
func (tm *TracingManager) TraceToolCall(ctx context.Context, serverName, toolName string) (context.Context, oteltrace.Span)
TraceToolCall creates a span for tool call operations
func (*TracingManager) TraceUpstreamConnection ¶
func (tm *TracingManager) TraceUpstreamConnection(ctx context.Context, serverName, operation string) (context.Context, oteltrace.Span)
TraceUpstreamConnection creates a span for upstream connection operations
type UpstreamHealthChecker ¶
type UpstreamHealthChecker struct {
// contains filtered or unexported fields
}
UpstreamHealthChecker checks the health of upstream servers
func NewUpstreamHealthChecker ¶
func NewUpstreamHealthChecker(name string, getStats func() map[string]interface{}, minConnected int) *UpstreamHealthChecker
NewUpstreamHealthChecker creates a new upstream health checker
func (*UpstreamHealthChecker) HealthCheck ¶
func (uhc *UpstreamHealthChecker) HealthCheck(_ context.Context) error
HealthCheck performs an upstream servers health check
func (*UpstreamHealthChecker) Name ¶
func (uhc *UpstreamHealthChecker) Name() string
Name returns the name of the health checker
func (*UpstreamHealthChecker) ReadinessCheck ¶
func (uhc *UpstreamHealthChecker) ReadinessCheck(_ context.Context) error
ReadinessCheck performs an upstream servers readiness check