observability

package
v0.17.2 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 3, 2026 License: MIT Imports: 18 Imported by: 0

Documentation

Overview

Package observability provides health and readiness checks, Prometheus metrics, and OpenTelemetry tracing capabilities.

Index

Constants

View Source
// Health status constants: string labels for health and readiness results.
const (
	// StatusHealthy and StatusUnhealthy carry the same strings that the
	// Status fields of HealthResponse and HealthStatus document
	// ("healthy" or "unhealthy").
	StatusHealthy   = "healthy"
	StatusUnhealthy = "unhealthy"
	// StatusReady and StatusNotReady carry the same strings that the
	// Status field of ReadinessResponse documents ("ready" or "not_ready").
	StatusReady     = "ready"
	StatusNotReady  = "not_ready"
)

Health status constants

View Source
// Tool call status constants.
// NOTE(review): presumably the values passed as the status argument of
// MetricsManager.RecordToolCall — confirm against the implementation.
const (
	// StatusSuccess labels a tool call that succeeded.
	StatusSuccess = "success"
	// StatusError labels a tool call that failed.
	StatusError   = "error"
)

Tool call status constants

Variables

This section is empty.

Functions

This section is empty.

Types

type ComponentHealthChecker

type ComponentHealthChecker struct {
	// contains filtered or unexported fields
}

ComponentHealthChecker is a generic health checker for components with a simple status

func NewComponentHealthChecker

func NewComponentHealthChecker(name string, isHealthy, isReady func() bool) *ComponentHealthChecker

NewComponentHealthChecker creates a new component health checker

func (*ComponentHealthChecker) HealthCheck

func (chc *ComponentHealthChecker) HealthCheck(_ context.Context) error

HealthCheck performs a component health check

func (*ComponentHealthChecker) Name

func (chc *ComponentHealthChecker) Name() string

Name returns the name of the health checker

func (*ComponentHealthChecker) ReadinessCheck

func (chc *ComponentHealthChecker) ReadinessCheck(_ context.Context) error

ReadinessCheck performs a component readiness check

type Config

// Config holds configuration for observability features, with one
// sub-configuration per subsystem. Each field is serialized under the JSON
// key named in its struct tag.
type Config struct {
	// Health holds configuration for health checks.
	Health  HealthConfig  `json:"health"`
	// Metrics holds configuration for metrics.
	Metrics MetricsConfig `json:"metrics"`
	// Tracing holds configuration for OpenTelemetry tracing.
	Tracing TracingConfig `json:"tracing"`
}

Config holds configuration for observability features

func DefaultConfig

func DefaultConfig(serviceName, serviceVersion string) Config

DefaultConfig returns a default observability configuration

type DatabaseHealthChecker

type DatabaseHealthChecker struct {
	// contains filtered or unexported fields
}

DatabaseHealthChecker checks the health of a BoltDB database

func NewDatabaseHealthChecker

func NewDatabaseHealthChecker(name string, db *bbolt.DB) *DatabaseHealthChecker

NewDatabaseHealthChecker creates a new database health checker

func (*DatabaseHealthChecker) HealthCheck

func (dhc *DatabaseHealthChecker) HealthCheck(_ context.Context) error

HealthCheck performs a database health check

func (*DatabaseHealthChecker) Name

func (dhc *DatabaseHealthChecker) Name() string

Name returns the name of the health checker

func (*DatabaseHealthChecker) ReadinessCheck

func (dhc *DatabaseHealthChecker) ReadinessCheck(ctx context.Context) error

ReadinessCheck performs a database readiness check

type HealthChecker

// HealthChecker defines an interface for components that can report their
// health status. A nil error from HealthCheck means healthy; a non-nil error
// means unhealthy.
type HealthChecker interface {
	// HealthCheck returns nil if healthy, error if unhealthy
	HealthCheck(ctx context.Context) error
	// Name returns the name of the component being checked
	Name() string
}

HealthChecker defines an interface for components that can report their health status

type HealthConfig

// HealthConfig holds configuration for health checks.
type HealthConfig struct {
	// Enabled is the on/off flag for health checks.
	// NOTE(review): how a disabled configuration is honored is not visible
	// here — confirm against the manager implementation.
	Enabled bool          `json:"enabled"`
	// Timeout is the time limit applied to health checks.
	// With encoding/json and no custom marshaling, a time.Duration is
	// encoded as an integer number of nanoseconds, not a string like "5s".
	Timeout time.Duration `json:"timeout"`
}

HealthConfig holds configuration for health checks

type HealthManager

type HealthManager struct {
	// contains filtered or unexported fields
}

HealthManager manages health and readiness checks

func NewHealthManager

func NewHealthManager(logger *zap.SugaredLogger) *HealthManager

NewHealthManager creates a new health manager

func (*HealthManager) AddHealthChecker

func (hm *HealthManager) AddHealthChecker(checker HealthChecker)

AddHealthChecker registers a health checker

func (*HealthManager) AddReadinessChecker

func (hm *HealthManager) AddReadinessChecker(checker ReadinessChecker)

AddReadinessChecker registers a readiness checker

func (*HealthManager) GetHealthStatus

func (hm *HealthManager) GetHealthStatus() HealthResponse

GetHealthStatus returns the current health status without HTTP context

func (*HealthManager) GetReadinessStatus

func (hm *HealthManager) GetReadinessStatus() ReadinessResponse

GetReadinessStatus returns the current readiness status without HTTP context

func (*HealthManager) HealthzHandler

func (hm *HealthManager) HealthzHandler() http.HandlerFunc

HealthzHandler returns an HTTP handler for the /healthz endpoint

func (*HealthManager) IsHealthy

func (hm *HealthManager) IsHealthy() bool

IsHealthy returns true if all health checks pass

func (*HealthManager) IsReady

func (hm *HealthManager) IsReady() bool

IsReady returns true if all readiness checks pass

func (*HealthManager) ReadyzHandler

func (hm *HealthManager) ReadyzHandler() http.HandlerFunc

ReadyzHandler returns an HTTP handler for the /readyz endpoint

func (*HealthManager) SetTimeout

func (hm *HealthManager) SetTimeout(timeout time.Duration)

SetTimeout sets the timeout for health checks

type HealthResponse

// HealthResponse represents the overall health response.
type HealthResponse struct {
	// Status is the overall result.
	Status     string         `json:"status"` // "healthy" or "unhealthy"
	// Timestamp is the time attached to the response.
	// NOTE(review): presumably the moment the checks were evaluated — confirm.
	Timestamp  time.Time      `json:"timestamp"`
	// Components lists the health status of each component.
	Components []HealthStatus `json:"components"`
}

HealthResponse represents the overall health response

type HealthStatus

// HealthStatus represents the health status of a single component.
type HealthStatus struct {
	// Name identifies the component.
	Name    string `json:"name"`
	// Status is the result for this component.
	Status  string `json:"status"` // "healthy" or "unhealthy"
	// Error is left out of the JSON output when empty.
	// NOTE(review): presumably the failing check's error text — confirm.
	Error   string `json:"error,omitempty"`
	// Latency is left out of the JSON output when empty.
	// NOTE(review): presumably a formatted check duration — confirm the format.
	Latency string `json:"latency,omitempty"`
}

HealthStatus represents the health status of a component

type IndexHealthChecker

type IndexHealthChecker struct {
	// contains filtered or unexported fields
}

IndexHealthChecker checks the health of the search index

func NewIndexHealthChecker

func NewIndexHealthChecker(name string, getDocCount func() (uint64, error)) *IndexHealthChecker

NewIndexHealthChecker creates a new index health checker

func (*IndexHealthChecker) HealthCheck

func (ihc *IndexHealthChecker) HealthCheck(_ context.Context) error

HealthCheck performs an index health check

func (*IndexHealthChecker) Name

func (ihc *IndexHealthChecker) Name() string

Name returns the name of the health checker

func (*IndexHealthChecker) ReadinessCheck

func (ihc *IndexHealthChecker) ReadinessCheck(ctx context.Context) error

ReadinessCheck performs an index readiness check

type Manager

type Manager struct {
	// contains filtered or unexported fields
}

Manager coordinates all observability features

func NewManager

func NewManager(logger *zap.SugaredLogger, config *Config) (*Manager, error)

NewManager creates a new observability manager

func (*Manager) Close

func (m *Manager) Close(ctx context.Context) error

Close gracefully shuts down observability components

func (*Manager) HTTPMiddleware

func (m *Manager) HTTPMiddleware() func(http.Handler) http.Handler

HTTPMiddleware returns combined HTTP middleware for observability

func (*Manager) Health

func (m *Manager) Health() *HealthManager

Health returns the health manager

func (*Manager) IsHealthy

func (m *Manager) IsHealthy() bool

IsHealthy returns true if all health checks pass

func (*Manager) IsReady

func (m *Manager) IsReady() bool

IsReady returns true if all readiness checks pass

func (*Manager) Metrics

func (m *Manager) Metrics() *MetricsManager

Metrics returns the metrics manager

func (*Manager) RecordStorageOperation

func (m *Manager) RecordStorageOperation(operation string, err error)

RecordStorageOperation is a convenience method to record storage operations

func (*Manager) RecordToolCall

func (m *Manager) RecordToolCall(ctx context.Context, serverName, toolName string, duration time.Duration, err error)

RecordToolCall is a convenience method to record tool call metrics and tracing

func (*Manager) RegisterHealthChecker

func (m *Manager) RegisterHealthChecker(checker HealthChecker)

RegisterHealthChecker registers a health checker

func (*Manager) RegisterReadinessChecker

func (m *Manager) RegisterReadinessChecker(checker ReadinessChecker)

RegisterReadinessChecker registers a readiness checker

func (*Manager) SetupHTTPHandlers

func (m *Manager) SetupHTTPHandlers(mux *http.ServeMux)

SetupHTTPHandlers sets up observability HTTP handlers

func (*Manager) Tracing

func (m *Manager) Tracing() *TracingManager

Tracing returns the tracing manager

func (*Manager) UpdateMetrics

func (m *Manager) UpdateMetrics()

UpdateMetrics updates various metrics with current system state

type MetricsConfig

// MetricsConfig holds configuration for metrics.
type MetricsConfig struct {
	// Enabled is the on/off flag for metrics.
	// NOTE(review): how a disabled configuration is honored is not visible
	// here — confirm against the manager implementation.
	Enabled bool `json:"enabled"`
}

MetricsConfig holds configuration for metrics

type MetricsManager

type MetricsManager struct {
	// contains filtered or unexported fields
}

MetricsManager manages Prometheus metrics

func NewMetricsManager

func NewMetricsManager(logger *zap.SugaredLogger) *MetricsManager

NewMetricsManager creates a new metrics manager

func (*MetricsManager) HTTPMiddleware

func (mm *MetricsManager) HTTPMiddleware() func(http.Handler) http.Handler

HTTPMiddleware returns middleware that records HTTP metrics

func (*MetricsManager) Handler

func (mm *MetricsManager) Handler() http.Handler

Handler returns an HTTP handler for the /metrics endpoint

func (*MetricsManager) RecordActorConnect

func (mm *MetricsManager) RecordActorConnect(server, result string, duration time.Duration)

RecordActorConnect records an actor connection attempt

func (*MetricsManager) RecordActorFailure

func (mm *MetricsManager) RecordActorFailure(server, errorType string)

RecordActorFailure records an actor failure

func (*MetricsManager) RecordActorRetry

func (mm *MetricsManager) RecordActorRetry(server string)

RecordActorRetry records an actor retry attempt

func (*MetricsManager) RecordActorStateTransition

func (mm *MetricsManager) RecordActorStateTransition(server, fromState, toState string)

RecordActorStateTransition records an actor state transition

func (*MetricsManager) RecordHTTPRequest

func (mm *MetricsManager) RecordHTTPRequest(method, path, status string, duration time.Duration)

RecordHTTPRequest records an HTTP request

func (*MetricsManager) RecordOAuthRefresh added in v0.16.0

func (mm *MetricsManager) RecordOAuthRefresh(server, result string)

RecordOAuthRefresh records an OAuth token refresh attempt. Result should be one of: "success", "failed_network", "failed_invalid_grant", "failed_other".

func (*MetricsManager) RecordOAuthRefreshDuration added in v0.16.0

func (mm *MetricsManager) RecordOAuthRefreshDuration(server, result string, duration time.Duration)

RecordOAuthRefreshDuration records the duration of an OAuth token refresh attempt. Result should be one of: "success", "failed_network", "failed_invalid_grant", "failed_other".

func (*MetricsManager) RecordReconciliation

func (mm *MetricsManager) RecordReconciliation(result string, duration time.Duration)

RecordReconciliation records a reconciliation cycle

func (*MetricsManager) RecordServerStateChange

func (mm *MetricsManager) RecordServerStateChange(server, fromState, toState string)

RecordServerStateChange records a server state change

func (*MetricsManager) RecordStorageOperation

func (mm *MetricsManager) RecordStorageOperation(operation, status string)

RecordStorageOperation records a storage operation

func (*MetricsManager) RecordToolCall

func (mm *MetricsManager) RecordToolCall(server, tool, status string, duration time.Duration)

RecordToolCall records a tool call

func (*MetricsManager) Registry

func (mm *MetricsManager) Registry() *prometheus.Registry

Registry returns the Prometheus registry for custom metrics

func (*MetricsManager) SetDockerContainers

func (mm *MetricsManager) SetDockerContainers(count int)

SetDockerContainers sets the number of active Docker containers

func (*MetricsManager) SetIndexSize

func (mm *MetricsManager) SetIndexSize(size uint64)

SetIndexSize sets the search index size

func (*MetricsManager) SetServerStats

func (mm *MetricsManager) SetServerStats(total, connected, quarantined int)

SetServerStats updates server-related metrics

func (*MetricsManager) SetToolsTotal

func (mm *MetricsManager) SetToolsTotal(total int)

SetToolsTotal sets the total number of tools

func (*MetricsManager) SetUptime

func (mm *MetricsManager) SetUptime(startTime time.Time)

SetUptime sets the uptime metric

func (*MetricsManager) UpdateFromStatsProvider

func (mm *MetricsManager) UpdateFromStatsProvider(provider StatsUpdater)

UpdateFromStatsProvider updates metrics from a stats provider

type ReadinessChecker

// ReadinessChecker defines an interface for components that can report their
// readiness status. A nil error from ReadinessCheck means ready; a non-nil
// error means not ready.
type ReadinessChecker interface {
	// ReadinessCheck returns nil if ready, error if not ready
	ReadinessCheck(ctx context.Context) error
	// Name returns the name of the component being checked
	Name() string
}

ReadinessChecker defines an interface for components that can report their readiness status

type ReadinessResponse

// ReadinessResponse represents the overall readiness response.
type ReadinessResponse struct {
	// Status is the overall result.
	Status     string         `json:"status"` // "ready" or "not_ready"
	// Timestamp is the time attached to the response.
	// NOTE(review): presumably the moment the checks were evaluated — confirm.
	Timestamp  time.Time      `json:"timestamp"`
	// Components lists per-component results, reusing the HealthStatus type.
	// NOTE(review): which status strings the entries carry for readiness
	// checks is not visible here — confirm.
	Components []HealthStatus `json:"components"`
}

ReadinessResponse represents the overall readiness response

type StatsUpdater

// StatsUpdater defines an interface for components that can provide metrics.
type StatsUpdater interface {
	// UpdateMetrics receives the MetricsManager that the implementation
	// reports its values to.
	// NOTE(review): presumably invoked by MetricsManager.UpdateFromStatsProvider — confirm.
	UpdateMetrics(mm *MetricsManager)
}

StatsUpdater defines an interface for components that can provide metrics

type TracingConfig

// TracingConfig holds configuration for OpenTelemetry tracing.
type TracingConfig struct {
	// Enabled is the on/off flag for tracing.
	Enabled        bool    `json:"enabled"`
	// ServiceName is the service name used for tracing.
	// NOTE(review): presumably filled from DefaultConfig's serviceName argument — confirm.
	ServiceName    string  `json:"service_name"`
	// ServiceVersion is the service version used for tracing.
	// NOTE(review): presumably filled from DefaultConfig's serviceVersion argument — confirm.
	ServiceVersion string  `json:"service_version"`
	// OTLPEndpoint is the OTLP endpoint setting.
	// NOTE(review): expected format (host:port vs. full URL) is not visible here — confirm.
	OTLPEndpoint   string  `json:"otlp_endpoint"`
	// SampleRate is the trace sampling rate.
	// NOTE(review): presumably a fraction in [0, 1] — confirm range and default.
	SampleRate     float64 `json:"sample_rate"`
}

TracingConfig holds configuration for OpenTelemetry tracing

type TracingManager

type TracingManager struct {
	// contains filtered or unexported fields
}

TracingManager manages OpenTelemetry tracing

func NewTracingManager

func NewTracingManager(logger *zap.SugaredLogger, config TracingConfig) (*TracingManager, error)

NewTracingManager creates a new tracing manager

func (*TracingManager) AddSpanAttributes

func (tm *TracingManager) AddSpanAttributes(ctx context.Context, attrs ...attribute.KeyValue)

AddSpanAttributes adds attributes to the current span

func (*TracingManager) Close

func (tm *TracingManager) Close(ctx context.Context) error

Close shuts down the tracing provider

func (*TracingManager) HTTPMiddleware

func (tm *TracingManager) HTTPMiddleware() func(http.Handler) http.Handler

HTTPMiddleware returns middleware that adds tracing to HTTP requests

func (*TracingManager) IsEnabled

func (tm *TracingManager) IsEnabled() bool

IsEnabled returns whether tracing is enabled

func (*TracingManager) SetSpanError

func (tm *TracingManager) SetSpanError(ctx context.Context, err error)

SetSpanError marks the current span as having an error

func (*TracingManager) StartSpan

func (tm *TracingManager) StartSpan(ctx context.Context, name string, attrs ...attribute.KeyValue) (context.Context, oteltrace.Span)

StartSpan starts a new trace span

func (*TracingManager) TraceIndexOperation

func (tm *TracingManager) TraceIndexOperation(ctx context.Context, operation string, toolCount int) (context.Context, oteltrace.Span)

TraceIndexOperation creates a span for index operations

func (*TracingManager) TraceStorageOperation

func (tm *TracingManager) TraceStorageOperation(ctx context.Context, operation string) (context.Context, oteltrace.Span)

TraceStorageOperation creates a span for storage operations

func (*TracingManager) TraceToolCall

func (tm *TracingManager) TraceToolCall(ctx context.Context, serverName, toolName string) (context.Context, oteltrace.Span)

TraceToolCall creates a span for tool call operations

func (*TracingManager) TraceUpstreamConnection

func (tm *TracingManager) TraceUpstreamConnection(ctx context.Context, serverName, operation string) (context.Context, oteltrace.Span)

TraceUpstreamConnection creates a span for upstream connection operations

type UpstreamHealthChecker

type UpstreamHealthChecker struct {
	// contains filtered or unexported fields
}

UpstreamHealthChecker checks the health of upstream servers

func NewUpstreamHealthChecker

func NewUpstreamHealthChecker(name string, getStats func() map[string]interface{}, minConnected int) *UpstreamHealthChecker

NewUpstreamHealthChecker creates a new upstream health checker

func (*UpstreamHealthChecker) HealthCheck

func (uhc *UpstreamHealthChecker) HealthCheck(_ context.Context) error

HealthCheck performs an upstream servers health check

func (*UpstreamHealthChecker) Name

func (uhc *UpstreamHealthChecker) Name() string

Name returns the name of the health checker

func (*UpstreamHealthChecker) ReadinessCheck

func (uhc *UpstreamHealthChecker) ReadinessCheck(_ context.Context) error

ReadinessCheck performs an upstream servers readiness check

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL