alert_exporter

package
v0.9.3
Published: Sep 4, 2025 License: Apache-2.0 Imports: 23 Imported by: 0

Documentation

Constants

const AlertCheckpointConsumer = "alert-exporter"
const AlertCheckpointKey = "active-alerts"
const CurrentAlertCheckpointVersion = 1
const SentinelAlertName = "__sentinel_alert__"

Variables

var (
	// Processing metrics
	ProcessingCyclesTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_processing_cycles_total",
		Help: "Total number of processing cycles completed",
	})

	ProcessingDurationSeconds = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "flightctl_alert_exporter_processing_duration_seconds",
		Help:    "Time spent processing events in seconds",
		Buckets: prometheus.DefBuckets,
	})

	EventsProcessedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_events_processed_total",
		Help: "Total number of events processed",
	})

	// Alert metrics
	AlertsActiveTotal = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "flightctl_alert_exporter_alerts_active_total",
		Help: "Current number of active alerts",
	})

	AlertsCreatedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_alerts_created_total",
		Help: "Total number of alerts created",
	})

	AlertsResolvedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_alerts_resolved_total",
		Help: "Total number of alerts resolved",
	})

	// Alertmanager interaction metrics
	AlertmanagerRequestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_alertmanager_requests_total",
		Help: "Total number of requests to Alertmanager",
	}, []string{"status"})

	AlertmanagerRequestDurationSeconds = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "flightctl_alert_exporter_alertmanager_request_duration_seconds",
		Help:    "Time spent sending requests to Alertmanager in seconds",
		Buckets: prometheus.DefBuckets,
	})

	AlertmanagerRetriesTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_alertmanager_retries_total",
		Help: "Total number of retries when sending to Alertmanager",
	})

	// Checkpoint metrics
	CheckpointOperationsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_checkpoint_operations_total",
		Help: "Total number of checkpoint operations",
	}, []string{"operation", "status"})

	CheckpointSizeBytes = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "flightctl_alert_exporter_checkpoint_size_bytes",
		Help: "Size of the checkpoint data in bytes",
	})

	// Health metrics
	UptimeSeconds = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "flightctl_alert_exporter_uptime_seconds",
		Help: "Time since the alert exporter started in seconds",
	})

	LastSuccessfulProcessingTimestamp = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "flightctl_alert_exporter_last_successful_processing_timestamp",
		Help: "Unix timestamp of the last successful processing cycle",
	})

	ErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_errors_total",
		Help: "Total number of errors encountered",
	}, []string{"component", "type"})
)

Prometheus metrics for the alert exporter
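
These metrics are created with promauto, so they are registered with the default Prometheus registry as soon as the package is imported and can be updated directly from any component. A minimal sketch of recording them during a processing cycle and exposing them over HTTP; the alert_exporter import path and the label values are assumptions, not taken from this package:

package main

import (
	"net/http"
	"time"

	"github.com/prometheus/client_golang/prometheus/promhttp"

	// The import path is an assumption about the flightctl module layout.
	alertexporter "github.com/flightctl/flightctl/internal/alert_exporter"
)

func main() {
	start := time.Now()

	// ... one processing cycle would run here ...

	// Cycle-level metrics.
	alertexporter.ProcessingCyclesTotal.Inc()
	alertexporter.EventsProcessedTotal.Add(42) // events handled in this cycle
	alertexporter.ProcessingDurationSeconds.Observe(time.Since(start).Seconds())

	// Labeled metrics take their values in the order the labels were declared;
	// these label values are illustrative.
	alertexporter.AlertmanagerRequestsTotal.WithLabelValues("success").Inc()
	alertexporter.ErrorsTotal.WithLabelValues("event_processor", "fetch").Inc()

	// promauto collectors land in the default registry, so promhttp.Handler
	// exposes them without extra wiring.
	http.Handle("/metrics", promhttp.Handler())
	_ = http.ListenAndServe(":8080", nil)
}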

Functions

This section is empty.

Types

type AlertCheckpoint

type AlertCheckpoint struct {
	Version   int
	Timestamp string
	Alerts    map[AlertKey]map[string]*AlertInfo
}

type AlertExporter

type AlertExporter struct {
	// contains filtered or unexported fields
}

func NewAlertExporter

func NewAlertExporter(log *logrus.Logger, handler service.Service, config *config.Config) *AlertExporter

func (*AlertExporter) Poll

func (a *AlertExporter) Poll(ctx context.Context) error
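
A hedged sketch of wiring the exporter together. The import paths, and whether Poll blocks until the context is cancelled or performs a single cycle, are not stated in this index, so treat both as assumptions:

import (
	"context"

	"github.com/sirupsen/logrus"

	// These import paths are assumptions about the flightctl module layout.
	alertexporter "github.com/flightctl/flightctl/internal/alert_exporter"
	"github.com/flightctl/flightctl/internal/config"
	"github.com/flightctl/flightctl/internal/service"
)

// runAlertExporter is a hypothetical helper; cfg and svc are assumed to be
// built by the surrounding flightctl wiring.
func runAlertExporter(ctx context.Context, log *logrus.Logger, svc service.Service, cfg *config.Config) error {
	exporter := alertexporter.NewAlertExporter(log, svc, cfg)

	// Poll is assumed to drive the exporter's processing; it may block until
	// ctx is cancelled or return after one cycle.
	return exporter.Poll(ctx)
}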

type AlertInfo

type AlertInfo struct {
	ResourceName string
	ResourceKind string
	OrgID        string
	Reason       string
	Summary      string
	StartsAt     time.Time
	EndsAt       *time.Time
}

type AlertKey

type AlertKey string

func AlertKeyFromEvent

func AlertKeyFromEvent(event api.Event) AlertKey

func NewAlertKey

func NewAlertKey(org string, kind string, name string) AlertKey
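
A sketch of how AlertKey ties together the nested Alerts map used by AlertCheckpoint and SendAllAlerts, reusing the assumed imports from the sketches above. The values are illustrative, and keying the inner map by the event reason is an assumption based on the AlertInfo fields:

func exampleActiveAlerts() map[alertexporter.AlertKey]map[string]*alertexporter.AlertInfo {
	key := alertexporter.NewAlertKey("default", "Device", "device-01")

	return map[alertexporter.AlertKey]map[string]*alertexporter.AlertInfo{
		key: {
			// Keying the inner map by Reason is an assumption.
			"DeviceDisconnected": {
				ResourceName: "device-01",
				ResourceKind: "Device",
				OrgID:        "default",
				Reason:       "DeviceDisconnected",
				Summary:      "Device device-01 has disconnected",
				StartsAt:     time.Now(),
				// EndsAt stays nil while the alert is still firing.
			},
		},
	}
}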

type AlertSender

type AlertSender struct {
	// contains filtered or unexported fields
}

func NewAlertSender

func NewAlertSender(log *logrus.Logger, hostname string, port uint, cfg *config.Config) *AlertSender

func (*AlertSender) SendAlerts

func (a *AlertSender) SendAlerts(checkpoint *AlertCheckpoint) error

type AlertmanagerAlert

type AlertmanagerAlert struct {
	Labels       map[string]string `json:"labels"`
	Annotations  map[string]string `json:"annotations,omitempty"`
	StartsAt     time.Time         `json:"startsAt"`
	EndsAt       time.Time         `json:"endsAt,omitempty"`
	GeneratorURL string            `json:"generatorURL,omitempty"`
}
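
The json tags match the payload accepted by Alertmanager's v2 alerts endpoint, which takes a JSON array of such objects. A sketch of building and marshaling one alert, assuming encoding/json is imported; the label and annotation names are illustrative, not taken from this package:

func toAlertmanagerPayload() ([]byte, error) {
	alert := alertexporter.AlertmanagerAlert{
		Labels: map[string]string{
			// Label names are illustrative; the package's real label scheme may differ.
			"alertname": "DeviceDisconnected",
			"resource":  "device-01",
			"org":       "default",
		},
		Annotations: map[string]string{
			"summary": "Device device-01 has disconnected",
		},
		StartsAt: time.Now().Add(-5 * time.Minute),
		// EndsAt is left unset while the alert is firing; a resolved alert
		// would carry its resolution time here.
	}

	// Alertmanager's v2 API accepts a JSON array of alerts.
	return json.Marshal([]alertexporter.AlertmanagerAlert{alert})
}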

type AlertmanagerClient

type AlertmanagerClient struct {
	// contains filtered or unexported fields
}

func NewAlertmanagerClient

func NewAlertmanagerClient(hostname string, port uint, log logrus.FieldLogger, cfg *config.Config) *AlertmanagerClient

func (*AlertmanagerClient) SendAllAlerts

func (a *AlertmanagerClient) SendAllAlerts(alerts map[AlertKey]map[string]*AlertInfo) error

SendAllAlerts sends all alerts from a nested map to Alertmanager in batches.
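
A sketch of pushing the nested map from the AlertKey example through the client; the hostname, port, and configuration are placeholders:

func pushAlerts(log *logrus.Logger, cfg *config.Config,
	alerts map[alertexporter.AlertKey]map[string]*alertexporter.AlertInfo) error {
	// "flightctl-alertmanager" and 9093 are placeholder connection details.
	client := alertexporter.NewAlertmanagerClient("flightctl-alertmanager", 9093, log, cfg)
	return client.SendAllAlerts(alerts)
}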

type CheckpointContext

type CheckpointContext struct {
	// contains filtered or unexported fields
}

type CheckpointManager

type CheckpointManager struct {
	// contains filtered or unexported fields
}

func NewCheckpointManager

func NewCheckpointManager(log *logrus.Logger, handler service.Service) *CheckpointManager

func (*CheckpointManager) LoadCheckpoint

func (c *CheckpointManager) LoadCheckpoint(ctx context.Context) *AlertCheckpoint

LoadCheckpoint retrieves the last processed event and active alerts from the database. If no checkpoint exists, it initializes a fresh state. If it fails to retrieve the checkpoint or to unmarshal its contents, it logs an error and starts from a fresh state; this is preferable to panicking because it lets the exporter keep running and at least report new alerts from the point of failure onward. A more robust recovery strategy, such as listing the system resources and reconstructing the set of active alerts from the current state of the system, could be adopted in the future. For now, the assumption is that if fetching the checkpoint fails, fetching the system resources would fail as well.

func (*CheckpointManager) StoreCheckpoint

func (c *CheckpointManager) StoreCheckpoint(ctx context.Context, checkpoint *AlertCheckpoint) error

type EventProcessor

type EventProcessor struct {
	// contains filtered or unexported fields
}

func NewEventProcessor

func NewEventProcessor(log *logrus.Logger, handler service.Service) *EventProcessor

func (*EventProcessor) ProcessLatestEvents

func (e *EventProcessor) ProcessLatestEvents(ctx context.Context, oldCheckpoint *AlertCheckpoint, metrics *ProcessingMetrics) (*AlertCheckpoint, error)
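
Pieced together from the signatures above, one load, process, store cycle could look like the following sketch; the real AlertExporter.Poll may orchestrate these steps differently:

func processCycle(ctx context.Context, log *logrus.Logger, svc service.Service) (*alertexporter.AlertCheckpoint, error) {
	checkpoints := alertexporter.NewCheckpointManager(log, svc)
	events := alertexporter.NewEventProcessor(log, svc)

	// LoadCheckpoint falls back to a fresh state if nothing is stored yet or
	// the stored checkpoint cannot be decoded.
	previous := checkpoints.LoadCheckpoint(ctx)

	metrics := &alertexporter.ProcessingMetrics{CycleStartTime: time.Now()}
	updated, err := events.ProcessLatestEvents(ctx, previous, metrics)
	if err != nil {
		return nil, err
	}

	if err := checkpoints.StoreCheckpoint(ctx, updated); err != nil {
		return nil, err
	}
	return updated, nil
}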

type HTTPError

type HTTPError struct {
	StatusCode int
	Status     string
	Message    string
}

HTTPError represents an HTTP error with its status code

func (*HTTPError) Error

func (e *HTTPError) Error() string
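
Because HTTPError implements error, callers can recover it from a returned error chain with errors.As. Whether the Alertmanager client actually wraps its failures in *HTTPError is an assumption; continuing the SendAllAlerts sketch:

if err := client.SendAllAlerts(alerts); err != nil {
	var httpErr *alertexporter.HTTPError
	if errors.As(err, &httpErr) {
		// React to specific status codes; 429 or 5xx responses might warrant a retry.
		log.Warnf("alertmanager returned %d (%s): %s",
			httpErr.StatusCode, httpErr.Status, httpErr.Message)
	}
	return err
}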

type ProcessingMetrics

type ProcessingMetrics struct {
	CycleStartTime   time.Time
	EventsProcessed  int
	AlertsCreated    int
	AlertsResolved   int
	ProcessingTimeMs int64
	SendingTimeMs    int64
	CheckpointTimeMs int64
	TotalCycleTimeMs int64
	ActiveAlerts     int
}

ProcessingMetrics tracks operational metrics for monitoring and observability

type Server

type Server struct {
	// contains filtered or unexported fields
}

func New

func New(
	cfg *config.Config,
	log *logrus.Logger,
) *Server

New returns a new instance of a flightctl server.

func (*Server) Run

func (s *Server) Run(ctx context.Context, serviceHandler service.Service) error
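
Server appears to be the top-level entry point for the exporter; a hedged wiring sketch, assuming the configuration and service handler are built by the surrounding flightctl binary:

func startAlertExporter(ctx context.Context, cfg *config.Config, log *logrus.Logger, handler service.Service) error {
	srv := alertexporter.New(cfg, log)

	// Run is assumed to block until ctx is cancelled or a fatal error occurs.
	return srv.Run(ctx, handler)
}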
