Documentation
Index ¶
Constants ¶
const AlertCheckpointConsumer = "alert-exporter"
const AlertCheckpointKey = "active-alerts"
const CurrentAlertCheckpointVersion = 1
const SentinelAlertName = "__sentinel_alert__"
Variables ¶
var (
	// Processing metrics
	ProcessingCyclesTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_processing_cycles_total",
		Help: "Total number of processing cycles completed",
	})
	ProcessingDurationSeconds = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "flightctl_alert_exporter_processing_duration_seconds",
		Help:    "Time spent processing events in seconds",
		Buckets: prometheus.DefBuckets,
	})
	EventsProcessedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_events_processed_total",
		Help: "Total number of events processed",
	})

	// Alert metrics
	AlertsActiveTotal = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "flightctl_alert_exporter_alerts_active_total",
		Help: "Current number of active alerts",
	})
	AlertsCreatedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_alerts_created_total",
		Help: "Total number of alerts created",
	})
	AlertsResolvedTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_alerts_resolved_total",
		Help: "Total number of alerts resolved",
	})

	// Alertmanager interaction metrics
	AlertmanagerRequestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_alertmanager_requests_total",
		Help: "Total number of requests to Alertmanager",
	}, []string{"status"})
	AlertmanagerRequestDurationSeconds = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "flightctl_alert_exporter_alertmanager_request_duration_seconds",
		Help:    "Time spent sending requests to Alertmanager in seconds",
		Buckets: prometheus.DefBuckets,
	})
	AlertmanagerRetriesTotal = promauto.NewCounter(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_alertmanager_retries_total",
		Help: "Total number of retries when sending to Alertmanager",
	})

	// Checkpoint metrics
	CheckpointOperationsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_checkpoint_operations_total",
		Help: "Total number of checkpoint operations",
	}, []string{"operation", "status"})
	CheckpointSizeBytes = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "flightctl_alert_exporter_checkpoint_size_bytes",
		Help: "Size of the checkpoint data in bytes",
	})

	// Health metrics
	UptimeSeconds = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "flightctl_alert_exporter_uptime_seconds",
		Help: "Time since the alert exporter started in seconds",
	})
	LastSuccessfulProcessingTimestamp = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "flightctl_alert_exporter_last_successful_processing_timestamp",
		Help: "Unix timestamp of the last successful processing cycle",
	})
	ErrorsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "flightctl_alert_exporter_errors_total",
		Help: "Total number of errors encountered",
	}, []string{"component", "type"})
)
Prometheus metrics for the alert exporter
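The sketch below shows one way these collectors might be exercised around a single Alertmanager request: time the call with the request-duration histogram and count the outcome on the labeled counters. The helper name, the package placement, and the label values ("success", "failure", "alertmanager", "send") are assumptions for illustration, not the exporter's documented behavior.

package alert_exporter // package name and placement assumed for illustration

import (
	"github.com/prometheus/client_golang/prometheus"
)

// timeAlertmanagerRequest is a hypothetical helper: it observes the request
// duration on AlertmanagerRequestDurationSeconds and increments the labeled
// counters depending on the outcome of send().
func timeAlertmanagerRequest(send func() error) error {
	timer := prometheus.NewTimer(AlertmanagerRequestDurationSeconds)
	defer timer.ObserveDuration()

	if err := send(); err != nil {
		// Label values here are illustrative; the real values are not documented on this page.
		AlertmanagerRequestsTotal.WithLabelValues("failure").Inc()
		ErrorsTotal.WithLabelValues("alertmanager", "send").Inc()
		return err
	}
	AlertmanagerRequestsTotal.WithLabelValues("success").Inc()
	return nil
}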
Functions ¶
This section is empty.
Types ¶
type AlertCheckpoint ¶
type AlertExporter ¶
type AlertExporter struct {
// contains filtered or unexported fields
}
func NewAlertExporter ¶
type AlertSender ¶
type AlertSender struct {
// contains filtered or unexported fields
}
func NewAlertSender ¶
func (*AlertSender) SendAlerts ¶
func (a *AlertSender) SendAlerts(checkpoint *AlertCheckpoint) error
type AlertmanagerAlert ¶
type AlertmanagerClient ¶
type AlertmanagerClient struct {
// contains filtered or unexported fields
}
func NewAlertmanagerClient ¶
func NewAlertmanagerClient(hostname string, port uint, log logrus.FieldLogger, cfg *config.Config) *AlertmanagerClient
func (*AlertmanagerClient) SendAllAlerts ¶
func (a *AlertmanagerClient) SendAllAlerts(alerts map[AlertKey]map[string]*AlertInfo) error
SendAllAlerts sends all alerts from a nested map to Alertmanager in batches.
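The nested-map signature above is package-specific, but the batching pattern it describes can be sketched generically: flatten the alerts into a slice, split them into fixed-size chunks, and POST each chunk to Alertmanager's v2 API. The payload type, batch size, and error handling below are illustrative assumptions, not this client's implementation.

package example

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
	"time"
)

// postableAlert is a minimal stand-in for an Alertmanager v2 alert payload.
type postableAlert struct {
	Labels      map[string]string `json:"labels"`
	Annotations map[string]string `json:"annotations,omitempty"`
	StartsAt    time.Time         `json:"startsAt,omitempty"`
	EndsAt      time.Time         `json:"endsAt,omitempty"`
}

// postInBatches sends alerts to Alertmanager in fixed-size chunks so that a
// single oversized request body cannot stall or fail the whole send.
func postInBatches(baseURL string, alerts []postableAlert, batchSize int) error {
	for start := 0; start < len(alerts); start += batchSize {
		end := start + batchSize
		if end > len(alerts) {
			end = len(alerts)
		}
		body, err := json.Marshal(alerts[start:end])
		if err != nil {
			return err
		}
		resp, err := http.Post(baseURL+"/api/v2/alerts", "application/json", bytes.NewReader(body))
		if err != nil {
			return err
		}
		resp.Body.Close()
		if resp.StatusCode >= 300 {
			return fmt.Errorf("alertmanager returned status %d", resp.StatusCode)
		}
	}
	return nil
}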
type CheckpointContext ¶
type CheckpointContext struct {
// contains filtered or unexported fields
}
type CheckpointManager ¶
type CheckpointManager struct {
// contains filtered or unexported fields
}
func NewCheckpointManager ¶
func NewCheckpointManager(log *logrus.Logger, handler service.Service) *CheckpointManager
func (*CheckpointManager) LoadCheckpoint ¶
func (c *CheckpointManager) LoadCheckpoint(ctx context.Context) *AlertCheckpoint
LoadCheckpoint retrieves the last processed event and active alerts from the database. If no checkpoint exists, it initializes a fresh state. If the checkpoint cannot be retrieved or its contents cannot be unmarshaled, it logs an error and also starts from a fresh state; this is preferable to panicking because it lets the exporter keep running and report new alerts from the point of failure onward. A more robust recovery strategy, such as listing the system resources and reconstructing the set of active alerts from the current state of the system, could be adopted later. For now, we assume that if fetching the checkpoint fails, fetching the system resources would fail as well.
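The fallback described above can be illustrated with a minimal load-or-start-fresh sketch. The checkpoint type, store interface, and field names below are stand-ins rather than the package's real types; the consumer and key strings come from the package constants.

package example

import (
	"context"
	"encoding/json"

	"github.com/sirupsen/logrus"
)

// checkpoint is an illustrative stand-in for the package's AlertCheckpoint.
type checkpoint struct {
	Version      int    `json:"version"`
	LastEventKey string `json:"lastEventKey"`
}

// checkpointStore is an assumed interface over the database-backed store.
type checkpointStore interface {
	Get(ctx context.Context, consumer, key string) ([]byte, error)
}

// loadOrFresh fetches and decodes the stored checkpoint, falling back to a
// fresh state on any failure so the exporter can keep reporting new alerts.
func loadOrFresh(ctx context.Context, store checkpointStore, log logrus.FieldLogger) *checkpoint {
	fresh := &checkpoint{Version: 1}

	raw, err := store.Get(ctx, "alert-exporter", "active-alerts")
	if err != nil {
		log.WithError(err).Error("failed to fetch checkpoint, starting from a fresh state")
		return fresh
	}

	var cp checkpoint
	if err := json.Unmarshal(raw, &cp); err != nil {
		log.WithError(err).Error("failed to unmarshal checkpoint, starting from a fresh state")
		return fresh
	}
	return &cp
}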
func (*CheckpointManager) StoreCheckpoint ¶
func (c *CheckpointManager) StoreCheckpoint(ctx context.Context, checkpoint *AlertCheckpoint) error
type EventProcessor ¶
type EventProcessor struct {
// contains filtered or unexported fields
}
func NewEventProcessor ¶
func NewEventProcessor(log *logrus.Logger, handler service.Service) *EventProcessor
func (*EventProcessor) ProcessLatestEvents ¶
func (e *EventProcessor) ProcessLatestEvents(ctx context.Context, oldCheckpoint *AlertCheckpoint, metrics *ProcessingMetrics) (*AlertCheckpoint, error)
type ProcessingMetrics ¶
type ProcessingMetrics struct {
	CycleStartTime   time.Time
	EventsProcessed  int
	AlertsCreated    int
	AlertsResolved   int
	ProcessingTimeMs int64
	SendingTimeMs    int64
	CheckpointTimeMs int64
	TotalCycleTimeMs int64
	ActiveAlerts     int
}
ProcessingMetrics tracks operational metrics for monitoring and observability
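Since the struct mirrors several of the package-level Prometheus collectors, a small hypothetical helper can show how a finished cycle's fields might be published to them; the real exporter's wiring may differ.

package alert_exporter // package name and placement assumed for illustration

import "time"

// publishCycle maps a completed cycle's ProcessingMetrics onto the package's
// Prometheus collectors. The helper itself is illustrative.
func publishCycle(m *ProcessingMetrics) {
	ProcessingCyclesTotal.Inc()
	EventsProcessedTotal.Add(float64(m.EventsProcessed))
	AlertsCreatedTotal.Add(float64(m.AlertsCreated))
	AlertsResolvedTotal.Add(float64(m.AlertsResolved))
	AlertsActiveTotal.Set(float64(m.ActiveAlerts))
	ProcessingDurationSeconds.Observe(float64(m.ProcessingTimeMs) / 1000.0) // convert ms to seconds
	LastSuccessfulProcessingTimestamp.Set(float64(time.Now().Unix()))
}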