types

package
v1.6.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 28, 2025 License: Apache-2.0 Imports: 6 Imported by: 0

Documentation

Overview

Package types defines the configuration types, core interfaces and shared data types for Node Doctor. It is based on the architecture.md specification.

Index

Constants

View Source
// Package-level defaults and upper bounds for configuration values.
// All of these are untyped constants. Interval, timeout and cooldown defaults
// are string literals such as "10s".
// NOTE(review): presumably parsed with time.ParseDuration when defaults are
// applied — confirm against the ApplyDefaults implementations.
const (
	// Logging defaults.
	DefaultLogLevel                 = "info"
	DefaultLogFormat                = "json"
	DefaultLogOutput                = "stdout"
	// Update/resync/heartbeat interval defaults (duration strings).
	DefaultUpdateInterval           = "10s"
	DefaultResyncInterval           = "60s"
	DefaultHeartbeatInterval        = "5m"
	// Client rate-limit defaults; see MaxQPS and MaxBurst for the upper bounds.
	DefaultQPS                      = 50
	DefaultBurst                    = 100
	// HTTP defaults. "0.0.0.0" is the IPv4 wildcard address (all interfaces).
	DefaultHTTPPort                 = 8080
	DefaultHTTPBindAddress          = "0.0.0.0"
	// Prometheus exporter defaults.
	DefaultPrometheusPort           = 9100
	DefaultPrometheusPath           = "/metrics"
	// Per-monitor defaults (duration strings).
	DefaultMonitorInterval          = "30s"
	DefaultMonitorTimeout           = "10s"
	// Remediation defaults.
	DefaultCooldownPeriod           = "5m"
	DefaultMaxAttemptsGlobal        = 3
	DefaultMaxRemediationsPerHour   = 10
	DefaultMaxRemediationsPerMinute = 2
	DefaultCircuitBreakerThreshold  = 5
	DefaultCircuitBreakerTimeout    = "30m"
	DefaultHistorySize              = 100
	// Upper bounds (not defaults).
	MaxRecursionDepth               = 10 // Maximum nesting depth for strategies
	MaxQPS                          = 10000
	MaxBurst                        = 100000
)

Package-level defaults

Variables

View Source
// Package-level variables for validation.
// These are declared with var rather than const, so any importer can reassign
// them at run time.
// NOTE(review): confirm whether that mutability is intentional (e.g. for
// tests); the initialisers are constant time.Duration expressions.
var (

	// Minimum interval thresholds (conservative settings to prevent system overload)
	MinMonitorInterval   = 1 * time.Second  // Minimum time between monitor polls
	MinHeartbeatInterval = 5 * time.Second  // Minimum heartbeat check interval
	MinCooldownPeriod    = 10 * time.Second // Minimum cooldown between remediation attempts
)

Package-level variables for validation

Functions

This section is empty.

Types

type APIServerLatency added in v1.5.0

// APIServerLatency represents Kubernetes API server response latency.
// Only JSON tags are declared; neither field uses omitempty, so both are
// always present in the encoded object.
type APIServerLatency struct {
	// LatencyMs is encoded as "latency_ms" (milliseconds, per the field name).
	LatencyMs float64 `json:"latency_ms"`
	// Reachable is encoded as "reachable".
	// NOTE(review): presumably false when the probe failed — confirm with the producing monitor.
	Reachable bool    `json:"reachable"`
}

APIServerLatency represents Kubernetes API server response latency.

type AnnotationConfig

// AnnotationConfig defines a node annotation to manage.
// Both fields are always serialized (no omitempty), under the same keys in
// JSON and YAML.
type AnnotationConfig struct {
	// Key is the annotation key ("key").
	Key   string `json:"key" yaml:"key"`
	// Value is the annotation value ("value").
	Value string `json:"value" yaml:"value"`
}

AnnotationConfig defines a node annotation to manage.

type AuthConfig

// AuthConfig defines authentication configuration for webhooks.
//
// Type is always serialized; the credential fields are omitted only when empty.
// NOTE(review): Token and Password have ordinary json/yaml tags, so marshaling
// a populated AuthConfig emits the secrets in clear text — avoid logging or
// dumping this struct verbatim.
type AuthConfig struct {
	Type     string `json:"type" yaml:"type"`                             // "none", "bearer", "basic"
	Token    string `json:"token,omitempty" yaml:"token,omitempty"`       // Bearer token
	Username string `json:"username,omitempty" yaml:"username,omitempty"` // Basic auth username
	Password string `json:"password,omitempty" yaml:"password,omitempty"` // Basic auth password
}

AuthConfig defines authentication configuration for webhooks.

func (*AuthConfig) Validate

func (a *AuthConfig) Validate() error

Validate validates the AuthConfig configuration.

type CircuitBreakerConfig

// CircuitBreakerConfig configures circuit breaker behavior.
type CircuitBreakerConfig struct {
	// Enabled is always serialized (the tag has no omitempty).
	Enabled          bool          `json:"enabled" yaml:"enabled"`
	// Threshold is serialized as "threshold".
	// NOTE(review): presumably the failure count that trips the breaker
	// (cf. DefaultCircuitBreakerThreshold) — confirm.
	Threshold        int           `json:"threshold,omitempty" yaml:"threshold,omitempty"`
	// TimeoutString is the textual form stored in the config under "timeout".
	TimeoutString    string        `json:"timeout,omitempty" yaml:"timeout,omitempty"`
	// Timeout is excluded from JSON and YAML (tag "-").
	// NOTE(review): presumably the parsed value of TimeoutString — confirm.
	Timeout          time.Duration `json:"-" yaml:"-"`
	// SuccessThreshold is serialized as "successThreshold".
	// NOTE(review): presumably the success count needed to close the breaker — confirm.
	SuccessThreshold int           `json:"successThreshold,omitempty" yaml:"successThreshold,omitempty"`
}

CircuitBreakerConfig configures circuit breaker behavior.

type Condition

// Condition represents the current state of a monitored resource.
// The fields carry no struct tags, so encoding/json would use the Go field
// names as keys.
type Condition struct {
	// Type is the type of condition (e.g., "KubeletReady", "DiskPressure").
	Type string

	// Status is the current status of the condition (True, False, Unknown).
	Status ConditionStatus

	// Transition is when the condition last transitioned.
	Transition time.Time

	// Reason is a brief machine-readable string explaining the condition.
	Reason string

	// Message is a human-readable explanation of the condition.
	Message string
}

Condition represents the current state of a monitored resource.

func NewCondition

func NewCondition(conditionType string, status ConditionStatus, reason, message string) Condition

NewCondition creates a new Condition with the specified parameters. Transition time is automatically set to the current time.

func (*Condition) String

func (c *Condition) String() string

String returns a human-readable string representation of the Condition.

func (*Condition) Validate

func (c *Condition) Validate() error

Validate checks if the Condition has all required fields populated. Returns an error if any required field is missing or invalid.

type ConditionConfig

// ConditionConfig defines a custom node condition.
type ConditionConfig struct {
	// Type is always serialized ("type"); the three Default* fields are
	// omitted when empty.
	Type           string `json:"type" yaml:"type"`
	// DefaultStatus is a plain string here, not a ConditionStatus.
	// NOTE(review): presumably expected to be "True", "False" or "Unknown" — confirm in validation.
	DefaultStatus  string `json:"defaultStatus,omitempty" yaml:"defaultStatus,omitempty"`
	DefaultReason  string `json:"defaultReason,omitempty" yaml:"defaultReason,omitempty"`
	DefaultMessage string `json:"defaultMessage,omitempty" yaml:"defaultMessage,omitempty"`
}

ConditionConfig defines a custom node condition.

type ConditionStatus

// ConditionStatus represents the status of a condition. It is a string-backed
// type; the values defined by this package are ConditionTrue, ConditionFalse
// and ConditionUnknown.
type ConditionStatus string

ConditionStatus represents the status of a condition.

// Values of ConditionStatus defined by this package. The string forms are
// capitalized ("True", "False", "Unknown").
const (
	// ConditionTrue indicates the condition is true/healthy.
	ConditionTrue ConditionStatus = "True"

	// ConditionFalse indicates the condition is false/unhealthy.
	ConditionFalse ConditionStatus = "False"

	// ConditionUnknown indicates the condition status cannot be determined.
	ConditionUnknown ConditionStatus = "Unknown"
)

type ConfigMetadata

// ConfigMetadata contains metadata about the configuration.
type ConfigMetadata struct {
	// Name is always serialized ("name").
	Name      string            `json:"name" yaml:"name"`
	// Namespace is omitted from output when empty.
	Namespace string            `json:"namespace,omitempty" yaml:"namespace,omitempty"`
	// Labels is omitted from output when nil or empty.
	Labels    map[string]string `json:"labels,omitempty" yaml:"labels,omitempty"`
}

ConfigMetadata contains metadata about the configuration.

type ControllerWebhookConfig added in v1.6.0

// ControllerWebhookConfig configures the webhook for sending reports to the
// node-doctor controller.
type ControllerWebhookConfig struct {
	// Enabled indicates whether to send reports to the controller
	Enabled bool `json:"enabled" yaml:"enabled"`

	// URL is the controller's report ingestion endpoint
	URL string `json:"url" yaml:"url"`

	// IntervalString is the interval between reports (stored as string)
	// Interval is excluded from JSON/YAML (tag "-").
	// NOTE(review): presumably the parsed IntervalString — confirm.
	IntervalString string        `json:"interval,omitempty" yaml:"interval,omitempty"`
	Interval       time.Duration `json:"-" yaml:"-"`

	// TimeoutString is the request timeout (stored as string)
	// Timeout is excluded from JSON/YAML (tag "-").
	TimeoutString string        `json:"timeout,omitempty" yaml:"timeout,omitempty"`
	Timeout       time.Duration `json:"-" yaml:"-"`

	// Auth configuration for authenticating with the controller
	// Auth is a struct value: encoding/json never treats a struct as empty, so
	// omitempty has no effect on this field for JSON.
	// NOTE(review): YAML behaviour depends on the YAML library in use.
	Auth AuthConfig `json:"auth,omitempty" yaml:"auth,omitempty"`

	// Headers are custom headers to include in requests
	Headers map[string]string `json:"headers,omitempty" yaml:"headers,omitempty"`

	// Retry configuration for failed requests
	// Retry is a pointer, so a nil value is omitted from output.
	Retry *RetryConfig `json:"retry,omitempty" yaml:"retry,omitempty"`
}

ControllerWebhookConfig configures the webhook for sending reports to the node-doctor controller.

func (*ControllerWebhookConfig) ApplyDefaults added in v1.6.0

func (c *ControllerWebhookConfig) ApplyDefaults(parent *HTTPExporterConfig) error

ApplyDefaults applies default values to ControllerWebhookConfig.

func (*ControllerWebhookConfig) Validate added in v1.6.0

func (c *ControllerWebhookConfig) Validate() error

Validate validates the ControllerWebhookConfig configuration.

type DNSLatency added in v1.5.0

// DNSLatency represents DNS resolution latency.
// Only JSON tags are declared and none use omitempty, so every field is always
// present in the encoded object. Keys are snake_case.
type DNSLatency struct {
	DNSServer  string  `json:"dns_server"`
	Domain     string  `json:"domain"`
	RecordType string  `json:"record_type"`
	DomainType string  `json:"domain_type"` // "cluster", "external", "custom"
	LatencyMs  float64 `json:"latency_ms"`
	Success    bool    `json:"success"`
}

DNSLatency represents DNS resolution latency.

type Event

// Event represents a discrete occurrence detected by a monitor.
// The fields carry no struct tags, so encoding/json would use the Go field
// names as keys.
type Event struct {
	// Severity indicates the importance of the event (Info, Warning, Error).
	Severity EventSeverity

	// Timestamp when the event occurred.
	Timestamp time.Time

	// Reason is a short, machine-readable string that describes the event.
	Reason string

	// Message is a human-readable description of the event.
	Message string
}

Event represents a discrete occurrence detected by a monitor.

func NewEvent

func NewEvent(severity EventSeverity, reason, message string) Event

NewEvent creates a new Event with the specified parameters. Timestamp is automatically set to the current time.

func (*Event) String

func (e *Event) String() string

String returns a human-readable string representation of the Event.

func (*Event) Validate

func (e *Event) Validate() error

Validate checks if the Event has all required fields populated. Returns an error if any required field is missing or invalid.

type EventConfig

// EventConfig configures Kubernetes event behavior.
// Each duration is stored as a string in the config; the time.Duration
// companion field is excluded from JSON/YAML (tag "-").
// NOTE(review): the Duration fields are presumably filled by parsing the
// String fields — confirm where that happens.
type EventConfig struct {
	// MaxEventsPerMinute is serialized as "maxEventsPerMinute", omitted when 0.
	MaxEventsPerMinute        int           `json:"maxEventsPerMinute,omitempty" yaml:"maxEventsPerMinute,omitempty"`
	// EventTTLString is serialized as "eventTTL".
	EventTTLString            string        `json:"eventTTL,omitempty" yaml:"eventTTL,omitempty"`
	EventTTL                  time.Duration `json:"-" yaml:"-"`
	// DeduplicationWindowString is serialized as "deduplicationWindow".
	DeduplicationWindowString string        `json:"deduplicationWindow,omitempty" yaml:"deduplicationWindow,omitempty"`
	DeduplicationWindow       time.Duration `json:"-" yaml:"-"`
}

EventConfig configures Kubernetes event behavior.

type EventSeverity

// EventSeverity represents the severity level of an event. It is a
// string-backed type; the values defined by this package are EventInfo,
// EventWarning and EventError.
type EventSeverity string

EventSeverity represents the severity level of an event.

// Values of EventSeverity defined by this package.
const (
	// EventInfo indicates an informational event with no action required.
	EventInfo EventSeverity = "Info"

	// EventWarning indicates a warning that may require attention.
	EventWarning EventSeverity = "Warning"

	// EventError indicates an error condition that requires immediate attention.
	EventError EventSeverity = "Error"
)

type Exporter

// Exporter is the interface for components that export status and problems.
// Exporters publish information to external systems (Prometheus, Kubernetes
// API, logs).
//
// Both methods take a context.Context first and a pointer argument second.
// NOTE(review): whether implementations must tolerate a nil pointer or honour
// ctx cancellation is not specified here — confirm with the implementations.
type Exporter interface {
	// ExportStatus publishes a status update.
	ExportStatus(ctx context.Context, status *Status) error

	// ExportProblem publishes a problem report.
	ExportProblem(ctx context.Context, problem *Problem) error
}

Exporter is the interface for components that export status and problems. Exporters publish information to external systems (Prometheus, Kubernetes API, logs).

type ExporterConfigs

// ExporterConfigs contains all exporter configurations.
// Every field is a pointer tagged omitempty, so an unset (nil) exporter is
// left out of the serialized output entirely.
type ExporterConfigs struct {
	Kubernetes *KubernetesExporterConfig `json:"kubernetes,omitempty" yaml:"kubernetes,omitempty"`
	HTTP       *HTTPExporterConfig       `json:"http,omitempty" yaml:"http,omitempty"`
	Prometheus *PrometheusExporterConfig `json:"prometheus,omitempty" yaml:"prometheus,omitempty"`
}

ExporterConfigs contains all exporter configurations.

type ExporterReloadResult

// ExporterReloadResult represents the result of an exporter reload operation.
// The struct carries no serialization tags.
type ExporterReloadResult struct {
	ExporterType string // Type of exporter (e.g., "kubernetes", "http", "prometheus")
	Success      bool   // Whether the reload was successful
	Error        error  // Error details if reload failed
	Message      string // Additional information about the reload
}

ExporterReloadResult represents the result of an exporter reload operation.

type ExporterReloadSummary

// ExporterReloadSummary provides a summary of all exporter reload operations.
// NOTE(review): the counters are presumably maintained by AddResult as results
// are appended to Results — confirm against its implementation.
type ExporterReloadSummary struct {
	TotalExporters    int                    // Total number of exporters
	ReloadableCount   int                    // Number of exporters that support reload
	SuccessfulReloads int                    // Number of successful reloads
	FailedReloads     int                    // Number of failed reloads
	Results           []ExporterReloadResult // Detailed results for each exporter
}

ExporterReloadSummary provides a summary of all exporter reload operations.

func (*ExporterReloadSummary) AddResult

func (s *ExporterReloadSummary) AddResult(result ExporterReloadResult)

AddResult adds a reload result to the summary.

type FeatureFlags

// FeatureFlags contains experimental feature flags.
// Every field is tagged omitempty, so false, 0 and "" values are left out of
// the serialized output.
type FeatureFlags struct {
	EnableMetrics   bool   `json:"enableMetrics,omitempty" yaml:"enableMetrics,omitempty"`
	EnableProfiling bool   `json:"enableProfiling,omitempty" yaml:"enableProfiling,omitempty"`
	// ProfilingPort is serialized as "profilingPort".
	// NOTE(review): presumably only consulted when EnableProfiling is true — confirm.
	ProfilingPort   int    `json:"profilingPort,omitempty" yaml:"profilingPort,omitempty"`
	EnableTracing   bool   `json:"enableTracing,omitempty" yaml:"enableTracing,omitempty"`
	// TracingEndpoint is serialized as "tracingEndpoint".
	// NOTE(review): presumably only consulted when EnableTracing is true — confirm.
	TracingEndpoint string `json:"tracingEndpoint,omitempty" yaml:"tracingEndpoint,omitempty"`
}

FeatureFlags contains experimental feature flags.

func (*FeatureFlags) ApplyDefaults

func (f *FeatureFlags) ApplyDefaults()

ApplyDefaults applies default values to FeatureFlags.

type GatewayLatency added in v1.5.0

// GatewayLatency represents latency to the default gateway.
// Only JSON tags are declared and none use omitempty, so every field is always
// present in the encoded object. Keys are snake_case.
type GatewayLatency struct {
	GatewayIP    string  `json:"gateway_ip"`
	// The three *Ms fields are encoded with a "_ms" suffix (milliseconds, per the names).
	LatencyMs    float64 `json:"latency_ms"`
	AvgLatencyMs float64 `json:"avg_latency_ms"`
	MaxLatencyMs float64 `json:"max_latency_ms"`
	Reachable    bool    `json:"reachable"`
	// NOTE(review): PingCount/SuccessCount are presumably probes sent and
	// probes answered — confirm with the producing monitor.
	PingCount    int     `json:"ping_count"`
	SuccessCount int     `json:"success_count"`
}

GatewayLatency represents latency to the default gateway.

type GlobalSettings

// GlobalSettings contains global configuration settings.
type GlobalSettings struct {
	// NodeName is the Kubernetes node name (usually from ${NODE_NAME})
	NodeName string `json:"nodeName" yaml:"nodeName"`

	// Logging configuration
	// Package defaults: DefaultLogLevel, DefaultLogFormat, DefaultLogOutput.
	LogLevel  string `json:"logLevel,omitempty" yaml:"logLevel,omitempty"`
	LogFormat string `json:"logFormat,omitempty" yaml:"logFormat,omitempty"`
	LogOutput string `json:"logOutput,omitempty" yaml:"logOutput,omitempty"`
	LogFile   string `json:"logFile,omitempty" yaml:"logFile,omitempty"`

	// Update intervals (stored as strings, parsed to time.Duration)
	UpdateIntervalString    string `json:"updateInterval,omitempty" yaml:"updateInterval,omitempty"`
	ResyncIntervalString    string `json:"resyncInterval,omitempty" yaml:"resyncInterval,omitempty"`
	HeartbeatIntervalString string `json:"heartbeatInterval,omitempty" yaml:"heartbeatInterval,omitempty"`

	// Parsed duration fields (not in JSON/YAML)
	UpdateInterval    time.Duration `json:"-" yaml:"-"`
	ResyncInterval    time.Duration `json:"-" yaml:"-"`
	HeartbeatInterval time.Duration `json:"-" yaml:"-"`

	// Remediation master switches
	// NOTE(review): RemediationConfig has its own Enabled and DryRun switches;
	// how the two sets interact is not visible here — confirm precedence.
	EnableRemediation bool `json:"enableRemediation,omitempty" yaml:"enableRemediation,omitempty"`
	DryRunMode        bool `json:"dryRunMode,omitempty" yaml:"dryRunMode,omitempty"`

	// Kubernetes client configuration
	// NOTE(review): QPS and Burst are presumably defaulted from
	// DefaultQPS/DefaultBurst and capped by MaxQPS/MaxBurst — confirm in
	// ApplyDefaults/Validate.
	Kubeconfig string  `json:"kubeconfig,omitempty" yaml:"kubeconfig,omitempty"`
	QPS        float32 `json:"qps,omitempty" yaml:"qps,omitempty"`
	Burst      int     `json:"burst,omitempty" yaml:"burst,omitempty"`
}

GlobalSettings contains global configuration settings.

func (*GlobalSettings) ApplyDefaults

func (s *GlobalSettings) ApplyDefaults() error

ApplyDefaults applies default values to GlobalSettings.

func (*GlobalSettings) SubstituteEnvVars

func (s *GlobalSettings) SubstituteEnvVars()

SubstituteEnvVars performs environment variable substitution on GlobalSettings.

func (*GlobalSettings) Validate

func (s *GlobalSettings) Validate() error

Validate validates the GlobalSettings configuration.

type HTTPExporterConfig

// HTTPExporterConfig configures the HTTP webhook exporter.
type HTTPExporterConfig struct {
	// Enabled is always serialized; the remaining scalar/collection fields are
	// omitted when zero or empty.
	Enabled   bool              `json:"enabled" yaml:"enabled"`
	Webhooks  []WebhookEndpoint `json:"webhooks,omitempty" yaml:"webhooks,omitempty"`
	Workers   int               `json:"workers,omitempty" yaml:"workers,omitempty"`
	QueueSize int               `json:"queueSize,omitempty" yaml:"queueSize,omitempty"`

	// Default timeout for all webhooks (can be overridden per webhook)
	// Timeout is excluded from JSON/YAML (tag "-").
	TimeoutString string        `json:"timeout,omitempty" yaml:"timeout,omitempty"`
	Timeout       time.Duration `json:"-" yaml:"-"`

	// Default retry configuration for all webhooks (can be overridden per webhook)
	// Retry is a struct value here (ControllerWebhookConfig.Retry is a
	// pointer): encoding/json never treats a struct as empty, so omitempty has
	// no effect on it for JSON.
	// NOTE(review): YAML behaviour depends on the YAML library in use.
	Retry   RetryConfig       `json:"retry,omitempty" yaml:"retry,omitempty"`
	Headers map[string]string `json:"headers,omitempty" yaml:"headers,omitempty"`

	// Controller webhook for sending aggregated reports to the controller
	Controller *ControllerWebhookConfig `json:"controller,omitempty" yaml:"controller,omitempty"`
}

HTTPExporterConfig configures the HTTP webhook exporter.

func (*HTTPExporterConfig) ApplyDefaults

func (h *HTTPExporterConfig) ApplyDefaults() error

ApplyDefaults applies default values to HTTPExporterConfig.

func (*HTTPExporterConfig) SubstituteEnvVars

func (h *HTTPExporterConfig) SubstituteEnvVars()

SubstituteEnvVars performs environment variable substitution on HTTPExporterConfig.

func (*HTTPExporterConfig) Validate

func (h *HTTPExporterConfig) Validate() error

Validate validates the HTTPExporterConfig configuration.

type KubernetesExporterConfig

// KubernetesExporterConfig configures the Kubernetes exporter.
type KubernetesExporterConfig struct {
	Enabled bool `json:"enabled" yaml:"enabled"`

	// Update intervals
	// These use the same serialized keys as the GlobalSettings interval fields.
	// NOTE(review): presumably they override or inherit the global values —
	// confirm in ApplyDefaults.
	UpdateIntervalString    string `json:"updateInterval,omitempty" yaml:"updateInterval,omitempty"`
	ResyncIntervalString    string `json:"resyncInterval,omitempty" yaml:"resyncInterval,omitempty"`
	HeartbeatIntervalString string `json:"heartbeatInterval,omitempty" yaml:"heartbeatInterval,omitempty"`

	// Duration companions; excluded from JSON/YAML (tag "-").
	UpdateInterval    time.Duration `json:"-" yaml:"-"`
	ResyncInterval    time.Duration `json:"-" yaml:"-"`
	HeartbeatInterval time.Duration `json:"-" yaml:"-"`

	// Namespace for events
	Namespace string `json:"namespace,omitempty" yaml:"namespace,omitempty"`

	// Custom node conditions
	Conditions []ConditionConfig `json:"conditions,omitempty" yaml:"conditions,omitempty"`

	// Node annotations to manage
	Annotations []AnnotationConfig `json:"annotations,omitempty" yaml:"annotations,omitempty"`

	// Event configuration
	// Events is a struct value: encoding/json never treats a struct as empty,
	// so omitempty has no effect on this field for JSON.
	Events EventConfig `json:"events,omitempty" yaml:"events,omitempty"`
}

KubernetesExporterConfig configures the Kubernetes exporter.

func (*KubernetesExporterConfig) ApplyDefaults

func (k *KubernetesExporterConfig) ApplyDefaults() error

ApplyDefaults applies default values to KubernetesExporterConfig.

func (*KubernetesExporterConfig) SubstituteEnvVars

func (k *KubernetesExporterConfig) SubstituteEnvVars()

SubstituteEnvVars performs environment variable substitution on KubernetesExporterConfig.

func (*KubernetesExporterConfig) Validate

func (k *KubernetesExporterConfig) Validate() error

Validate validates the KubernetesExporterConfig configuration.

type LatencyMetrics added in v1.5.0

// LatencyMetrics contains network latency measurements for Prometheus export.
// Monitors should populate this in Status.Metadata["latency_metrics"].
//
// Every field is a pointer or slice tagged omitempty, so sections that were
// not measured (nil / empty) are left out of the JSON output.
type LatencyMetrics struct {
	// Gateway latency metrics
	Gateway *GatewayLatency `json:"gateway,omitempty"`

	// Peer latency metrics (CNI/cross-node connectivity)
	Peers []PeerLatency `json:"peers,omitempty"`

	// DNS latency metrics
	DNS []DNSLatency `json:"dns,omitempty"`

	// API server latency
	APIServer *APIServerLatency `json:"apiserver,omitempty"`
}

LatencyMetrics contains network latency measurements for Prometheus export. Monitors should populate this in Status.Metadata["latency_metrics"].

type Monitor

// Monitor is the interface that all monitors must implement. Monitors detect
// problems on the node and report them via a channel.
type Monitor interface {
	// Start begins the monitoring process and returns a channel for status updates.
	// The monitor runs asynchronously and sends Status updates through the channel.
	// The returned channel is receive-only for the caller.
	// NOTE(review): whether the channel is closed after Stop is not specified
	// here — confirm with the implementations.
	Start() (<-chan *Status, error)

	// Stop gracefully stops the monitor.
	Stop()
}

Monitor is the interface that all monitors must implement. Monitors detect problems on the node and report them via a channel.

type MonitorConfig

// MonitorConfig represents a single monitor configuration.
type MonitorConfig struct {
	// Name is the unique identifier for this monitor
	Name string `json:"name" yaml:"name"`

	// Type is the monitor type (e.g., "system-disk-check")
	Type string `json:"type" yaml:"type"`

	// Enabled indicates whether this monitor is active
	Enabled bool `json:"enabled" yaml:"enabled"`

	// Interval and timeout (stored as strings)
	// Package defaults: DefaultMonitorInterval and DefaultMonitorTimeout.
	IntervalString string `json:"interval,omitempty" yaml:"interval,omitempty"`
	TimeoutString  string `json:"timeout,omitempty" yaml:"timeout,omitempty"`

	// Parsed duration fields
	// Excluded from JSON/YAML (tag "-").
	Interval time.Duration `json:"-" yaml:"-"`
	Timeout  time.Duration `json:"-" yaml:"-"`

	// Config contains monitor-specific configuration as a map
	// Each monitor type will parse this according to its needs
	Config map[string]interface{} `json:"config,omitempty" yaml:"config,omitempty"`

	// Remediation contains optional remediation configuration for this monitor
	// A nil pointer is omitted from output.
	Remediation *MonitorRemediationConfig `json:"remediation,omitempty" yaml:"remediation,omitempty"`

	// DependsOn specifies monitors that must complete successfully before this monitor starts
	// Used for dependency ordering and circular dependency detection during validation
	DependsOn []string `json:"dependsOn,omitempty" yaml:"dependsOn,omitempty"`
}

MonitorConfig represents a single monitor configuration.

func (*MonitorConfig) ApplyDefaults

func (m *MonitorConfig) ApplyDefaults() error

ApplyDefaults applies default values to MonitorConfig.

func (*MonitorConfig) SubstituteEnvVars

func (m *MonitorConfig) SubstituteEnvVars()

SubstituteEnvVars performs environment variable substitution on MonitorConfig.

func (*MonitorConfig) Validate

func (m *MonitorConfig) Validate() error

Validate validates the MonitorConfig configuration.

type MonitorRegistryValidator

// MonitorRegistryValidator provides an interface for validating monitor types
// without creating an import cycle between config and monitors packages. This
// interface is implemented by monitors.Registry and is the parameter type of
// NodeDoctorConfig.ValidateWithRegistry.
type MonitorRegistryValidator interface {
	// IsRegistered returns true if the given monitor type is registered
	IsRegistered(monitorType string) bool

	// GetRegisteredTypes returns a sorted list of all registered monitor types
	GetRegisteredTypes() []string
}

MonitorRegistryValidator provides an interface for validating monitor types without creating an import cycle between config and monitors packages. This interface is implemented by monitors.Registry.

type MonitorRemediationConfig

// MonitorRemediationConfig contains remediation settings for a monitor.
// The type is recursive: Strategies holds further MonitorRemediationConfig
// values.
type MonitorRemediationConfig struct {
	// Enabled indicates whether remediation is enabled for this monitor
	Enabled bool `json:"enabled" yaml:"enabled"`

	// Strategy is the remediation strategy type
	Strategy string `json:"strategy,omitempty" yaml:"strategy,omitempty"`

	// Action is the specific action to take
	Action string `json:"action,omitempty" yaml:"action,omitempty"`

	// Service is the systemd service name (for systemd-restart strategy)
	Service string `json:"service,omitempty" yaml:"service,omitempty"`

	// ScriptPath is the path to remediation script (for custom-script strategy)
	ScriptPath string `json:"scriptPath,omitempty" yaml:"scriptPath,omitempty"`

	// Args are arguments to pass to the script
	Args []string `json:"args,omitempty" yaml:"args,omitempty"`

	// Cooldown period (stored as string)
	// Cooldown is excluded from JSON/YAML (tag "-").
	CooldownString string        `json:"cooldown,omitempty" yaml:"cooldown,omitempty"`
	Cooldown       time.Duration `json:"-" yaml:"-"`

	// MaxAttempts is the maximum remediation attempts
	MaxAttempts int `json:"maxAttempts,omitempty" yaml:"maxAttempts,omitempty"`

	// Priority for multiple remediation strategies
	// NOTE(review): whether a lower or higher number runs first is not stated
	// here — confirm with the remediation engine.
	Priority int `json:"priority,omitempty" yaml:"priority,omitempty"`

	// GracefulStop indicates whether to stop gracefully
	GracefulStop bool `json:"gracefulStop,omitempty" yaml:"gracefulStop,omitempty"`

	// WaitTimeout for graceful stop (stored as string)
	// WaitTimeout is excluded from JSON/YAML (tag "-").
	WaitTimeoutString string        `json:"waitTimeout,omitempty" yaml:"waitTimeout,omitempty"`
	WaitTimeout       time.Duration `json:"-" yaml:"-"`

	// Additional strategies for multi-step remediation
	// Each element is itself a MonitorRemediationConfig, so strategies can nest.
	// MaxRecursionDepth is documented as the maximum nesting depth for strategies.
	Strategies []MonitorRemediationConfig `json:"strategies,omitempty" yaml:"strategies,omitempty"`
}

MonitorRemediationConfig contains remediation settings for a monitor.

func (*MonitorRemediationConfig) ApplyDefaults

func (r *MonitorRemediationConfig) ApplyDefaults() error

ApplyDefaults applies default values to MonitorRemediationConfig.

func (*MonitorRemediationConfig) SubstituteEnvVars

func (r *MonitorRemediationConfig) SubstituteEnvVars()

SubstituteEnvVars performs environment variable substitution on MonitorRemediationConfig.

func (*MonitorRemediationConfig) Validate

func (r *MonitorRemediationConfig) Validate() error

Validate validates the MonitorRemediationConfig configuration.

type NodeDoctorConfig

// NodeDoctorConfig is the top-level configuration structure.
type NodeDoctorConfig struct {
	// APIVersion of the configuration schema
	APIVersion string `json:"apiVersion" yaml:"apiVersion"`

	// Kind of resource (always "NodeDoctorConfig")
	Kind string `json:"kind" yaml:"kind"`

	// Metadata contains name, namespace, labels, etc.
	Metadata ConfigMetadata `json:"metadata" yaml:"metadata"`

	// Settings contains global configuration
	Settings GlobalSettings `json:"settings" yaml:"settings"`

	// Monitors contains all monitor configurations
	// The tag has no omitempty, so a nil slice is encoded as JSON null.
	Monitors []MonitorConfig `json:"monitors" yaml:"monitors"`

	// Exporters contains exporter configurations
	Exporters ExporterConfigs `json:"exporters" yaml:"exporters"`

	// Remediation contains global remediation settings
	Remediation RemediationConfig `json:"remediation" yaml:"remediation"`

	// Features contains feature flags
	// Features is a struct value: encoding/json never treats a struct as empty,
	// so omitempty has no effect on this field for JSON.
	Features FeatureFlags `json:"features,omitempty" yaml:"features,omitempty"`

	// Reload contains configuration hot reload settings
	// As with Features, omitempty has no effect on this struct value for JSON.
	Reload ReloadConfig `json:"reload,omitempty" yaml:"reload,omitempty"`
}

NodeDoctorConfig is the top-level configuration structure.

func (*NodeDoctorConfig) ApplyDefaults

func (c *NodeDoctorConfig) ApplyDefaults() error

ApplyDefaults applies default values to the configuration.

func (*NodeDoctorConfig) SubstituteEnvVars

func (c *NodeDoctorConfig) SubstituteEnvVars()

SubstituteEnvVars performs environment variable substitution on the configuration.

func (*NodeDoctorConfig) Validate

func (c *NodeDoctorConfig) Validate() error

Validate validates the entire configuration.

func (*NodeDoctorConfig) ValidateWithRegistry

func (c *NodeDoctorConfig) ValidateWithRegistry(registry MonitorRegistryValidator) error

ValidateWithRegistry validates the entire configuration including monitor type registration. This method should be called instead of Validate() when a monitor registry is available, as it performs additional validation that requires checking against registered monitor types.

type PeerLatency added in v1.5.0

// PeerLatency represents latency to a peer node.
// Only JSON tags are declared and none use omitempty, so every field is always
// present in the encoded object. Keys are snake_case.
type PeerLatency struct {
	PeerNode     string  `json:"peer_node"`
	PeerIP       string  `json:"peer_ip"`
	// Both latency fields are encoded with a "_ms" suffix (milliseconds, per the names).
	LatencyMs    float64 `json:"latency_ms"`
	AvgLatencyMs float64 `json:"avg_latency_ms"`
	Reachable    bool    `json:"reachable"`
}

PeerLatency represents latency to a peer node.

type Problem

// Problem represents an issue detected that may require remediation.
// The fields carry no struct tags.
type Problem struct {
	// Type categorizes the problem (e.g., "systemd-service-failed").
	Type string

	// Resource identifies the affected resource (e.g., "kubelet.service").
	Resource string

	// Severity indicates how critical the problem is.
	Severity ProblemSeverity

	// Message describes the problem in detail.
	Message string

	// DetectedAt is when the problem was first detected.
	DetectedAt time.Time

	// Metadata contains additional context about the problem.
	// In a zero-value Problem this map is nil; NewProblem is documented to
	// initialize it as empty.
	Metadata map[string]string
}

Problem represents an issue detected that may require remediation.

func NewProblem

func NewProblem(problemType, resource string, severity ProblemSeverity, message string) *Problem

NewProblem creates a new Problem with the specified parameters. DetectedAt time is automatically set to the current time. Metadata map is initialized as empty.

func (*Problem) GetMetadata

func (p *Problem) GetMetadata(key string) (string, bool)

GetMetadata retrieves a metadata value by key from the Problem. Returns the value and true if found, empty string and false otherwise. If the Problem pointer is nil, returns empty string and false.

func (*Problem) String

func (p *Problem) String() string

String returns a human-readable string representation of the Problem.

func (*Problem) Validate

func (p *Problem) Validate() error

Validate checks if the Problem has all required fields populated. Returns an error if any required field is missing or invalid.

func (*Problem) WithMetadata

func (p *Problem) WithMetadata(key, value string) *Problem

WithMetadata adds a metadata key-value pair to the Problem. Returns the Problem pointer for method chaining. If the Problem pointer is nil, this is a no-op and returns nil.

type ProblemSeverity

// ProblemSeverity represents the severity level of a problem. It is a
// string-backed type; the values defined by this package are ProblemInfo,
// ProblemWarning and ProblemCritical.
type ProblemSeverity string

ProblemSeverity represents the severity level of a problem.

// Values of ProblemSeverity defined by this package. Note that the highest
// level is "Critical", whereas EventSeverity tops out at "Error".
const (
	// ProblemInfo indicates an informational problem with no immediate impact.
	ProblemInfo ProblemSeverity = "Info"

	// ProblemWarning indicates a problem that may impact node health if not addressed.
	ProblemWarning ProblemSeverity = "Warning"

	// ProblemCritical indicates a critical problem requiring immediate remediation.
	ProblemCritical ProblemSeverity = "Critical"
)

type PrometheusExporterConfig

// PrometheusExporterConfig configures the Prometheus exporter.
type PrometheusExporterConfig struct {
	// Enabled is always serialized; the remaining fields are omitted when zero or empty.
	Enabled   bool              `json:"enabled" yaml:"enabled"`
	// Port and Path have package defaults DefaultPrometheusPort and DefaultPrometheusPath.
	Port      int               `json:"port,omitempty" yaml:"port,omitempty"`
	Path      string            `json:"path,omitempty" yaml:"path,omitempty"`
	// NOTE(review): Namespace and Subsystem are presumably the Prometheus
	// metric-name prefixes, and Labels constant labels — confirm with the exporter.
	Namespace string            `json:"namespace,omitempty" yaml:"namespace,omitempty"`
	Subsystem string            `json:"subsystem,omitempty" yaml:"subsystem,omitempty"`
	Labels    map[string]string `json:"labels,omitempty" yaml:"labels,omitempty"`
}

PrometheusExporterConfig configures the Prometheus exporter.

func (*PrometheusExporterConfig) ApplyDefaults

func (p *PrometheusExporterConfig) ApplyDefaults() error

ApplyDefaults applies default values to PrometheusExporterConfig.

func (*PrometheusExporterConfig) SubstituteEnvVars

func (p *PrometheusExporterConfig) SubstituteEnvVars()

SubstituteEnvVars performs environment variable substitution on PrometheusExporterConfig.

func (*PrometheusExporterConfig) Validate

func (p *PrometheusExporterConfig) Validate() error

Validate validates the PrometheusExporterConfig configuration.

type ReloadConfig

// ReloadConfig contains configuration hot reload settings.
type ReloadConfig struct {
	// Enabled indicates whether hot reload is enabled
	Enabled bool `json:"enabled" yaml:"enabled"`

	// DebounceIntervalString is the debounce interval as a string (e.g., "500ms")
	DebounceIntervalString string `json:"debounceInterval,omitempty" yaml:"debounceInterval,omitempty"`

	// DebounceInterval is the parsed debounce duration
	// It is excluded from JSON/YAML (tag "-").
	DebounceInterval time.Duration `json:"-" yaml:"-"`
}

ReloadConfig contains configuration hot reload settings.

func (*ReloadConfig) ApplyDefaults

func (r *ReloadConfig) ApplyDefaults() error

ApplyDefaults applies default values to reload configuration.

type ReloadableExporter

// ReloadableExporter extends the basic Exporter interface with reload
// capability. Exporters that implement this interface can update their
// configuration without requiring a full restart, enabling hot reload of
// exporter settings.
type ReloadableExporter interface {
	// Exporter is embedded, so implementations must also provide
	// ExportStatus and ExportProblem.
	Exporter

	// Reload updates the exporter configuration without restarting the exporter.
	// The config parameter should be the exporter-specific configuration struct.
	// Returns an error if the reload fails or if the configuration is invalid.
	// NOTE(review): config is untyped (interface{}); implementations
	// presumably type-assert it to their own config type — confirm.
	Reload(config interface{}) error

	// IsReloadable returns true if this exporter supports configuration reload.
	// This is primarily used for runtime checks and debugging.
	IsReloadable() bool
}

ReloadableExporter extends the basic Exporter interface with reload capability. Exporters that implement this interface can update their configuration without requiring a full restart, enabling hot reload of exporter settings.

type RemediationConfig

// RemediationConfig contains global remediation settings.
type RemediationConfig struct {
	// Master switches
	// NOTE(review): GlobalSettings also has EnableRemediation and DryRunMode;
	// which set takes precedence is not visible here — confirm.
	Enabled bool `json:"enabled" yaml:"enabled"`
	DryRun  bool `json:"dryRun,omitempty" yaml:"dryRun,omitempty"`

	// Safety limits
	// Package defaults: DefaultMaxRemediationsPerHour and DefaultMaxRemediationsPerMinute.
	MaxRemediationsPerHour   int `json:"maxRemediationsPerHour,omitempty" yaml:"maxRemediationsPerHour,omitempty"`
	MaxRemediationsPerMinute int `json:"maxRemediationsPerMinute,omitempty" yaml:"maxRemediationsPerMinute,omitempty"`

	// Cooldown configuration
	// CooldownPeriod is excluded from JSON/YAML (tag "-").
	CooldownPeriodString string        `json:"cooldownPeriod,omitempty" yaml:"cooldownPeriod,omitempty"`
	CooldownPeriod       time.Duration `json:"-" yaml:"-"`

	// Global max attempts
	MaxAttemptsGlobal int `json:"maxAttemptsGlobal,omitempty" yaml:"maxAttemptsGlobal,omitempty"`

	// Circuit breaker settings
	// CircuitBreaker is a struct value: encoding/json never treats a struct as
	// empty, so omitempty has no effect on this field for JSON.
	CircuitBreaker CircuitBreakerConfig `json:"circuitBreaker,omitempty" yaml:"circuitBreaker,omitempty"`

	// History configuration
	HistorySize int `json:"historySize,omitempty" yaml:"historySize,omitempty"`

	// Problem-specific overrides
	Overrides []RemediationOverride `json:"overrides,omitempty" yaml:"overrides,omitempty"`

	// Coordination with controller for cluster-wide remediation safety
	// A nil pointer is omitted from output.
	Coordination *RemediationCoordinationConfig `json:"coordination,omitempty" yaml:"coordination,omitempty"`
}

RemediationConfig contains global remediation settings.

func (*RemediationConfig) ApplyDefaults

func (r *RemediationConfig) ApplyDefaults() error

ApplyDefaults applies default values to RemediationConfig.

func (*RemediationConfig) SubstituteEnvVars

func (r *RemediationConfig) SubstituteEnvVars()

SubstituteEnvVars performs environment variable substitution on RemediationConfig.

func (*RemediationConfig) Validate

func (r *RemediationConfig) Validate() error

Validate validates the RemediationConfig configuration.

type RemediationCoordinationConfig added in v1.6.0

// RemediationCoordinationConfig configures coordination with the controller
// for remediation leases.
//
// Each duration setting is declared twice: a ...String field that is
// serialized, and a time.Duration field tagged "-" that is excluded from both
// JSON and YAML.
// NOTE(review): presumably the time.Duration fields are populated by parsing
// the ...String fields in ApplyDefaults/Validate — confirm.
type RemediationCoordinationConfig struct {
	// Enabled indicates whether to coordinate remediations with the controller
	Enabled bool `json:"enabled" yaml:"enabled"`

	// ControllerURL is the URL of the node-doctor controller
	ControllerURL string `json:"controllerURL" yaml:"controllerURL"`

	// LeaseTimeoutString is the requested duration for remediation leases (stored as string)
	LeaseTimeoutString string        `json:"leaseTimeout,omitempty" yaml:"leaseTimeout,omitempty"`
	LeaseTimeout       time.Duration `json:"-" yaml:"-"`

	// RequestTimeoutString is the timeout for lease requests to the controller (stored as string)
	RequestTimeoutString string        `json:"requestTimeout,omitempty" yaml:"requestTimeout,omitempty"`
	RequestTimeout       time.Duration `json:"-" yaml:"-"`

	// FallbackOnUnreachable determines behavior when controller is unreachable
	// If true, proceed with remediation; if false, block and wait for controller
	// The Go zero value is false, i.e. the blocking behavior.
	// NOTE(review): confirm ApplyDefaults does not change this default.
	FallbackOnUnreachable bool `json:"fallbackOnUnreachable,omitempty" yaml:"fallbackOnUnreachable,omitempty"`

	// MaxRetries is the maximum number of lease request retries
	MaxRetries int `json:"maxRetries,omitempty" yaml:"maxRetries,omitempty"`

	// RetryIntervalString is the interval between lease request retries (stored as string)
	RetryIntervalString string        `json:"retryInterval,omitempty" yaml:"retryInterval,omitempty"`
	RetryInterval       time.Duration `json:"-" yaml:"-"`
}

RemediationCoordinationConfig configures coordination with the controller for remediation leases.

func (*RemediationCoordinationConfig) ApplyDefaults added in v1.6.0

func (c *RemediationCoordinationConfig) ApplyDefaults() error

ApplyDefaults applies default values to RemediationCoordinationConfig.

func (*RemediationCoordinationConfig) Validate added in v1.6.0

func (c *RemediationCoordinationConfig) Validate() error

Validate validates the RemediationCoordinationConfig configuration.

type RemediationOverride

// RemediationOverride allows problem-specific remediation overrides.
type RemediationOverride struct {
	// Problem selects which problem this override applies to (always serialized).
	// NOTE(review): matching rules (exact name vs. pattern) are not visible here — confirm.
	Problem string `json:"problem" yaml:"problem"`
	// CooldownString is the serialized cooldown (key "cooldown"); Cooldown is
	// excluded from JSON and YAML.
	// NOTE(review): presumably Cooldown is parsed from CooldownString — confirm.
	CooldownString string        `json:"cooldown,omitempty" yaml:"cooldown,omitempty"`
	Cooldown       time.Duration `json:"-" yaml:"-"`
	// MaxAttempts and CircuitBreakerThreshold are omitted from output when zero.
	// NOTE(review): presumably zero means "use the global setting" — confirm.
	MaxAttempts             int `json:"maxAttempts,omitempty" yaml:"maxAttempts,omitempty"`
	CircuitBreakerThreshold int `json:"circuitBreakerThreshold,omitempty" yaml:"circuitBreakerThreshold,omitempty"`
}

RemediationOverride allows problem-specific remediation overrides.

type Remediator

// Remediator is the interface for components that can fix problems.
//
// It pairs a capability check (CanRemediate) with the action itself
// (Remediate) and exposes the remediator's own cooldown (GetCooldown).
type Remediator interface {
	// CanRemediate returns true if this remediator can handle the given problem.
	CanRemediate(problem Problem) bool

	// Remediate attempts to fix the problem.
	// Returns an error if remediation fails or is not allowed (cooldown, rate limit, etc.).
	// ctx is the caller-supplied context.
	// NOTE(review): whether implementations honor ctx cancellation is not visible here — confirm.
	Remediate(ctx context.Context, problem Problem) error

	// GetCooldown returns the minimum time between remediation attempts for this remediator.
	GetCooldown() time.Duration
}

Remediator is the interface for components that can fix problems.

type RetryConfig

// RetryConfig defines retry behavior for webhook calls.
//
// BaseDelay and MaxDelay are tagged "-" and therefore excluded from JSON and
// YAML; only their ...String counterparts are serialized.
// NOTE(review): presumably the durations are parsed from the strings during
// validation or defaulting — confirm.
type RetryConfig struct {
	// MaxAttempts is omitted from output when zero.
	// NOTE(review): whether this counts total tries or retries after the first is not visible here — confirm.
	MaxAttempts int `json:"maxAttempts,omitempty" yaml:"maxAttempts,omitempty"`

	// Base delay between retries (stored as string)
	BaseDelayString string        `json:"baseDelay,omitempty" yaml:"baseDelay,omitempty"`
	BaseDelay       time.Duration `json:"-" yaml:"-"`

	// Maximum delay between retries (stored as string)
	MaxDelayString string        `json:"maxDelay,omitempty" yaml:"maxDelay,omitempty"`
	MaxDelay       time.Duration `json:"-" yaml:"-"`
}

RetryConfig defines retry behavior for webhook calls.

func (*RetryConfig) Validate

func (r *RetryConfig) Validate() error

Validate validates the RetryConfig configuration.

type Status

// Status represents the current state reported by a monitor.
//
// Only Metadata declares a struct tag. The other fields carry none, so
// encoding/json would emit them under their Go field names ("Source",
// "Events", "Conditions", "Timestamp").
type Status struct {
	// Source identifies the monitor that generated this status.
	Source string

	// Events are notable occurrences detected by the monitor.
	Events []Event

	// Conditions represent the current state of the monitored resource.
	Conditions []Condition

	// Timestamp when this status was generated.
	Timestamp time.Time

	// Metadata holds monitor-specific observability data (metrics, diagnostics, etc.)
	// Values are untyped, so readers must type-assert. The package's
	// GetLatencyMetrics and SetLatencyMetrics helpers read and write latency
	// metrics through this map.
	Metadata map[string]interface{} `json:"metadata,omitempty"`
}

Status represents the current state reported by a monitor.

func NewStatus

func NewStatus(source string) *Status

NewStatus creates a new Status with the specified source. Timestamp is automatically set to the current time. Events and Conditions slices are initialized as empty.

func (*Status) AddCondition

func (s *Status) AddCondition(condition Condition) *Status

AddCondition adds a condition to the Status. Returns the Status pointer for method chaining.

func (*Status) AddEvent

func (s *Status) AddEvent(event Event) *Status

AddEvent adds an event to the Status. Returns the Status pointer for method chaining.

func (*Status) ClearConditions

func (s *Status) ClearConditions() *Status

ClearConditions removes all conditions from the Status. Returns the Status pointer for method chaining.

func (*Status) ClearEvents

func (s *Status) ClearEvents() *Status

ClearEvents removes all events from the Status. Returns the Status pointer for method chaining.

func (*Status) GetLatencyMetrics added in v1.5.0

func (s *Status) GetLatencyMetrics() *LatencyMetrics

GetLatencyMetrics retrieves latency metrics from Status.Metadata. Returns nil if not set or if type assertion fails.

func (*Status) SetLatencyMetrics added in v1.5.0

func (s *Status) SetLatencyMetrics(metrics *LatencyMetrics) *Status

SetLatencyMetrics is a helper to set latency metrics in Status.Metadata.

func (*Status) String

func (s *Status) String() string

String returns a human-readable string representation of the Status.

func (*Status) Validate

func (s *Status) Validate() error

Validate checks if the Status has all required fields populated. Returns an error if any required field is missing or invalid.

type WebhookEndpoint

// WebhookEndpoint defines a webhook destination for HTTP exports.
//
// Name and URL are always serialized; every other tagged field uses omitempty,
// and Timeout is excluded from JSON and YAML via the "-" tag.
type WebhookEndpoint struct {
	Name string `json:"name" yaml:"name"`
	URL  string `json:"url" yaml:"url"`
	// NOTE(review): Auth is held by value; if AuthConfig is a struct,
	// encoding/json's omitempty never omits it — confirm this is intended.
	Auth AuthConfig `json:"auth,omitempty" yaml:"auth,omitempty"`

	// Per-webhook timeout (overrides default)
	// TimeoutString is the serialized form (key "timeout").
	// NOTE(review): presumably Timeout is parsed from TimeoutString — confirm.
	TimeoutString string        `json:"timeout,omitempty" yaml:"timeout,omitempty"`
	Timeout       time.Duration `json:"-" yaml:"-"`

	// Per-webhook retry config (overrides default)
	// Pointer-typed, so nil means no per-webhook retry section is present.
	Retry *RetryConfig `json:"retry,omitempty" yaml:"retry,omitempty"`

	// Per-webhook headers (merged with default headers)
	Headers map[string]string `json:"headers,omitempty" yaml:"headers,omitempty"`

	// Control what gets sent to this webhook
	// Both flags are omitted from output when false.
	// NOTE(review): the effective defaults depend on ApplyDefaults, which is not visible here — confirm.
	SendStatus   bool `json:"sendStatus,omitempty" yaml:"sendStatus,omitempty"`
	SendProblems bool `json:"sendProblems,omitempty" yaml:"sendProblems,omitempty"`
}

WebhookEndpoint defines a webhook destination for HTTP exports.

func (*WebhookEndpoint) ApplyDefaults

func (w *WebhookEndpoint) ApplyDefaults(parent *HTTPExporterConfig) error

ApplyDefaults applies default values to WebhookEndpoint.

func (*WebhookEndpoint) SubstituteEnvVars

func (w *WebhookEndpoint) SubstituteEnvVars()

SubstituteEnvVars performs environment variable substitution on WebhookEndpoint.

func (*WebhookEndpoint) Validate

func (w *WebhookEndpoint) Validate() error

Validate validates the WebhookEndpoint configuration.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL