operator

package
v1.36.0
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 29, 2026 License: Apache-2.0 Imports: 4 Imported by: 0

Documentation

Overview

Package operator provides a Kubernetes operator for managing ZerfooInferenceService custom resources. It reconciles desired inference service state into Kubernetes Deployments, Services, and HorizontalPodAutoscalers via a pluggable KubeClient interface.

Index

Constants

This section is empty.

Variables

View Source
// Sentinel errors exported by the package. Each one is a distinct value
// created with errors.New, so callers can test for it with errors.Is.
// Every message carries the "operator: " prefix.
var (
	ErrInvalidSpec   = errors.New("operator: invalid spec")
	ErrNotFound      = errors.New("operator: resource not found")
	ErrAlreadyExists = errors.New("operator: resource already exists")
)

Standard errors returned by validation and reconciliation.

Functions

This section is empty.

Types

type CanarySpec

// CanarySpec describes a canary rollout: the model the canary serves and the
// share of traffic it receives. ZerfooInferenceServiceSpec.Canary holds an
// optional pointer to it.
type CanarySpec struct {
	// ModelRef is the canary model reference.
	ModelRef string `json:"modelRef"`

	// Weight is the percentage of traffic routed to the canary (0-100).
	// The field is a plain int, so the 0-100 range is not enforced by the
	// type itself.
	// NOTE(review): presumably range-checked by Validate — confirm.
	Weight int `json:"weight"`
}

CanarySpec configures a canary deployment alongside the primary.

type Deployment

// Deployment is the operator's own description of a Kubernetes Deployment:
// its name and namespace, a replica count, a model reference, and the
// resource and health-check settings attached to it.
//
// Resources and Health reuse the ResourceSpec and HealthCheckSpec types.
// Note that Health is serialized under the JSON key "healthCheck", not
// "health".
type Deployment struct {
	Name      string          `json:"name"`
	Namespace string          `json:"namespace"`
	Replicas  int             `json:"replicas"`
	ModelRef  string          `json:"modelRef"`
	Resources ResourceSpec    `json:"resources"`
	Health    HealthCheckSpec `json:"healthCheck"`
}

Deployment represents a Kubernetes Deployment managed by the operator.

type HPA

// HPA is the operator's description of a Kubernetes HorizontalPodAutoscaler,
// identified by Name and Namespace and bounded by MinReplicas and
// MaxReplicas.
//
// TargetRef is a plain string.
// NOTE(review): presumably the name of the Deployment being scaled —
// confirm against the reconciler.
type HPA struct {
	Name        string `json:"name"`
	Namespace   string `json:"namespace"`
	TargetRef   string `json:"targetRef"`
	MinReplicas int    `json:"minReplicas"`
	MaxReplicas int    `json:"maxReplicas"`
}

HPA represents a Kubernetes HorizontalPodAutoscaler.

type HealthCheckSpec

// HealthCheckSpec holds the settings for HTTP health probes: the endpoint
// path, the interval between probes, and the timeout for a single probe.
//
// Interval and Timeout are time.Duration values. time.Duration defines no
// JSON marshaling of its own, so encoding/json reads and writes these fields
// as integer nanoseconds, not as strings such as "10s".
type HealthCheckSpec struct {
	// Path is the HTTP health check endpoint (e.g. "/healthz").
	Path string `json:"path"`

	// Interval is the time between consecutive health checks.
	Interval time.Duration `json:"interval"`

	// Timeout is the time limit for a single health check.
	Timeout time.Duration `json:"timeout"`
}

HealthCheckSpec configures health check probes.

type KubeClient

// KubeClient is the set of cluster operations the reconciler depends on.
// Every method takes a context.Context as its first argument. Get methods
// look a resource up by namespace and name and return a pointer to it;
// Create and Update methods take the full resource value.
//
// NOTE(review): only Deployments have a delete method here; there is no
// DeleteService or DeleteHPA. Confirm how Reconciler.Delete is expected to
// remove those resources.
// NOTE(review): presumably Get methods report a missing resource with
// ErrNotFound and Create methods report a duplicate with ErrAlreadyExists —
// confirm against the reconciler before relying on it in an implementation.
type KubeClient interface {
	// Deployment operations: read, create, update, and delete.
	GetDeployment(ctx context.Context, namespace, name string) (*Deployment, error)
	CreateDeployment(ctx context.Context, d *Deployment) error
	UpdateDeployment(ctx context.Context, d *Deployment) error
	DeleteDeployment(ctx context.Context, namespace, name string) error

	// Service operations: read, create, and update (no delete).
	GetService(ctx context.Context, namespace, name string) (*Service, error)
	CreateService(ctx context.Context, s *Service) error
	UpdateService(ctx context.Context, s *Service) error

	// HorizontalPodAutoscaler operations: read, create, and update (no delete).
	GetHPA(ctx context.Context, namespace, name string) (*HPA, error)
	CreateHPA(ctx context.Context, h *HPA) error
	UpdateHPA(ctx context.Context, h *HPA) error
}

KubeClient abstracts Kubernetes API operations needed by the reconciler. Implementations may wrap a real Kubernetes client or a mock for testing.

type Reconciler

// Reconciler applies a ZerfooInferenceService to a cluster through a
// KubeClient. All of its fields are unexported; obtain one with
// NewReconciler.
type Reconciler struct {
	// contains filtered or unexported fields
}

Reconciler compares the desired ZerfooInferenceService spec against the current cluster state and applies the necessary changes.

func NewReconciler

func NewReconciler(client KubeClient) *Reconciler

NewReconciler creates a Reconciler backed by the given KubeClient.

func (*Reconciler) Delete

func (r *Reconciler) Delete(ctx context.Context, svc *ZerfooInferenceService) error

Delete removes all resources associated with the given service.

func (*Reconciler) Reconcile

func (r *Reconciler) Reconcile(ctx context.Context, svc *ZerfooInferenceService) error

Reconcile drives the cluster toward the desired state described by svc. It creates, updates, or deletes Deployments, Services, and HPAs as needed.

type ResourceSpec

// ResourceSpec declares the CPU, memory, and GPU memory limits for a
// workload. All three values are carried as strings.
// NOTE(review): the examples look like Kubernetes resource quantities —
// confirm the exact format the operator expects.
type ResourceSpec struct {
	CPU       string `json:"cpu"`       // e.g. "4"
	Memory    string `json:"memory"`    // e.g. "16Gi"
	GPUMemory string `json:"gpuMemory"` // e.g. "24Gi"
}

ResourceSpec declares CPU, memory, and GPU memory resource limits, each expressed as a string (e.g. "4", "16Gi", "24Gi").

type Service

// Service is the operator's description of a Kubernetes Service, identified
// by Name and Namespace.
//
// Selector is a set of string key/value pairs. Weights is optional and is
// left out of the JSON encoding when empty (omitempty); each entry pairs a
// deployment name with a traffic weight.
type Service struct {
	Name      string            `json:"name"`
	Namespace string            `json:"namespace"`
	Selector  map[string]string `json:"selector"`
	Weights   []WeightedTarget  `json:"weights,omitempty"`
}

Service represents a Kubernetes Service managed by the operator.

type WeightedTarget

// WeightedTarget pairs a deployment name with an integer traffic weight.
// It is the element type of Service.Weights.
// NOTE(review): Weight is presumably a percentage like CanarySpec.Weight —
// confirm.
type WeightedTarget struct {
	DeploymentName string `json:"deploymentName"`
	Weight         int    `json:"weight"`
}

WeightedTarget maps a deployment name to a traffic weight for canary routing.

type ZerfooInferenceService

// ZerfooInferenceService is the top-level resource handled by the operator.
// It carries an identity (Name and Namespace), the desired configuration in
// Spec, and the observed state in Status. Reconciler.Reconcile and
// Reconciler.Delete both take a pointer to this type.
type ZerfooInferenceService struct {
	Name      string                       `json:"name"`
	Namespace string                       `json:"namespace"`
	Spec      ZerfooInferenceServiceSpec   `json:"spec"`
	Status    ZerfooInferenceServiceStatus `json:"status"`
}

ZerfooInferenceService is the top-level custom resource that declares a desired inference service deployment.

type ZerfooInferenceServiceSpec

// ZerfooInferenceServiceSpec is the desired configuration of an inference
// service: which model to serve, how many replicas to run, optional
// autoscaling bounds, resource limits, an optional canary, and health-check
// settings. Its Validate method checks the spec for required fields and
// constraints.
type ZerfooInferenceServiceSpec struct {
	// ModelRef is the model repository reference (e.g. "llama3-8b-q4").
	ModelRef string `json:"modelRef"`

	// Replicas is the desired number of inference pods.
	Replicas int `json:"replicas"`

	// MinReplicas is the lower bound for autoscaling; 0 means no
	// autoscaling. A zero value is omitted from the JSON encoding.
	MinReplicas int `json:"minReplicas,omitempty"`

	// MaxReplicas is the upper bound for autoscaling. A zero value is
	// omitted from the JSON encoding.
	MaxReplicas int `json:"maxReplicas,omitempty"`

	// Resources specifies compute resource limits.
	Resources ResourceSpec `json:"resources"`

	// Canary optionally configures a canary deployment with traffic
	// splitting. A nil pointer is omitted from the JSON encoding.
	Canary *CanarySpec `json:"canary,omitempty"`

	// HealthCheck configures liveness/readiness probes.
	HealthCheck HealthCheckSpec `json:"healthCheck"`
}

ZerfooInferenceServiceSpec describes the desired state of an inference service.

func (*ZerfooInferenceServiceSpec) Validate

func (s *ZerfooInferenceServiceSpec) Validate() error

Validate checks the spec for required fields and constraints.

type ZerfooInferenceServiceStatus

// ZerfooInferenceServiceStatus is the observed state of an inference
// service: whether it is ready, how many replicas are running, and an
// optional human-readable message.
type ZerfooInferenceServiceStatus struct {
	// Ready indicates whether the service is fully available.
	Ready bool `json:"ready"`

	// Replicas is the current number of running replicas.
	Replicas int `json:"replicas"`

	// Message provides a human-readable status message. An empty message is
	// omitted from the JSON encoding.
	Message string `json:"message,omitempty"`
}

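ZerfooInferenceServiceStatus represents the observed state of an inference service: its readiness, the current number of running replicas, and an optional human-readable message.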

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL