runtime

package
v1.24.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 28, 2025 License: Apache-2.0 Imports: 44 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ErrRequestCanceled = errors.New("request is canceled")

ErrRequestCanceled is returned when the request is canceled.

Functions

This section is empty.

Types

type Client

type Client interface {
	GetName(modelID string) string
	GetAddress(name string) string
	DeployRuntime(ctx context.Context, modelID string, update bool) (*appsv1.StatefulSet, error)
	DeleteRuntime(ctx context.Context, modelID string) error

	RuntimeName() string
	Namespace() string
}

Client is the interface for managing runtimes.

func NewOllamaClient

func NewOllamaClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	mconfig *config.ProcessedModelConfig,
	oconfig config.OllamaConfig,
	modelClient modelGetter,
) Client

NewOllamaClient creates a new Ollama runtime client.

func NewTritonClient added in v0.378.0

func NewTritonClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	mconfig *config.ProcessedModelConfig,
) Client

NewTritonClient creates a new Triton runtime client.

func NewVLLMClient

func NewVLLMClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	mconfig *config.ProcessedModelConfig,
	modelClient modelClient,
	vLLMConfg *config.VLLMConfig,
) Client

NewVLLMClient creates a new VLLM runtime client.

type ClientFactory

type ClientFactory interface {
	New(modelID string) (Client, error)
}

ClientFactory is the interface for creating a new Client given a model ID.

type LoRAAdapterStatusGetter added in v1.23.0

type LoRAAdapterStatusGetter struct {
}

LoRAAdapterStatusGetter is a getter for LoRA adapter status.

type LoRAReconciler added in v1.23.0

type LoRAReconciler struct {
	// contains filtered or unexported fields
}

LoRAReconciler reconciles the LoRA adapters loading status.

func NewLoRAReconciler added in v1.23.0

func NewLoRAReconciler(
	k8sClient k8sclient.Client,
	updateProcessor updateProcessor,
	loraAdapterStatusGetter loraAdapterStatusGetter,
) *LoRAReconciler

NewLoRAReconciler creates a new LoRAReconciler.

func (*LoRAReconciler) Reconcile added in v1.23.0

func (r *LoRAReconciler) Reconcile(
	ctx context.Context,
	req ctrl.Request,
) (ctrl.Result, error)

Reconcile updates the pods in the cluster.

func (*LoRAReconciler) Run added in v1.23.0

func (r *LoRAReconciler) Run(ctx context.Context, interval time.Duration) error

Run periodically checks the status of the pods and loaded LoRA adapters.

func (*LoRAReconciler) SetupWithManager added in v1.23.0

func (r *LoRAReconciler) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the LoRA reconciler with the given controller manager.

type Manager

type Manager struct {
	// contains filtered or unexported fields
}

Manager manages runtimes.

func NewManager

func NewManager(
	k8sClient client.Client,
	rtClientFactory ClientFactory,
	autoscaler autoscaler.Registerer,
	modelClient modelClient,
	enableDynamicLoRALoading bool,
	pullerPort int,
) *Manager

NewManager creates a new runtime manager.

func (*Manager) DeleteModel added in v1.21.0

func (m *Manager) DeleteModel(ctx context.Context, modelID string) error

DeleteModel deletes the model from the model manager.

func (*Manager) GetLLMAddress

func (m *Manager) GetLLMAddress(modelID string) (string, error)

GetLLMAddress returns the address of the LLM.

func (*Manager) ListInProgressModels

func (m *Manager) ListInProgressModels() []ModelRuntimeInfo

ListInProgressModels returns the list of models that are in progress.

func (*Manager) ListSyncedModels added in v1.15.0

func (m *Manager) ListSyncedModels() []ModelRuntimeInfo

ListSyncedModels returns the list of models that are synced.

func (*Manager) PullModel

func (m *Manager) PullModel(ctx context.Context, modelID string) error

PullModel pulls the model from the model manager.

func (*Manager) Reconcile

func (m *Manager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile reconciles the runtime.

func (*Manager) RunStateMachine added in v1.24.0

func (m *Manager) RunStateMachine(ctx context.Context) error

RunStateMachine runs the state machine for the manager.

func (*Manager) SetupWithManager

func (m *Manager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error

SetupWithManager sets up the runtime manager with the given controller manager.

type ModelPuller added in v1.18.0

type ModelPuller interface {
	PullModel(ctx context.Context, modelID string) error
}

ModelPuller pulls a model.

type ModelRuntimeInfo added in v1.15.0

type ModelRuntimeInfo struct {
	// ID is model ID.
	ID string
	// GPU is the total GPU allocated for the model.
	GPU   int32
	Ready bool
}

ModelRuntimeInfo is the info of a model runtime.

type OllamaManager added in v1.18.0

type OllamaManager struct {
	// contains filtered or unexported fields
}

OllamaManager manages multiple models in a single ollama runtime.

func NewOllamaManager added in v1.18.0

func NewOllamaManager(
	k8sClient client.Client,
	client Client,
	autoscaler autoscaler.Registerer,
	pullerAddr string,
) *OllamaManager

NewOllamaManager creates a new ollama runtime manager.

func (*OllamaManager) DeleteModel added in v1.21.0

func (m *OllamaManager) DeleteModel(ctx context.Context, modelID string) error

DeleteModel deletes the model from the model manager.

func (*OllamaManager) GetLLMAddress added in v1.18.0

func (m *OllamaManager) GetLLMAddress(_ string) (string, error)

GetLLMAddress returns the address of the LLM. The model ID argument is ignored because all models are served by a single ollama runtime.

func (*OllamaManager) ListInProgressModels added in v1.18.0

func (m *OllamaManager) ListInProgressModels() []ModelRuntimeInfo

ListInProgressModels returns the list of models that are in progress.

func (*OllamaManager) ListSyncedModels added in v1.18.0

func (m *OllamaManager) ListSyncedModels() []ModelRuntimeInfo

ListSyncedModels returns the list of models that are synced.

func (*OllamaManager) NeedLeaderElection added in v1.18.0

func (m *OllamaManager) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*OllamaManager) PullModel added in v1.18.0

func (m *OllamaManager) PullModel(ctx context.Context, modelID string) error

PullModel pulls the model from the model manager.

func (*OllamaManager) Reconcile added in v1.18.0

func (m *OllamaManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile reconciles the runtime.

func (*OllamaManager) SetupWithManager added in v1.18.0

func (m *OllamaManager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error

SetupWithManager sets up the runtime manager with the given controller manager.

func (*OllamaManager) Start added in v1.18.0

func (m *OllamaManager) Start(ctx context.Context) error

Start deploys the ollama runtime.

type Preloader

type Preloader struct {
	// contains filtered or unexported fields
}

Preloader preloads models.

func NewPreloader

func NewPreloader(puller ModelPuller, ids []string, modelClient modelGetter) *Preloader

NewPreloader creates a new Preloader.

func (*Preloader) NeedLeaderElection

func (p *Preloader) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*Preloader) SetupWithManager

func (p *Preloader) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the preloader with the Manager.

func (*Preloader) Start

func (p *Preloader) Start(ctx context.Context) error

Start starts the preloader.

type Updater

type Updater struct {
	// contains filtered or unexported fields
}

Updater updates runtimes at startup.

func NewUpdater

func NewUpdater(namespace string, rtClientFactory ClientFactory) *Updater

NewUpdater creates a new Updater.

func (*Updater) NeedLeaderElection

func (u *Updater) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*Updater) SetupWithManager

func (u *Updater) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the updater with the manager.

func (*Updater) Start

func (u *Updater) Start(ctx context.Context) error

Start starts the updater.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL