runtime

package
v1.20.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 4, 2025 License: Apache-2.0 Imports: 44 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
var ErrRequestCanceled = errors.New("request is canceled")

ErrRequestCanceled is returned when the request is canceled.

Functions

func ModelDir

func ModelDir() string

ModelDir returns the directory where models are stored.

func PreferredModelFormat

func PreferredModelFormat(runtime string, supportedFormats []mv1.ModelFormat) (mv1.ModelFormat, error)

PreferredModelFormat returns the preferred model format.

Types

type Client

type Client interface {
	GetName(modelID string) string
	GetAddress(name string) string
	DeployRuntime(ctx context.Context, modelID string, update bool) (*appsv1.StatefulSet, error)
}

Client is the interface for managing runtimes.

func NewOllamaClient

func NewOllamaClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	mconfig *config.ProcessedModelConfig,
	oconfig config.OllamaConfig,
	modelClient modelGetter,
) Client

NewOllamaClient creates a new Ollama runtime client.

func NewTritonClient added in v0.378.0

func NewTritonClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	mconfig *config.ProcessedModelConfig,
) Client

NewTritonClient creates a new Triton runtime client.

func NewVLLMClient

func NewVLLMClient(
	k8sClient client.Client,
	namespace string,
	owner *metav1apply.OwnerReferenceApplyConfiguration,
	rconfig *config.RuntimeConfig,
	mconfig *config.ProcessedModelConfig,
	modelClient modelClient,
) Client

NewVLLMClient creates a new VLLM runtime client.

type ClientFactory

type ClientFactory interface {
	New(modelID string) (Client, error)
}

ClientFactory is the interface for creating a new Client given a model ID.

type Manager

type Manager struct {
	// contains filtered or unexported fields
}

Manager manages runtimes.

func NewManager

func NewManager(
	k8sClient client.Client,
	rtClientFactory ClientFactory,
	autoscaler autoscaler.Registerer,
) *Manager

NewManager creates a new runtime manager.

func (*Manager) GetLLMAddress

func (m *Manager) GetLLMAddress(modelID string) (string, error)

GetLLMAddress returns the address of the LLM.

func (*Manager) ListInProgressModels

func (m *Manager) ListInProgressModels() []ModelRuntimeInfo

ListInProgressModels returns the list of models that are in progress.

func (*Manager) ListSyncedModels added in v1.15.0

func (m *Manager) ListSyncedModels() []ModelRuntimeInfo

ListSyncedModels returns the list of models that are synced.

func (*Manager) PullModel

func (m *Manager) PullModel(ctx context.Context, modelID string) error

PullModel pulls the model from the model manager.

func (*Manager) Reconcile

func (m *Manager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile reconciles the runtime.

func (*Manager) SetupWithManager

func (m *Manager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error

SetupWithManager sets up the runtime manager with the given controller manager.

type ModelPuller added in v1.18.0

type ModelPuller interface {
	PullModel(ctx context.Context, modelID string) error
}

ModelPuller pulls a model.

type ModelRuntimeInfo added in v1.15.0

type ModelRuntimeInfo struct {
	// ID is model ID.
	ID string
	// GPU is the total GPU allocated for the model.
	GPU   int32
	Ready bool
}

ModelRuntimeInfo is the info of a model runtime.

type OllamaManager added in v1.18.0

type OllamaManager struct {
	// contains filtered or unexported fields
}

OllamaManager manages multiple models in a single ollama runtime.

func NewOllamaManager added in v1.18.0

func NewOllamaManager(
	k8sClient client.Client,
	client Client,
	autoscaler autoscaler.Registerer,
	pullerAddr string,
) *OllamaManager

NewOllamaManager creates a new ollama runtime manager.

func (*OllamaManager) GetLLMAddress added in v1.18.0

func (m *OllamaManager) GetLLMAddress(_ string) (string, error)

GetLLMAddress returns the address of the LLM. The model ID argument is unused, since a single ollama runtime serves multiple models.

func (*OllamaManager) ListInProgressModels added in v1.18.0

func (m *OllamaManager) ListInProgressModels() []ModelRuntimeInfo

ListInProgressModels returns the list of models that are in progress.

func (*OllamaManager) ListSyncedModels added in v1.18.0

func (m *OllamaManager) ListSyncedModels() []ModelRuntimeInfo

ListSyncedModels returns the list of models that are synced.

func (*OllamaManager) NeedLeaderElection added in v1.18.0

func (m *OllamaManager) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*OllamaManager) PullModel added in v1.18.0

func (m *OllamaManager) PullModel(ctx context.Context, modelID string) error

PullModel pulls the model from the model manager.

func (*OllamaManager) Reconcile added in v1.18.0

func (m *OllamaManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile reconciles the runtime.

func (*OllamaManager) SetupWithManager added in v1.18.0

func (m *OllamaManager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error

SetupWithManager sets up the runtime manager with the given controller manager.

func (*OllamaManager) Start added in v1.18.0

func (m *OllamaManager) Start(ctx context.Context) error

Start deploys the ollama runtime.

type Preloader

type Preloader struct {
	// contains filtered or unexported fields
}

Preloader preloads models.

func NewPreloader

func NewPreloader(puller ModelPuller, ids []string, modelClient modelGetter) *Preloader

NewPreloader creates a new Preloader.

func (*Preloader) NeedLeaderElection

func (p *Preloader) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*Preloader) SetupWithManager

func (p *Preloader) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the preloader with the given controller manager.

func (*Preloader) Start

func (p *Preloader) Start(ctx context.Context) error

Start starts the preloader.

type Updater

type Updater struct {
	// contains filtered or unexported fields
}

Updater updates runtimes at startup.

func NewUpdater

func NewUpdater(namespace string, rtClientFactory ClientFactory) *Updater

NewUpdater creates a new Updater.

func (*Updater) NeedLeaderElection

func (u *Updater) NeedLeaderElection() bool

NeedLeaderElection implements LeaderElectionRunnable and always returns true.

func (*Updater) SetupWithManager

func (u *Updater) SetupWithManager(mgr ctrl.Manager) error

SetupWithManager sets up the updater with the manager.

func (*Updater) Start

func (u *Updater) Start(ctx context.Context) error

Start starts the updater.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL