Documentation
¶
Index ¶
- Variables
- func ModelDir() string
- func PreferredModelFormat(runtime string, supportedFormats []mv1.ModelFormat) (mv1.ModelFormat, error)
- type Client
- type ClientFactory
- type Manager
- func (m *Manager) GetLLMAddress(modelID string) (string, error)
- func (m *Manager) ListInProgressModels() []ModelRuntimeInfo
- func (m *Manager) ListSyncedModels() []ModelRuntimeInfo
- func (m *Manager) PullModel(ctx context.Context, modelID string) error
- func (m *Manager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
- func (m *Manager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error
- type ModelPuller
- type ModelRuntimeInfo
- type OllamaManager
- func (m *OllamaManager) GetLLMAddress(_ string) (string, error)
- func (m *OllamaManager) ListInProgressModels() []ModelRuntimeInfo
- func (m *OllamaManager) ListSyncedModels() []ModelRuntimeInfo
- func (m *OllamaManager) NeedLeaderElection() bool
- func (m *OllamaManager) PullModel(ctx context.Context, modelID string) error
- func (m *OllamaManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
- func (m *OllamaManager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error
- func (m *OllamaManager) Start(ctx context.Context) error
- type Preloader
- type Updater
Constants ¶
This section is empty.
Variables ¶
var ErrRequestCanceled = errors.New("request is canceled")
ErrRequestCanceled is returned when the request is canceled.
Functions ¶
func PreferredModelFormat ¶
func PreferredModelFormat(runtime string, supportedFormats []mv1.ModelFormat) (mv1.ModelFormat, error)
PreferredModelFormat returns the preferred model format.
Types ¶
type Client ¶
type Client interface {
	GetName(modelID string) string
	GetAddress(name string) string
	DeployRuntime(ctx context.Context, modelID string, update bool) (*appsv1.StatefulSet, error)
}
Client is the interface for managing runtimes.
func NewOllamaClient ¶
func NewOllamaClient( k8sClient client.Client, namespace string, owner *metav1apply.OwnerReferenceApplyConfiguration, rconfig *config.RuntimeConfig, mconfig *config.ProcessedModelConfig, oconfig config.OllamaConfig, modelClient modelGetter, ) Client
NewOllamaClient creates a new Ollama runtime client.
func NewTritonClient ¶ added in v0.378.0
func NewTritonClient( k8sClient client.Client, namespace string, owner *metav1apply.OwnerReferenceApplyConfiguration, rconfig *config.RuntimeConfig, mconfig *config.ProcessedModelConfig, ) Client
NewTritonClient creates a new Triton runtime client.
func NewVLLMClient ¶
func NewVLLMClient( k8sClient client.Client, namespace string, owner *metav1apply.OwnerReferenceApplyConfiguration, rconfig *config.RuntimeConfig, mconfig *config.ProcessedModelConfig, modelClient modelClient, ) Client
NewVLLMClient creates a new VLLM runtime client.
type ClientFactory ¶
ClientFactory is the interface for creating a new Client given a model ID.
type Manager ¶
type Manager struct {
// contains filtered or unexported fields
}
Manager manages runtimes.
func NewManager ¶
func NewManager( k8sClient client.Client, rtClientFactory ClientFactory, autoscaler autoscaler.Registerer, ) *Manager
NewManager creates a new runtime manager.
func (*Manager) GetLLMAddress ¶
GetLLMAddress returns the address of the LLM.
func (*Manager) ListInProgressModels ¶
func (m *Manager) ListInProgressModels() []ModelRuntimeInfo
ListInProgressModels returns the list of models that are in progress.
func (*Manager) ListSyncedModels ¶ added in v1.15.0
func (m *Manager) ListSyncedModels() []ModelRuntimeInfo
ListSyncedModels returns the list of models that are synced.
type ModelPuller ¶ added in v1.18.0
ModelPuller pulls a model.
type ModelRuntimeInfo ¶ added in v1.15.0
type ModelRuntimeInfo struct {
	// ID is the model ID.
	ID string
	// GPU is the total GPU allocated for the model.
	GPU int32
	Ready bool
}
ModelRuntimeInfo is the info of a model runtime.
type OllamaManager ¶ added in v1.18.0
type OllamaManager struct {
// contains filtered or unexported fields
}
OllamaManager manages multiple models in a single ollama runtime.
func NewOllamaManager ¶ added in v1.18.0
func NewOllamaManager( k8sClient client.Client, client Client, autoscaler autoscaler.Registerer, pullerAddr string, ) *OllamaManager
NewOllamaManager creates a new ollama runtime manager.
func (*OllamaManager) GetLLMAddress ¶ added in v1.18.0
func (m *OllamaManager) GetLLMAddress(_ string) (string, error)
GetLLMAddress returns the address of the LLM for the given model.
func (*OllamaManager) ListInProgressModels ¶ added in v1.18.0
func (m *OllamaManager) ListInProgressModels() []ModelRuntimeInfo
ListInProgressModels returns the list of models that are in progress.
func (*OllamaManager) ListSyncedModels ¶ added in v1.18.0
func (m *OllamaManager) ListSyncedModels() []ModelRuntimeInfo
ListSyncedModels returns the list of models that are synced.
func (*OllamaManager) NeedLeaderElection ¶ added in v1.18.0
func (m *OllamaManager) NeedLeaderElection() bool
NeedLeaderElection implements LeaderElectionRunnable and always returns true.
func (*OllamaManager) PullModel ¶ added in v1.18.0
func (m *OllamaManager) PullModel(ctx context.Context, modelID string) error
PullModel pulls the model from the model manager.
func (*OllamaManager) SetupWithManager ¶ added in v1.18.0
func (m *OllamaManager) SetupWithManager(mgr ctrl.Manager, leaderElection bool) error
SetupWithManager sets up the runtime manager with the given controller manager.
type Preloader ¶
type Preloader struct {
// contains filtered or unexported fields
}
Preloader preloads models.
func NewPreloader ¶
func NewPreloader(puller ModelPuller, ids []string, modelClient modelGetter) *Preloader
NewPreloader creates a new Preloader.
func (*Preloader) NeedLeaderElection ¶
NeedLeaderElection implements LeaderElectionRunnable and always returns true.
func (*Preloader) SetupWithManager ¶
SetupWithManager sets up the preloader with the manager.
type Updater ¶
type Updater struct {
// contains filtered or unexported fields
}
Updater updates runtimes at startup.
func NewUpdater ¶
func NewUpdater(namespace string, rtClientFactory ClientFactory) *Updater
NewUpdater creates a new Updater.
func (*Updater) NeedLeaderElection ¶
NeedLeaderElection implements LeaderElectionRunnable and always returns true.
func (*Updater) SetupWithManager ¶
SetupWithManager sets up the updater with the manager.