worker

package
v0.2.2 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 12, 2024 License: Apache-2.0 Imports: 21 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// WokerCommonSuffix is the suffix appended when deriving a worker's
	// suffixed name (see PodWorker.SuffixedName).
	// NOTE(review): "Woker" is a typo for "Worker"; renaming would break the
	// exported API — consider adding a correctly spelled alias and deprecating this.
	WokerCommonSuffix = "-worker"

	// RDMANodeLabel is the node label key used for RDMA support —
	// presumably set on RDMA-capable nodes; confirm against the scheduler config.
	RDMANodeLabel = "arcadia.kubeagi.k8s.com.cn/rdma"
)
View Source
const (
	// ResourceNvidiaGPU is the Kubernetes extended-resource name under which
	// NVIDIA GPUs are requested/limited in a corev1.ResourceList.
	ResourceNvidiaGPU corev1.ResourceName = "nvidia.com/gpu"
)

Variables

View Source
var (
	// ErrNotImplementedYet is returned by operations that are declared but
	// not yet supported.
	ErrNotImplementedYet = errors.New("not implemented yet")
	// ErrModelNotReady is returned when the worker's model is not yet ready.
	ErrModelNotReady     = errors.New("worker's model is not ready")

	// DefaultWorkerReplicas is the default replica count for a worker.
	// Only 1 is supported for now.
	DefaultWorkerReplicas int32 = 1
)

Functions

func MakeMergedDeployment

func MakeMergedDeployment(target *appsv1.Deployment, desired *appsv1.Deployment) *appsv1.Deployment

func NumberOfGPUs

func NumberOfGPUs(resource corev1.ResourceList) string

NumberOfGPUs returns the number of GPUs requested in the given ResourceList.

Types

type Action

// Action describes how the caller should react to an error; it is produced
// from an error by ActionOnError.
type Action string
const (
	// Create the resource (it does not exist yet).
	Create Action = "create"
	// Update the existing resource.
	Update Action = "update"
	// Panic signals an unrecoverable error — NOTE(review): presumably aborts
	// processing; confirm in ActionOnError's implementation.
	Panic  Action = "panic"
)

func ActionOnError

func ActionOnError(err error) Action

type Device

// Device identifies the hardware type (cpu/cuda/xpu/npu) a model runs on.
type Device string

Device defines the different device types, such as cpu, gpu, xpu and npu, which run the model

const (
	// CPU runs the model on CPU only.
	CPU  Device = "cpu"
	// CUDA runs the model on NVIDIA GPUs.
	CUDA Device = "cuda"
	// Not supported yet
	XPU Device = "xpu"
	// Not supported yet
	NPU Device = "npu"
)

func DeviceBasedOnResource

func DeviceBasedOnResource(resource corev1.ResourceList) Device

DeviceBasedOnResource returns the device type based on the resource list

func (Device) String

func (device Device) String() string

type KubeAGIRunner added in v0.2.1

// KubeAGIRunner runs model services via the kubeagi core-library CLI
// (https://github.com/kubeagi/core-library/tree/main/libs/cli), mainly for
// reranking, whisper, etc.
type KubeAGIRunner struct {
	// contains filtered or unexported fields
}

KubeAGIRunner utilizes core-library-cli (https://github.com/kubeagi/core-library/tree/main/libs/cli) to run model services, mainly for reranking, whisper, etc.

func (*KubeAGIRunner) Build added in v0.2.1

Build a model runner instance

func (*KubeAGIRunner) Device added in v0.2.1

func (runner *KubeAGIRunner) Device() Device

Device used when running model

func (*KubeAGIRunner) NumberOfGPUs added in v0.2.1

func (runner *KubeAGIRunner) NumberOfGPUs() string

NumberOfGPUs utilized by this runner

type LoaderGit

// LoaderGit loads a model from a git repository.
type LoaderGit struct{}

LoaderGit defines the way to load model from git

func (*LoaderGit) Build

func (loader *LoaderGit) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)

type LoaderOSS

// LoaderOSS loads a model from an OSS (object storage service) endpoint;
// construct it with NewLoaderOSS.
type LoaderOSS struct {
	// contains filtered or unexported fields
}

LoaderOSS defines the way to load model from oss

func (*LoaderOSS) Build

func (loader *LoaderOSS) Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)

Build performs no loading within the Go code itself — presumably the actual model download is delegated elsewhere (e.g. to the worker pod); confirm against the implementation.

type ModelLoader

// ModelLoader loads models for a worker.
type ModelLoader interface {
	// Build produces the loader artifact for the given model reference;
	// the concrete return type depends on the implementation (any).
	Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
}

ModelLoader load models for worker

func NewLoaderOSS

func NewLoaderOSS(ctx context.Context, c client.Client, endpoint *arcadiav1alpha1.Endpoint, worker *arcadiav1alpha1.Worker) (ModelLoader, error)

type ModelRunner

// ModelRunner runs a model service. Implementations include RunnerFastchat,
// RunnerFastchatVLLM and KubeAGIRunner.
type ModelRunner interface {
	// Device used when running the model.
	Device() Device
	// NumberOfGPUs used when running the model.
	NumberOfGPUs() string
	// Build a model runner instance for the given model reference.
	Build(ctx context.Context, model *arcadiav1alpha1.TypedObjectReference) (any, error)
}

ModelRunner run a model service

func NewKubeAGIRunner added in v0.2.1

func NewKubeAGIRunner(c client.Client, w *arcadiav1alpha1.Worker, modelFileFromRemote bool) (ModelRunner, error)

func NewRunnerFastchat

func NewRunnerFastchat(c client.Client, w *arcadiav1alpha1.Worker, modelFileFromRemote bool) (ModelRunner, error)

func NewRunnerFastchatVLLM

func NewRunnerFastchatVLLM(c client.Client, w *arcadiav1alpha1.Worker, modelFileFromRemote bool) (ModelRunner, error)

type PodWorker

// PodWorker hosts a worker in a single pod, with different loader and runner
// implementations chosen from the Worker's configuration.
type PodWorker struct {

	// NamespacedName is the worker's namespace/name identity.
	types.NamespacedName
	// contains filtered or unexported fields
}

PodWorker hosts this worker in a single pod but with different loader and runner based on Worker's configuration

func (*PodWorker) AfterStart

func (podWorker *PodWorker) AfterStart(ctx context.Context) error

Actions to do after start this worker

func (*PodWorker) BeforeStart

func (podWorker *PodWorker) BeforeStart(ctx context.Context) error

BeforeStart will create resources which are related to this Worker. Currently these are: a PVC (if configured), a Service, an LLM (if an LLM model) and an Embedder (if an embedding model).

func (*PodWorker) BeforeStop

func (podWorker *PodWorker) BeforeStop(ctx context.Context) error

TODO: BeforeStop

func (*PodWorker) Model

func (podWorker *PodWorker) Model() *arcadiav1alpha1.Model

Model that this worker is running for

func (*PodWorker) Start

func (podWorker *PodWorker) Start(ctx context.Context) error

Start will build and create worker pod which will host model service

func (*PodWorker) State

func (podWorker *PodWorker) State(ctx context.Context) (any, error)

State of this worker

func (*PodWorker) Stop

func (podWorker *PodWorker) Stop(ctx context.Context) error

TODO: Stop

func (*PodWorker) SuffixedName

func (podWorker *PodWorker) SuffixedName() string

func (*PodWorker) Worker

func (podWorker *PodWorker) Worker() *arcadiav1alpha1.Worker

type RDMALoader

// RDMALoader supports RDMA: it allows the pod to use a hostPath volume and
// RDMA to pull models faster and start services sooner.
// Construct it with NewRDMALoader.
type RDMALoader struct {
	// contains filtered or unexported fields
}

RDMALoader adds support for RDMA. It allows the Pod to use hostPath and RDMA to pull models faster and start services sooner.

func NewRDMALoader

func NewRDMALoader(c client.Client, modelName, workerUID string, source *arcadiav1alpha1.Datasource, worker *arcadiav1alpha1.Worker) *RDMALoader

func (*RDMALoader) Build

type RunnerFastchat

// RunnerFastchat uses fastchat to run a model; construct it with
// NewRunnerFastchat.
type RunnerFastchat struct {
	// contains filtered or unexported fields
}

RunnerFastchat use fastchat to run a model

func (*RunnerFastchat) Build

Build a runner instance

func (*RunnerFastchat) Device

func (runner *RunnerFastchat) Device() Device

Device utilized by this runner

func (*RunnerFastchat) NumberOfGPUs

func (runner *RunnerFastchat) NumberOfGPUs() string

NumberOfGPUs utilized by this runner

type RunnerFastchatVLLM

// RunnerFastchatVLLM uses fastchat with the vLLM backend to run a model;
// construct it with NewRunnerFastchatVLLM.
type RunnerFastchatVLLM struct {
	// contains filtered or unexported fields
}

RunnerFastchatVLLM use fastchat with vllm to run a model

func (*RunnerFastchatVLLM) Build

Build a runner instance

func (*RunnerFastchatVLLM) Device

func (runner *RunnerFastchatVLLM) Device() Device

Device used by this runner

func (*RunnerFastchatVLLM) NumberOfGPUs

func (runner *RunnerFastchatVLLM) NumberOfGPUs() string

NumberOfGPUs utilized by this runner

type Worker

// Worker implements the lifecycle management of an LLM worker:
// before/start/after hooks, stop hooks, and state inspection.
// PodWorker is the concrete implementation in this package.
type Worker interface {
	// Worker that this is for
	Worker() *arcadiav1alpha1.Worker
	// Model that this worker is running for
	Model() *arcadiav1alpha1.Model

	// Actions to do before start this worker
	BeforeStart(ctx context.Context) error
	// Actions to do when Start this worker
	Start(ctx context.Context) error
	// Actions to do after start this worker
	AfterStart(ctx context.Context) error

	// Actions to do before stop this worker
	BeforeStop(ctx context.Context) error
	// Actions to do when Stop this worker
	Stop(ctx context.Context) error

	// State of this worker
	State(context.Context) (any, error)
}

Worker implement the lifecycle management of a LLM worker

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL