modeladapter

package
v0.4.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 19, 2025 License: Apache-2.0 Imports: 40 Imported by: 0

README

apiVersion: apps/v1
kind: Deployment
metadata:
  name: deepseek-33b-instruct
  namespace: default
  labels:
    model.aibrix.ai/name: deepseek-33b-instruct
    adapter.model.aibrix.ai/enabled: "true"
spec:
  replicas: 1
  selector:
    matchLabels:
      model.aibrix.ai/name: deepseek-33b-instruct
  template:
    metadata:
      labels:
        model.aibrix.ai/name: deepseek-33b-instruct
    spec:
      containers:
      - name: deepseek-33b-instruct
        image: your-docker-registry/deepseek-33b-instruct:latest
        resources:
          requests:
            nvidia.com/gpu: "2"  # Assuming the model needs two GPUs
          limits:
            nvidia.com/gpu: "2"
        ports:
        - containerPort: 8080
        env:
        - name: MODEL_PATH
          value: "/models/deepseek-33b-instruct"
        volumeMounts:
        - name: model-storage
          mountPath: /models
      volumes:
      - name: model-storage
        persistentVolumeClaim:
          claimName: model-pvc
apiVersion: model.aibrix.ai/v1alpha1
kind: ModelAdapter
metadata:
  annotations:
    kubectl.kubernetes.io/last-applied-configuration: |
      {"apiVersion":"model.aibrix.ai/v1alpha1","kind":"ModelAdapter","metadata":{"annotations":{},"name":"text2sql-lora-1","namespace":"default"},"spec":{"additionalConfig":{"model-artifact":"jeffwan/rank-1"},"baseModel":"llama2-70b","podSelector":{"matchLabels":{"model.aibrix.ai":"llama2-70b"}},"schedulerName":"default-model-adapter-scheduler"}}
  creationTimestamp: "2024-07-14T21:09:18Z"
  generation: 2
  name: text2sql-lora-1
  namespace: default
  resourceVersion: "788513"
  uid: 61fd3d3c-8549-4742-8f43-7df8c66f0a6d
spec:
  additionalConfig:
    model-artifact: jeffwan/rank-1
  baseModel: llama2-70b
  podSelector:
    matchLabels:
      model.aibrix.ai/name: llama2-70b
  schedulerName: default-model-adapter-scheduler
status:
  phase: Configuring
apiVersion: v1
kind: Service
metadata:
  creationTimestamp: "2024-07-14T21:42:57Z"
  labels:
    model.aibrix.ai/name: llama2-70b
    adapter.model.aibrix.ai/name: text2sql-lora-1
  name: text2sql-lora-1
  namespace: default
  ownerReferences:
  - apiVersion: model.aibrix.ai/v1alpha1
    blockOwnerDeletion: true
    controller: true
    kind: ModelAdapter
    name: text2sql-lora-1
    uid: 61fd3d3c-8549-4742-8f43-7df8c66f0a6d
  resourceVersion: "789949"
  uid: bef1fb3e-27d2-4663-ac87-14ef721c3693
spec:
  clusterIP: None
  clusterIPs:
  - None
  internalTrafficPolicy: Cluster
  ipFamilies:
  - IPv4
  ipFamilyPolicy: SingleStack
  ports:
  - name: http
    port: 8000
    protocol: TCP
    targetPort: 8000
  publishNotReadyAddresses: true
  selector:
    model.aibrix.ai/name: llama2-70b
  sessionAffinity: None
  type: ClusterIP
status:
  loadBalancer: {}
addressType: IPv4
apiVersion: discovery.k8s.io/v1
endpoints:
- addresses:
  - 10.1.2.133
  conditions: {}
kind: EndpointSlice
metadata:
  creationTimestamp: "2024-07-14T21:42:59Z"
  generation: 1
  labels:
    kubernetes.io/service-name: text2sql-lora-1
  name: text2sql-lora-1
  namespace: default
  ownerReferences:
  - apiVersion: model.aibrix.ai/v1alpha1
    blockOwnerDeletion: true
    controller: true
    kind: ModelAdapter
    name: text2sql-lora-1
    uid: 61fd3d3c-8549-4742-8f43-7df8c66f0a6d
  resourceVersion: "789958"
  uid: bf913402-b97d-426d-89a9-8ea734ba8a7a
ports:
- name: http
  port: 80
  protocol: TCP

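Problem here: two EndpointSlices exist for the same Service (see the listing below). The 2nd one (text2sql-lora-1-hzdl9, port 8000) was created by the endpoint controller.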

text2sql-lora-1                     IPv4          80                           10.1.2.133     2m24s
text2sql-lora-1-hzdl9               IPv4          8000                         10.1.2.133     2m26s
apiVersion: v1
kind: Endpoints
metadata:
  annotations:
    endpoints.kubernetes.io/last-change-trigger-time: "2024-07-14T21:42:57Z"
  creationTimestamp: "2024-07-14T21:42:57Z"
  labels:
    model.aibrix.ai/name: llama2-70b
    adapter.model.aibrix.ai/name: text2sql-lora-1
    service.kubernetes.io/headless: ""
  name: text2sql-lora-1
  namespace: default
  resourceVersion: "789951"
  uid: 7f64255c-ff58-49fa-9ec3-f19164f884ba
subsets:
- addresses:
  - ip: 10.1.2.133
    nodeName: docker-desktop
    targetRef:
      kind: Pod
      name: lora-test
      namespace: default
      uid: 408484b6-38e9-4fa1-8b2c-e57753a0f220
  ports:
  - name: http
    port: 8000
    protocol: TCP

Documentation

Index

Constants

View Source
// Label keys, finalizer, condition reasons, condition types, and
// inference-engine endpoint defaults used by the ModelAdapter controller.
const (
	// Label/finalizer keys. ModelAdapterKey is the label carrying the adapter
	// name, and ModelAdapterPodTemplateLabelKey/Value ("...enabled": "true")
	// is the label pair placed on pod templates, as in the README manifests.
	// NOTE(review): ModelIdentifierKey is defined in the constants package;
	// presumably "model.aibrix.ai/name" — confirm there.
	ModelIdentifierKey                = constants.ModelLabelName
	ModelAdapterKey                   = "adapter.model.aibrix.ai/name"
	ModelAdapterFinalizer             = "adapter.model.aibrix.ai/finalizer"
	ModelAdapterPodTemplateLabelKey   = "adapter.model.aibrix.ai/enabled"
	ModelAdapterPodTemplateLabelValue = "true"

	// ModelAdapterInitializedReason is added in a model adapter when it comes into the reconciliation loop.
	ModelAdapterInitializedReason = "ModelAdapterPending"
	// FailedServiceCreateReason is added in a model adapter when it cannot create a new service.
	FailedServiceCreateReason = "ServiceCreateError"
	// FailedEndpointSliceCreateReason is added in a model adapter when it cannot create a new endpoint slice.
	FailedEndpointSliceCreateReason = "EndpointSliceCreateError"
	// ModelAdapterLoadingErrorReason is added in a model adapter when it cannot be loaded in an engine pod.
	ModelAdapterLoadingErrorReason = "ModelAdapterLoadingError"
	// ValidationFailedReason is added when a model adapter object fails validation.
	ValidationFailedReason = "ValidationFailed"
	// StableInstanceFoundReason is added if there is a stale pod and the instance has been deleted successfully.
	// NOTE(review): the identifier and value say "Stable" while the description
	// talks about a stale pod; this looks like a misspelling of "Stale".
	// The name is exported, so it is left unchanged here.
	StableInstanceFoundReason = "StableInstanceFound"
	// ConditionNotReason is added when there's no condition found in the cluster.
	ConditionNotReason = "ConditionNotFound"

	// ModelAdapterAvailable is added in a ModelAdapter when it has replicas available.
	ModelAdapterAvailable = "ModelAdapterAvailable"
	// ModelAdapterUnavailable is added in a ModelAdapter when it doesn't have any pod hosting it.
	ModelAdapterUnavailable = "ModelAdapterUnavailable"

	// Inference service paths and ports. Ports are kept as strings.
	// NOTE(review): DefaultDebugInferenceEnginePort is presumably only used
	// when running in a debug mode — confirm against the callers.
	DefaultInferenceEnginePort      = "8000"
	DefaultDebugInferenceEnginePort = "30081"
	DefaultRuntimeAPIPort           = "8080"

	// HTTP paths for listing models and loading/unloading LoRA adapters.
	// ModelListPath and ModelListRuntimeAPIPath currently share the same value.
	// NOTE(review): the *RuntimeAPIPath variants presumably target the runtime
	// API (DefaultRuntimeAPIPort) rather than the inference engine — confirm.
	ModelListPath            = "/v1/models"
	ModelListRuntimeAPIPath  = "/v1/models"
	LoadLoraAdapterPath      = "/v1/load_lora_adapter"
	LoadLoraRuntimeAPIPath   = "/v1/lora_adapter/load"
	UnloadLoraAdapterPath    = "/v1/unload_lora_adapter"
	UnloadLoraRuntimeAPIPath = "/v1/lora_adapter/unload"

	// DefaultModelAdapterSchedulerPolicy is the default scheduler policy for ModelAdapter Controller.
	DefaultModelAdapterSchedulerPolicy = "leastAdapters"
)

Variables

This section is empty.

Functions

func Add

func Add(mgr manager.Manager, runtimeConfig config.RuntimeConfig) error

Add creates a new ModelAdapter Controller and adds it to the Manager with default RBAC. The Manager will set fields on the Controller and Start it when the Manager is Started.

func NewCondition

func NewCondition(condType string, status metav1.ConditionStatus, reason, msg string) metav1.Condition

NewCondition creates a new condition.

func RemoveInstanceFromList

func RemoveInstanceFromList(slice []string, strToRemove string) []string

RemoveInstanceFromList removes a string from a slice of strings

func StringInSlice

func StringInSlice(slice []string, str string) bool

Types

type ModelAdapterReconciler

// ModelAdapterReconciler reconciles a ModelAdapter object.
type ModelAdapterReconciler struct {
	// Client is embedded, so its methods are available directly on the reconciler.
	client.Client
	// Scheme is the runtime scheme held by the reconciler.
	Scheme *runtime.Scheme
	// Recorder is the event recorder held by the reconciler.
	Recorder record.EventRecorder

	// PodLister is able to list/get pods from a shared informer's cache store
	PodLister corelisters.PodLister
	// ServiceLister is able to list/get services from a shared informer's cache store
	ServiceLister corelisters.ServiceLister
	// EndpointSliceLister is able to list/get endpoint slices from a shared informer's cache store
	EndpointSliceLister discoverylisters.EndpointSliceLister
	// RuntimeConfig is the runtime configuration; Add receives a value of the same type.
	RuntimeConfig config.RuntimeConfig
	// contains filtered or unexported fields
}

ModelAdapterReconciler reconciles a ModelAdapter object

func (*ModelAdapterReconciler) DoReconcile

func (*ModelAdapterReconciler) Reconcile

func (r *ModelAdapterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile reads the state of the ModelAdapter object and makes changes based on the state read and what is in the ModelAdapter.Spec.

type URLConfig

// URLConfig groups the URLs used to talk to a pod; it is the value
// returned by BuildURLs.
// NOTE(review): the exact URL layout (scheme, port, path) is decided in
// BuildURLs, which is not visible here — confirm before relying on it.
type URLConfig struct {
	// BaseURL is the common base of the URLs below (presumably derived from the pod IP — confirm).
	BaseURL string
	// ListModelsURL is the URL for listing models.
	ListModelsURL string
	// LoadAdapterURL is the URL for loading an adapter.
	LoadAdapterURL string
	// UnloadAdapterURL is the URL for unloading an adapter.
	UnloadAdapterURL string
}

func BuildURLs

func BuildURLs(podIP string, config config.RuntimeConfig) URLConfig

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL