modeladapter

package

v0.4.1 Latest Latest Go to latest Published: Aug 19, 2025 License: Apache-2.0 Imports: 40 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/vllm-project/aibrix

Links

Open Source Insights

README ¶

apiVersion: apps/v1
kind: Deployment
metadata:
  name: deepseek-33b-instruct
  namespace: default
  labels:
    model.aibrix.ai/name: deepseek-33b-instruct
    adapter.model.aibrix.ai/enabled: "true"
spec:
  replicas: 1
  selector:
    matchLabels:
      model.aibrix.ai/name: deepseek-33b-instruct
  template:
    metadata:
      labels:
        model.aibrix.ai/name: deepseek-33b-instruct
    spec:
      containers:
      - name: deepseek-33b-instruct
        image: your-docker-registry/deepseek-33b-instruct:latest
        resources:
          requests:
            nvidia.com/gpu: "2"  # Assuming the model needs two GPUs
          limits:
            nvidia.com/gpu: "2"
        ports:
        - containerPort: 8080
        env:
        - name: MODEL_PATH
          value: "/models/deepseek-33b-instruct"
        volumeMounts:
        - name: model-storage
          mountPath: /models
      volumes:
      - name: model-storage
        persistentVolumeClaim:
          claimName: model-pvc

apiVersion: model.aibrix.ai/v1alpha1
kind: ModelAdapter
metadata:
  annotations:
    kubectl.kubernetes.io/last-applied-configuration: |
      {"apiVersion":"model.aibrix.ai/v1alpha1","kind":"ModelAdapter","metadata":{"annotations":{},"name":"text2sql-lora-1","namespace":"default"},"spec":{"additionalConfig":{"model-artifact":"jeffwan/rank-1"},"baseModel":"llama2-70b","podSelector":{"matchLabels":{"model.aibrix.ai":"llama2-70b"}},"schedulerName":"default-model-adapter-scheduler"}}
  creationTimestamp: "2024-07-14T21:09:18Z"
  generation: 2
  name: text2sql-lora-1
  namespace: default
  resourceVersion: "788513"
  uid: 61fd3d3c-8549-4742-8f43-7df8c66f0a6d
spec:
  additionalConfig:
    model-artifact: jeffwan/rank-1
  baseModel: llama2-70b
  podSelector:
    matchLabels:
      model.aibrix.ai/name: llama2-70b
  schedulerName: default-model-adapter-scheduler
status:
  phase: Configuring

apiVersion: v1
kind: Service
metadata:
  creationTimestamp: "2024-07-14T21:42:57Z"
  labels:
    model.aibrix.ai/name: llama2-70b
    adapter.model.aibrix.ai/name: text2sql-lora-1
  name: text2sql-lora-1
  namespace: default
  ownerReferences:
  - apiVersion: model.aibrix.ai/v1alpha1
    blockOwnerDeletion: true
    controller: true
    kind: ModelAdapter
    name: text2sql-lora-1
    uid: 61fd3d3c-8549-4742-8f43-7df8c66f0a6d
  resourceVersion: "789949"
  uid: bef1fb3e-27d2-4663-ac87-14ef721c3693
spec:
  clusterIP: None
  clusterIPs:
  - None
  internalTrafficPolicy: Cluster
  ipFamilies:
  - IPv4
  ipFamilyPolicy: SingleStack
  ports:
  - name: http
    port: 8000
    protocol: TCP
    targetPort: 8000
  publishNotReadyAddresses: true
  selector:
    model.aibrix.ai/name: llama2-70b
  sessionAffinity: None
  type: ClusterIP
status:
  loadBalancer: {}

addressType: IPv4
apiVersion: discovery.k8s.io/v1
endpoints:
- addresses:
  - 10.1.2.133
  conditions: {}
kind: EndpointSlice
metadata:
  creationTimestamp: "2024-07-14T21:42:59Z"
  generation: 1
  labels:
    kubernetes.io/service-name: text2sql-lora-1
  name: text2sql-lora-1
  namespace: default
  ownerReferences:
  - apiVersion: model.aibrix.ai/v1alpha1
    blockOwnerDeletion: true
    controller: true
    kind: ModelAdapter
    name: text2sql-lora-1
    uid: 61fd3d3c-8549-4742-8f43-7df8c66f0a6d
  resourceVersion: "789958"
  uid: bf913402-b97d-426d-89a9-8ea734ba8a7a
ports:
- name: http
  port: 80
  protocol: TCP

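Problem here: two EndpointSlices exist for the same pod IP. The first (text2sql-lora-1, port 80) is the one owned by the ModelAdapter; the second (text2sql-lora-1-hzdl9, port 8000) was created from the Endpoints object shown below.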

text2sql-lora-1                     IPv4          80                           10.1.2.133     2m24s
text2sql-lora-1-hzdl9               IPv4          8000                         10.1.2.133     2m26s

apiVersion: v1
kind: Endpoints
metadata:
  annotations:
    endpoints.kubernetes.io/last-change-trigger-time: "2024-07-14T21:42:57Z"
  creationTimestamp: "2024-07-14T21:42:57Z"
  labels:
    model.aibrix.ai/name: llama2-70b
    adapter.model.aibrix.ai/name: text2sql-lora-1
    service.kubernetes.io/headless: ""
  name: text2sql-lora-1
  namespace: default
  resourceVersion: "789951"
  uid: 7f64255c-ff58-49fa-9ec3-f19164f884ba
subsets:
- addresses:
  - ip: 10.1.2.133
    nodeName: docker-desktop
    targetRef:
      kind: Pod
      name: lora-test
      namespace: default
      uid: 408484b6-38e9-4fa1-8b2c-e57753a0f220
  ports:
  - name: http
    port: 8000
    protocol: TCP

Documentation ¶

Index ¶

Constants
func Add(mgr manager.Manager, runtimeConfig config.RuntimeConfig) error
func NewCondition(condType string, status metav1.ConditionStatus, reason, msg string) metav1.Condition
func RemoveInstanceFromList(slice []string, strToRemove string) []string
func StringInSlice(slice []string, str string) bool
type ModelAdapterReconciler
- func (r *ModelAdapterReconciler) DoReconcile(ctx context.Context, req ctrl.Request, instance *modelv1alpha1.ModelAdapter) (ctrl.Result, error)
- func (r *ModelAdapterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)
type URLConfig
- func BuildURLs(podIP string, config config.RuntimeConfig) URLConfig

Constants ¶

View Source

const (
	// Label keys, label value and finalizer name used by the ModelAdapter controller.
	// NOTE(review): ModelIdentifierKey aliases constants.ModelLabelName, whose value is not visible here.
	ModelIdentifierKey                = constants.ModelLabelName
	ModelAdapterKey                   = "adapter.model.aibrix.ai/name"
	ModelAdapterFinalizer             = "adapter.model.aibrix.ai/finalizer"
	ModelAdapterPodTemplateLabelKey   = "adapter.model.aibrix.ai/enabled"
	ModelAdapterPodTemplateLabelValue = "true"

	// ModelAdapterInitializedReason is added in a model adapter when it enters the reconciliation loop.
	ModelAdapterInitializedReason = "ModelAdapterPending"
	// FailedServiceCreateReason is added in a model adapter when it cannot create a new service.
	FailedServiceCreateReason = "ServiceCreateError"
	// FailedEndpointSliceCreateReason is added in a model adapter when it cannot create a new endpoint slice.
	FailedEndpointSliceCreateReason = "EndpointSliceCreateError"
	// ModelAdapterLoadingErrorReason is added in a model adapter when it cannot be loaded in an engine pod.
	ModelAdapterLoadingErrorReason = "ModelAdapterLoadingError"
	// ValidationFailedReason is added when a model adapter object fails validation.
	ValidationFailedReason = "ValidationFailed"
	// StableInstanceFoundReason is added if there is a stale pod and the instance has been deleted successfully.
	// NOTE(review): the identifier and value say "Stable" while the description refers to a stale pod;
	// this looks like a typo for "Stale" — confirm before renaming, since the name is exported.
	StableInstanceFoundReason = "StableInstanceFound"
	// ConditionNotReason is added when no condition is found in the cluster.
	ConditionNotReason = "ConditionNotFound"

	// ModelAdapterAvailable is added in a ModelAdapter when it has replicas available.
	ModelAdapterAvailable = "ModelAdapterAvailable"
	// ModelAdapterUnavailable is added in a ModelAdapter when it doesn't have any pod hosting it.
	ModelAdapterUnavailable = "ModelAdapterUnavailable"

	// Default inference engine and runtime API ports (string-typed).
	DefaultInferenceEnginePort      = "8000"
	DefaultDebugInferenceEnginePort = "30081"
	DefaultRuntimeAPIPort           = "8080"

	// API paths for listing models and loading/unloading LoRA adapters.
	// NOTE(review): the *RuntimeAPIPath variants presumably target the runtime API
	// (see DefaultRuntimeAPIPort) rather than the inference engine directly — confirm.
	ModelListPath            = "/v1/models"
	ModelListRuntimeAPIPath  = "/v1/models"
	LoadLoraAdapterPath      = "/v1/load_lora_adapter"
	LoadLoraRuntimeAPIPath   = "/v1/lora_adapter/load"
	UnloadLoraAdapterPath    = "/v1/unload_lora_adapter"
	UnloadLoraRuntimeAPIPath = "/v1/lora_adapter/unload"

	// DefaultModelAdapterSchedulerPolicy is the default scheduler policy for ModelAdapter Controller.
	DefaultModelAdapterSchedulerPolicy = "leastAdapters"
)

Variables ¶

This section is empty.

Functions ¶

func Add ¶

func Add(mgr manager.Manager, runtimeConfig config.RuntimeConfig) error

Add creates a new ModelAdapter Controller and adds it to the Manager with default RBAC. The Manager will set fields on the Controller and Start it when the Manager is Started.

func NewCondition ¶

func NewCondition(condType string, status metav1.ConditionStatus, reason, msg string) metav1.Condition

NewCondition creates a new condition.

func RemoveInstanceFromList ¶

func RemoveInstanceFromList(slice []string, strToRemove string) []string

RemoveInstanceFromList removes a string from a slice of strings

func StringInSlice ¶

func StringInSlice(slice []string, str string) bool

Types ¶

type ModelAdapterReconciler ¶

// ModelAdapterReconciler reconciles a ModelAdapter object.
type ModelAdapterReconciler struct {
	client.Client
	Scheme   *runtime.Scheme
	Recorder record.EventRecorder

	// PodLister is able to list/get pods from a shared informer's cache store
	PodLister corelisters.PodLister
	// ServiceLister is able to list/get services from a shared informer's cache store
	ServiceLister corelisters.ServiceLister
	// EndpointSliceLister is able to list/get endpoint slices from a shared informer's cache store
	EndpointSliceLister discoverylisters.EndpointSliceLister
	RuntimeConfig       config.RuntimeConfig
	// contains filtered or unexported fields
}

ModelAdapterReconciler reconciles a ModelAdapter object

func (*ModelAdapterReconciler) DoReconcile ¶

func (r *ModelAdapterReconciler) DoReconcile(ctx context.Context, req ctrl.Request, instance *modelv1alpha1.ModelAdapter) (ctrl.Result, error)

func (*ModelAdapterReconciler) Reconcile ¶

func (r *ModelAdapterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error)

Reconcile reads the state of the ModelAdapter object and makes changes based on the state read and what is in the ModelAdapter.Spec.

type URLConfig ¶

// URLConfig groups the URLs returned by BuildURLs for a given pod IP and runtime config.
// NOTE(review): the fields presumably hold the base address plus the list-models,
// load-adapter and unload-adapter endpoints — confirm against BuildURLs.
type URLConfig struct {
	BaseURL          string
	ListModelsURL    string
	LoadAdapterURL   string
	UnloadAdapterURL string
}

func BuildURLs ¶

func BuildURLs(podIP string, config config.RuntimeConfig) URLConfig

Source Files ¶

View all Source files

Directories ¶

Path	Synopsis
scheduling

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL