v1alpha2

package

v0.5.1 Latest Latest Go to latest Published: Jul 23, 2025 License: Apache-2.0 Imports: 3 Imported by: 0

Documentation ¶

Overview ¶

Package v1alpha2 contains API Schema definitions for the inference.networking.x-k8s.io API group.

+k8s:openapi-gen=true +kubebuilder:object:generate=true +groupName=inference.networking.x-k8s.io

Index ¶

Constants
Variables
func Resource(resource string) schema.GroupResource
type Criticality
type EndpointPickerConfig
- func (in *EndpointPickerConfig) DeepCopy() *EndpointPickerConfig
- func (in *EndpointPickerConfig) DeepCopyInto(out *EndpointPickerConfig)
type Extension
- func (in *Extension) DeepCopy() *Extension
- func (in *Extension) DeepCopyInto(out *Extension)
type ExtensionConnection
- func (in *ExtensionConnection) DeepCopy() *ExtensionConnection
- func (in *ExtensionConnection) DeepCopyInto(out *ExtensionConnection)
type ExtensionFailureMode
type ExtensionReference
- func (in *ExtensionReference) DeepCopy() *ExtensionReference
- func (in *ExtensionReference) DeepCopyInto(out *ExtensionReference)
type Group
type InferenceModel
- func (in *InferenceModel) DeepCopy() *InferenceModel
- func (in *InferenceModel) DeepCopyInto(out *InferenceModel)
- func (in *InferenceModel) DeepCopyObject() runtime.Object
type InferenceModelConditionReason
type InferenceModelConditionType
type InferenceModelList
- func (in *InferenceModelList) DeepCopy() *InferenceModelList
- func (in *InferenceModelList) DeepCopyInto(out *InferenceModelList)
- func (in *InferenceModelList) DeepCopyObject() runtime.Object
type InferenceModelSpec
- func (in *InferenceModelSpec) DeepCopy() *InferenceModelSpec
- func (in *InferenceModelSpec) DeepCopyInto(out *InferenceModelSpec)
type InferenceModelStatus
- func (in *InferenceModelStatus) DeepCopy() *InferenceModelStatus
- func (in *InferenceModelStatus) DeepCopyInto(out *InferenceModelStatus)
type InferencePool
- func (in *InferencePool) DeepCopy() *InferencePool
- func (in *InferencePool) DeepCopyInto(out *InferencePool)
- func (in *InferencePool) DeepCopyObject() runtime.Object
type InferencePoolConditionType
type InferencePoolList
- func (in *InferencePoolList) DeepCopy() *InferencePoolList
- func (in *InferencePoolList) DeepCopyInto(out *InferencePoolList)
- func (in *InferencePoolList) DeepCopyObject() runtime.Object
type InferencePoolReason
type InferencePoolSpec
- func (in *InferencePoolSpec) DeepCopy() *InferencePoolSpec
- func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec)
type InferencePoolStatus
- func (in *InferencePoolStatus) DeepCopy() *InferencePoolStatus
- func (in *InferencePoolStatus) DeepCopyInto(out *InferencePoolStatus)
type Kind
type LabelKey
type LabelValue
type Namespace
type ObjectName
type ParentGatewayReference
- func (in *ParentGatewayReference) DeepCopy() *ParentGatewayReference
- func (in *ParentGatewayReference) DeepCopyInto(out *ParentGatewayReference)
type PoolObjectReference
- func (in *PoolObjectReference) DeepCopy() *PoolObjectReference
- func (in *PoolObjectReference) DeepCopyInto(out *PoolObjectReference)
type PoolStatus
- func (in *PoolStatus) DeepCopy() *PoolStatus
- func (in *PoolStatus) DeepCopyInto(out *PoolStatus)
type PortNumber
type TargetModel
- func (in *TargetModel) DeepCopy() *TargetModel
- func (in *TargetModel) DeepCopyInto(out *TargetModel)

Constants ¶

View Source

const (
	// ModelConditionAccepted indicates if the model config is accepted, and if not, why.
	//
	// Possible reasons for this condition to be True are:
	//
	// * "Accepted"
	//
	// Possible reasons for this condition to be False are:
	//
	// * "ModelNameInUse"
	//
	// Possible reasons for this condition to be Unknown are:
	//
	// * "Pending"
	//
	ModelConditionAccepted InferenceModelConditionType = "Accepted"

	// ModelReasonAccepted is the reason used when the "Accepted" condition is True.
	// It is the desired state: the model conforms to the state of the pool.
	ModelReasonAccepted InferenceModelConditionReason = "Accepted"

	// ModelReasonNameInUse is the reason used when the "Accepted" condition is False
	// because a given ModelName already exists within the pool.
	// Details about naming conflict resolution are on the ModelName field itself.
	ModelReasonNameInUse InferenceModelConditionReason = "ModelNameInUse"

	// ModelReasonPending is the reason used when the "Accepted" condition is Unknown.
	// It is the initial state, and indicates that the controller has not yet reconciled the InferenceModel.
	ModelReasonPending InferenceModelConditionReason = "Pending"
)

View Source

const (
	// InferencePoolConditionAccepted indicates whether the InferencePool has been
	// accepted or rejected by a Gateway, and why.
	//
	// Possible reasons for this condition to be True are:
	//
	// * "Accepted"
	//
	// Possible reasons for this condition to be False are:
	//
	// * "NotSupportedByGateway"
	// * "HTTPRouteNotAccepted"
	//
	// Possible reasons for this condition to be Unknown are:
	//
	// * "Pending"
	//
	// Controllers MAY raise this condition with other reasons, but should
	// prefer to use the reasons listed above to improve interoperability.
	InferencePoolConditionAccepted InferencePoolConditionType = "Accepted"

	// InferencePoolReasonAccepted is used with the "Accepted" condition when the
	// InferencePool has been accepted by the Gateway.
	InferencePoolReasonAccepted InferencePoolReason = "Accepted"

	// InferencePoolReasonNotSupportedByGateway is used with the "Accepted" condition
	// when the InferencePool has not been accepted by a Gateway because the Gateway
	// does not support InferencePool as a backend.
	InferencePoolReasonNotSupportedByGateway InferencePoolReason = "NotSupportedByGateway"

	// InferencePoolReasonHTTPRouteNotAccepted is used with the "Accepted" condition
	// when the InferencePool is referenced by an HTTPRoute that has been rejected by
	// the Gateway. The user should inspect the status of the referring HTTPRoute for
	// the specific reason.
	InferencePoolReasonHTTPRouteNotAccepted InferencePoolReason = "HTTPRouteNotAccepted"

	// InferencePoolReasonPending is used with the "Accepted" condition when a
	// controller has not yet reconciled the InferencePool.
	InferencePoolReasonPending InferencePoolReason = "Pending"
)

View Source

const (
	// InferencePoolConditionResolvedRefs indicates whether the controller was able
	// to resolve all the object references for the InferencePool.
	//
	// Possible reasons for this condition to be True are:
	//
	// * "ResolvedRefs"
	//
	// Possible reasons for this condition to be False are:
	//
	// * "InvalidExtensionRef"
	//
	// Controllers MAY raise this condition with other reasons, but should
	// prefer to use the reasons listed above to improve interoperability.
	InferencePoolConditionResolvedRefs InferencePoolConditionType = "ResolvedRefs"

	// InferencePoolReasonResolvedRefs is used with the "ResolvedRefs" condition
	// when the condition is True.
	InferencePoolReasonResolvedRefs InferencePoolReason = "ResolvedRefs"

	// InferencePoolReasonInvalidExtensionRef is used with the "ResolvedRefs"
	// condition when the ExtensionRef is invalid in some way. This can include an
	// unsupported kind or API group, or a reference to a resource that cannot be found.
	InferencePoolReasonInvalidExtensionRef InferencePoolReason = "InvalidExtensionRef"
)

View Source

const GroupName = "inference.networking.x-k8s.io"

GroupName specifies the group name used to register the objects.

Variables ¶

View Source

var (
	// SchemeBuilder is the exported runtime.SchemeBuilder for this API group.
	// localSchemeBuilder and AddToScheme will stay in k8s.io/kubernetes.
	// NOTE(review): localSchemeBuilder is declared outside this excerpt —
	// presumably it points at SchemeBuilder; confirm against the full source.
	SchemeBuilder runtime.SchemeBuilder

	// AddToScheme and Install are both bound to localSchemeBuilder.AddToScheme;
	// Install is the preferred name.
	//
	// Deprecated: use Install instead.
	AddToScheme = localSchemeBuilder.AddToScheme
	Install     = localSchemeBuilder.AddToScheme
)

View Source

var GroupVersion = v1.GroupVersion{Group: GroupName, Version: "v1alpha2"}

GroupVersion specifies the group and the version used to register the objects.

View Source

var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: "v1alpha2"}

SchemeGroupVersion is the group version used to register these objects. Deprecated: use GroupVersion instead.

Functions ¶

func Resource ¶

func Resource(resource string) schema.GroupResource

Resource takes an unqualified resource and returns a Group-qualified GroupResource.

Types ¶

type Criticality ¶

type Criticality string

Criticality defines how important it is to serve the model compared to other models. Criticality is intentionally a bounded enum to contain the possibilities that need to be supported by the load balancing algorithm. Any reference to the Criticality field must be optional (use a pointer), and set no default. This allows us to union this with a oneOf field in the future should we wish to adjust/extend this behavior. +kubebuilder:validation:Enum=Critical;Standard;Sheddable

const (
	// Critical defines the highest level of criticality. Requests in this band will be shed last.
	Critical Criticality = "Critical"

	// Standard defines the base criticality level and is more important than Sheddable but less
	// important than Critical. Requests in this band will be shed before Critical traffic.
	// Most models are expected to fall within this band.
	Standard Criticality = "Standard"

	// Sheddable defines the lowest level of criticality. Requests in this band will be shed before
	// all other bands.
	Sheddable Criticality = "Sheddable"
)

type EndpointPickerConfig ¶

type EndpointPickerConfig struct {
	// ExtensionRef configures an endpoint picker as an extension service.
	//
	// +kubebuilder:validation:Required
	ExtensionRef *Extension `json:"extensionRef,omitempty"`
}

EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension. This type is intended to be a union of mutually exclusive configuration options that we may add in the future.

func (*EndpointPickerConfig) DeepCopy ¶

func (in *EndpointPickerConfig) DeepCopy() *EndpointPickerConfig

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EndpointPickerConfig.

func (*EndpointPickerConfig) DeepCopyInto ¶

func (in *EndpointPickerConfig) DeepCopyInto(out *EndpointPickerConfig)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type Extension ¶

type Extension struct {
	// ExtensionReference is a reference to a service extension. When ExtensionReference is invalid,
	// a 5XX status code MUST be returned for the request that would have otherwise been routed
	// to the invalid backend.
	ExtensionReference `json:",inline"`

	// ExtensionConnection configures the connection between the gateway and the extension.
	ExtensionConnection `json:",inline"`
}

Extension specifies how to configure an extension that runs the endpoint picker.

func (*Extension) DeepCopy ¶

func (in *Extension) DeepCopy() *Extension

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Extension.

func (*Extension) DeepCopyInto ¶

func (in *Extension) DeepCopyInto(out *Extension)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type ExtensionConnection ¶

type ExtensionConnection struct {
	// FailureMode configures how the gateway handles the case when the extension is not responsive.
	// Defaults to FailClose.
	//
	// +optional
	// +kubebuilder:default="FailClose"
	FailureMode *ExtensionFailureMode `json:"failureMode"`
}

ExtensionConnection encapsulates options that configure the connection to the extension.

func (*ExtensionConnection) DeepCopy ¶

func (in *ExtensionConnection) DeepCopy() *ExtensionConnection

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionConnection.

func (*ExtensionConnection) DeepCopyInto ¶

func (in *ExtensionConnection) DeepCopyInto(out *ExtensionConnection)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type ExtensionFailureMode ¶

type ExtensionFailureMode string

ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not responsive. +kubebuilder:validation:Enum=FailOpen;FailClose

const (
	// FailOpen specifies that the proxy should not drop the request and should instead forward it to an endpoint of its picking.
	FailOpen ExtensionFailureMode = "FailOpen"
	// FailClose specifies that the proxy should drop the request.
	FailClose ExtensionFailureMode = "FailClose"
)

type ExtensionReference ¶

type ExtensionReference struct {
	// Group is the group of the referent.
	// The default value is "", representing the Core API group.
	//
	// +optional
	// +kubebuilder:default=""
	Group *Group `json:"group,omitempty"`

	// Kind is the Kubernetes resource kind of the referent, for example
	// "Service".
	//
	// Defaults to "Service" when not specified.
	//
	// ExternalName services can refer to CNAME DNS records that may live
	// outside of the cluster and as such are difficult to reason about in
	// terms of conformance. They also may not be safe to forward to (see
	// CVE-2021-25740 for more information). Implementations MUST NOT
	// support ExternalName Services.
	//
	// +optional
	// +kubebuilder:default=Service
	Kind *Kind `json:"kind,omitempty"`

	// Name is the name of the referent.
	//
	// +kubebuilder:validation:Required
	Name ObjectName `json:"name"`

	// PortNumber is the port number on the service running the extension.
	// When unspecified, implementations SHOULD infer a default value of 9002
	// when the Kind is Service.
	//
	// +optional
	PortNumber *PortNumber `json:"portNumber,omitempty"`
}

ExtensionReference is a reference to the extension.

Connections to this extension MUST use TLS by default. Implementations MAY provide a way to customize this connection to use cleartext, a different protocol, or custom TLS configuration.

If a reference is invalid, the implementation MUST update the `ResolvedRefs` Condition on the InferencePool's status to `status: False`. A 5XX status code MUST be returned for the request that would have otherwise been routed to the invalid backend.

func (*ExtensionReference) DeepCopy ¶

func (in *ExtensionReference) DeepCopy() *ExtensionReference

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ExtensionReference.

func (*ExtensionReference) DeepCopyInto ¶

func (in *ExtensionReference) DeepCopyInto(out *ExtensionReference)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type Group ¶

type Group string

Group refers to a Kubernetes Group. It must either be an empty string or a RFC 1123 subdomain.

This validation is based off of the corresponding Kubernetes validation: https://github.com/kubernetes/apimachinery/blob/02cfb53916346d085a6c6c7c66f882e3c6b0eca6/pkg/util/validation/validation.go#L208

Valid values include:

* "" - empty string implies core Kubernetes API group * "gateway.networking.k8s.io" * "foo.example.com"

Invalid values include:

* "example.com/bar" - "/" is an invalid character

+kubebuilder:validation:MaxLength=253 +kubebuilder:validation:Pattern=`^$|^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$`

type InferenceModel ¶

type InferenceModel struct {
	// metav1.TypeMeta is inlined; metav1.ObjectMeta is serialized under "metadata".
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec is the desired state of the model use case and Status is the
	// observed state of the InferenceModel.
	Spec   InferenceModelSpec   `json:"spec,omitempty"`
	Status InferenceModelStatus `json:"status,omitempty"`
}

InferenceModel is the Schema for the InferenceModels API.

+kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:storageversion +kubebuilder:printcolumn:name="Model Name",type=string,JSONPath=`.spec.modelName` +kubebuilder:printcolumn:name="Inference Pool",type=string,JSONPath=`.spec.poolRef.name` +kubebuilder:printcolumn:name="Criticality",type=string,JSONPath=`.spec.criticality` +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` +genclient

func (*InferenceModel) DeepCopy ¶

func (in *InferenceModel) DeepCopy() *InferenceModel

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModel.

func (*InferenceModel) DeepCopyInto ¶

func (in *InferenceModel) DeepCopyInto(out *InferenceModel)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*InferenceModel) DeepCopyObject ¶

func (in *InferenceModel) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type InferenceModelConditionReason ¶

type InferenceModelConditionReason string

InferenceModelConditionReason is the reason for a given InferenceModelConditionType.

type InferenceModelConditionType ¶

type InferenceModelConditionType string

InferenceModelConditionType is a type of condition for the InferenceModel.

type InferenceModelList ¶

type InferenceModelList struct {
	// metav1.TypeMeta is inlined and metav1.ListMeta is serialized under "metadata".
	// Items holds the InferenceModel entries of the list and is always
	// serialized (its tag has no omitempty).
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []InferenceModel `json:"items"`
}

InferenceModelList contains a list of InferenceModel.

+kubebuilder:object:root=true

func (*InferenceModelList) DeepCopy ¶

func (in *InferenceModelList) DeepCopy() *InferenceModelList

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelList.

func (*InferenceModelList) DeepCopyInto ¶

func (in *InferenceModelList) DeepCopyInto(out *InferenceModelList)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*InferenceModelList) DeepCopyObject ¶

func (in *InferenceModelList) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type InferenceModelSpec ¶

type InferenceModelSpec struct {
	// ModelName is the name of the model as it will be set in the "model" parameter for an incoming request.
	// ModelNames must be unique for a referencing InferencePool
	// (names can be reused for a different pool in the same cluster).
	// When names conflict, the InferenceModel with the oldest creation timestamp is retained, and the
	// incoming InferenceModel's "Accepted" condition is set to False with reason "ModelNameInUse".
	// In the rare case of a race condition, one Model will be selected randomly to be considered valid, and the other rejected.
	// Names can be reserved without an underlying model configured in the pool.
	// This can be done by specifying a target model and setting the weight to zero;
	// an error will be returned specifying that no valid target model is found.
	//
	// +kubebuilder:validation:MaxLength=256
	// +kubebuilder:validation:Required
	// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="modelName is immutable"
	ModelName string `json:"modelName"`

	// Criticality defines how important it is to serve the model compared to other models referencing the same pool.
	// Criticality impacts how traffic is handled in resource-constrained situations. It handles this by
	// queuing or rejecting requests of lower criticality. InferenceModels of an equivalent Criticality will
	// fairly share resources over throughput of tokens. In the future, the metric used to calculate fairness,
	// and the proportionality of fairness, will be configurable.
	//
	// Default values for this field will not be set, to allow for future additions of new fields that may form a 'one of' with this field.
	// Any implementations that may consume this field may treat an unset value as the 'Standard' range.
	//
	// +optional
	Criticality *Criticality `json:"criticality,omitempty"`

	// TargetModels allows multiple versions of a model for traffic splitting.
	// If not specified, the target model name is defaulted to the modelName parameter.
	// modelName is often in reference to a LoRA adapter.
	// At most 10 target models may be listed, and weights must be set on all of them or on none.
	//
	// +optional
	// +kubebuilder:validation:MaxItems=10
	// +kubebuilder:validation:XValidation:message="Weights should be set for all models, or none of the models.",rule="self.all(model, has(model.weight)) || self.all(model, !has(model.weight))"
	TargetModels []TargetModel `json:"targetModels,omitempty"`

	// PoolRef is a reference to the inference pool; the pool must exist in the same namespace.
	//
	// +kubebuilder:validation:Required
	PoolRef PoolObjectReference `json:"poolRef"`
}

InferenceModelSpec represents the desired state of a specific model use case. This resource is managed by the "Inference Workload Owner" persona.

The Inference Workload Owner persona is someone that trains, verifies, and leverages a large language model from a model frontend, drives the lifecycle and rollout of new versions of those models, and defines the specific performance and latency goals for the model. These workloads are expected to operate within an InferencePool sharing compute capacity with other InferenceModels, defined by the Inference Platform Admin.

InferenceModel's modelName (not the ObjectMeta name) is unique for a given InferencePool. If the name is reused, an error will be shown on the status of the InferenceModel that attempted to reuse it. The oldest InferenceModel, based on creation timestamp, will be selected to remain valid. In the event of a race condition, one will be selected at random.

func (*InferenceModelSpec) DeepCopy ¶

func (in *InferenceModelSpec) DeepCopy() *InferenceModelSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelSpec.

func (*InferenceModelSpec) DeepCopyInto ¶

func (in *InferenceModelSpec) DeepCopyInto(out *InferenceModelSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type InferenceModelStatus ¶

type InferenceModelStatus struct {
	// Conditions track the state of the InferenceModel.
	// The list is keyed by condition type and holds at most 8 entries.
	//
	// Known condition types are:
	//
	// * "Accepted"
	//
	// +optional
	// +listType=map
	// +listMapKey=type
	// +kubebuilder:validation:MaxItems=8
	// +kubebuilder:default={{type: "Ready", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}
	Conditions []metav1.Condition `json:"conditions,omitempty"`
	// NOTE(review): the default marker above seeds a condition of type "Ready",
	// while the only known condition type listed is "Accepted" — confirm which is intended.
}

InferenceModelStatus defines the observed state of InferenceModel

func (*InferenceModelStatus) DeepCopy ¶

func (in *InferenceModelStatus) DeepCopy() *InferenceModelStatus

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferenceModelStatus.

func (*InferenceModelStatus) DeepCopyInto ¶

func (in *InferenceModelStatus) DeepCopyInto(out *InferenceModelStatus)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type InferencePool ¶

type InferencePool struct {
	// metav1.TypeMeta is inlined; metav1.ObjectMeta is serialized under "metadata".
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec defines the desired state of InferencePool.
	Spec InferencePoolSpec `json:"spec,omitempty"`

	// Status defines the observed state of InferencePool.
	// By default it carries a single parent entry with kind "Status" and name
	// "default", whose "Accepted" condition is Unknown with reason "Pending".
	//
	// +kubebuilder:default={parent: {{parentRef: {kind: "Status", name: "default"}, conditions: {{type: "Accepted", status: "Unknown", reason: "Pending", message: "Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}}}}
	Status InferencePoolStatus `json:"status,omitempty"`
}

InferencePool is the Schema for the InferencePools API.

+kubebuilder:object:root=true +kubebuilder:subresource:status +kubebuilder:storageversion +genclient

func (*InferencePool) DeepCopy ¶

func (in *InferencePool) DeepCopy() *InferencePool

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePool.

func (*InferencePool) DeepCopyInto ¶

func (in *InferencePool) DeepCopyInto(out *InferencePool)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*InferencePool) DeepCopyObject ¶

func (in *InferencePool) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type InferencePoolConditionType ¶

type InferencePoolConditionType string

InferencePoolConditionType is a type of condition for the InferencePool

type InferencePoolList ¶

type InferencePoolList struct {
	// metav1.TypeMeta is inlined and metav1.ListMeta is serialized under "metadata".
	// Items holds the InferencePool entries of the list and is always
	// serialized (its tag has no omitempty).
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`
	Items           []InferencePool `json:"items"`
}

InferencePoolList contains a list of InferencePool.

+kubebuilder:object:root=true

func (*InferencePoolList) DeepCopy ¶

func (in *InferencePoolList) DeepCopy() *InferencePoolList

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolList.

func (*InferencePoolList) DeepCopyInto ¶

func (in *InferencePoolList) DeepCopyInto(out *InferencePoolList)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

func (*InferencePoolList) DeepCopyObject ¶

func (in *InferencePoolList) DeepCopyObject() runtime.Object

DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.

type InferencePoolReason ¶

type InferencePoolReason string

InferencePoolReason is the reason for a given InferencePoolConditionType

type InferencePoolSpec ¶

type InferencePoolSpec struct {
	// Selector defines a map of labels to watch model server pods
	// that should be included in the InferencePool.
	// In some cases, implementations may translate this field to a Service selector, so this matches the simple
	// map used for Service selectors instead of the full Kubernetes LabelSelector type.
	// If specified, it will be applied to match the model server pods in the same namespace as the InferencePool.
	// Cross-namespace selectors are not supported.
	//
	// +kubebuilder:validation:Required
	Selector map[LabelKey]LabelValue `json:"selector"`

	// TargetPortNumber defines the port number to access the selected model servers.
	// The number must be in the range 1 to 65535.
	//
	// +kubebuilder:validation:Minimum=1
	// +kubebuilder:validation:Maximum=65535
	// +kubebuilder:validation:Required
	TargetPortNumber int32 `json:"targetPortNumber"`

	// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint
	// picker service that picks endpoints for the requests routed to this pool.
	EndpointPickerConfig `json:",inline"`
}

InferencePoolSpec defines the desired state of InferencePool

func (*InferencePoolSpec) DeepCopy ¶

func (in *InferencePoolSpec) DeepCopy() *InferencePoolSpec

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolSpec.

func (*InferencePoolSpec) DeepCopyInto ¶

func (in *InferencePoolSpec) DeepCopyInto(out *InferencePoolSpec)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type InferencePoolStatus ¶

type InferencePoolStatus struct {
	// Parents is a list of parent resources (usually Gateways) that are
	// associated with the InferencePool, and the status of the InferencePool with respect to
	// each parent.
	//
	// A maximum of 32 Gateways will be represented in this list. When the list contains
	// `kind: Status, name: default`, it indicates that the InferencePool is not
	// associated with any Gateway and a controller must perform the following:
	//
	//  - Remove this placeholder parent when setting the "Accepted" condition.
	//  - Add this placeholder parent when the controller will no longer manage the
	//    InferencePool and no other parents exist.
	//
	// +kubebuilder:validation:MaxItems=32
	Parents []PoolStatus `json:"parent,omitempty"`
	// NOTE: Parents is serialized under the singular JSON key "parent".
}

InferencePoolStatus defines the observed state of InferencePool.

func (*InferencePoolStatus) DeepCopy ¶

func (in *InferencePoolStatus) DeepCopy() *InferencePoolStatus

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InferencePoolStatus.

func (*InferencePoolStatus) DeepCopyInto ¶

func (in *InferencePoolStatus) DeepCopyInto(out *InferencePoolStatus)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type Kind ¶

type Kind string

Kind refers to a Kubernetes Kind.

Valid values include:

* "Service" * "HTTPRoute"

Invalid values include:

* "invalid/kind" - "/" is an invalid character

+kubebuilder:validation:MinLength=1 +kubebuilder:validation:MaxLength=63 +kubebuilder:validation:Pattern=`^[a-zA-Z]([-a-zA-Z0-9]*[a-zA-Z0-9])?$`

type LabelKey ¶

type LabelKey string

LabelKey was originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731 It is duplicated here so as not to take an unexpected dependency on Gateway API's types.

LabelKey is the key of a label. This is used for validation of maps. This matches the Kubernetes "qualified name" validation that is used for labels. Labels are case sensitive, so: my-label and My-Label are considered distinct.

Valid values include:

* example * example.com * example.com/path * example.com/path.html

Invalid values include:

* example~ - "~" is an invalid character * example.com. - can not start or end with "."

+kubebuilder:validation:MinLength=1 +kubebuilder:validation:MaxLength=253 +kubebuilder:validation:Pattern=`^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?([A-Za-z0-9][-A-Za-z0-9_.]{0,61})?[A-Za-z0-9]$`

type LabelValue ¶

type LabelValue string

LabelValue is the value of a label. This is used for validation of maps. This matches the Kubernetes label validation rules: * must be 63 characters or less (can be empty), * unless empty, must begin and end with an alphanumeric character ([a-z0-9A-Z]), * could contain dashes (-), underscores (_), dots (.), and alphanumerics between.

Valid values include:

* MyValue * my.name * 123-my-value

+kubebuilder:validation:MinLength=0 +kubebuilder:validation:MaxLength=63 +kubebuilder:validation:Pattern=`^(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?$`

type Namespace ¶ added in v0.5.0

type Namespace string

Namespace refers to a Kubernetes namespace. It must be a RFC 1123 label.

This validation is based off of the corresponding Kubernetes validation: https://github.com/kubernetes/apimachinery/blob/02cfb53916346d085a6c6c7c66f882e3c6b0eca6/pkg/util/validation/validation.go#L187

This is used for Namespace name validation here: https://github.com/kubernetes/apimachinery/blob/02cfb53916346d085a6c6c7c66f882e3c6b0eca6/pkg/api/validation/generic.go#L63

Valid values include:

* "example"

Invalid values include:

* "example.com" - "." is an invalid character

+kubebuilder:validation:Pattern=`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$` +kubebuilder:validation:MinLength=1 +kubebuilder:validation:MaxLength=63

type ObjectName ¶

type ObjectName string

ObjectName refers to the name of a Kubernetes object. Object names can have a variety of forms, including RFC 1123 subdomains, RFC 1123 labels, or RFC 1035 labels.

+kubebuilder:validation:MinLength=1 +kubebuilder:validation:MaxLength=253

type ParentGatewayReference ¶ added in v0.5.0

type ParentGatewayReference struct {
	// Group is the group of the referent.
	// Defaults to "gateway.networking.k8s.io" when not specified.
	//
	// +optional
	// +kubebuilder:default="gateway.networking.k8s.io"
	Group *Group `json:"group"`

	// Kind is the kind of the referent, for example "Gateway".
	// Defaults to "Gateway" when not specified.
	//
	// +optional
	// +kubebuilder:default=Gateway
	Kind *Kind `json:"kind"`

	// Name is the name of the referent.
	Name ObjectName `json:"name"`

	// Namespace is the namespace of the referent. If not present,
	// the namespace of the referent is assumed to be the same as
	// the namespace of the referring object.
	//
	// +optional
	Namespace *Namespace `json:"namespace,omitempty"`
}

ParentGatewayReference identifies an API object including its namespace, defaulting to Gateway.

func (*ParentGatewayReference) DeepCopy ¶ added in v0.5.0

func (in *ParentGatewayReference) DeepCopy() *ParentGatewayReference

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ParentGatewayReference.

func (*ParentGatewayReference) DeepCopyInto ¶ added in v0.5.0

func (in *ParentGatewayReference) DeepCopyInto(out *ParentGatewayReference)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type PoolObjectReference ¶

// PoolObjectReference identifies an API object within the namespace of the
// referrer.
type PoolObjectReference struct {
	// Group is the group of the referent. When unset, the CRD default
	// "inference.networking.x-k8s.io" applies.
	//
	// +optional
	// +kubebuilder:default="inference.networking.x-k8s.io"
	Group Group `json:"group,omitempty"`

	// Kind is the kind of the referent, for example "InferencePool". When
	// unset, the CRD default "InferencePool" applies.
	//
	// +optional
	// +kubebuilder:default="InferencePool"
	Kind Kind `json:"kind,omitempty"`

	// Name is the name of the referent. It is the only required field of
	// the reference.
	//
	// +kubebuilder:validation:Required
	Name ObjectName `json:"name"`
}

PoolObjectReference identifies an API object within the namespace of the referrer.

func (*PoolObjectReference) DeepCopy ¶

func (in *PoolObjectReference) DeepCopy() *PoolObjectReference

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PoolObjectReference.

func (*PoolObjectReference) DeepCopyInto ¶

func (in *PoolObjectReference) DeepCopyInto(out *PoolObjectReference)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type PoolStatus ¶

// PoolStatus defines the observed state of InferencePool from a Gateway.
type PoolStatus struct {
	// GatewayRef identifies the gateway that observed the state of the
	// InferencePool. Note that it is serialized under the JSON key
	// "parentRef", not "gatewayRef".
	GatewayRef ParentGatewayReference `json:"parentRef"`

	// Conditions track the state of the InferencePool.
	//
	// Known condition types are:
	//
	// * "Accepted"
	// * "ResolvedRefs"
	//
	// At most 8 conditions may be listed, keyed by their type. When the
	// field is unset, the CRD default is a single "Accepted" condition with
	// status "Unknown" and reason "Pending".
	//
	// +optional
	// +listType=map
	// +listMapKey=type
	// +kubebuilder:validation:MaxItems=8
	// +kubebuilder:default={{type: "Accepted", status: "Unknown", reason:"Pending", message:"Waiting for controller", lastTransitionTime: "1970-01-01T00:00:00Z"}}
	Conditions []metav1.Condition `json:"conditions,omitempty"`
}

PoolStatus defines the observed state of InferencePool from a Gateway.

func (*PoolStatus) DeepCopy ¶

func (in *PoolStatus) DeepCopy() *PoolStatus

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PoolStatus.

func (*PoolStatus) DeepCopyInto ¶

func (in *PoolStatus) DeepCopyInto(out *PoolStatus)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

type PortNumber ¶

// PortNumber defines a network port. The kubebuilder validation markers on
// this type restrict it to the range 1 through 65535.
type PortNumber int32

PortNumber defines a network port.

+kubebuilder:validation:Minimum=1 +kubebuilder:validation:Maximum=65535

type TargetModel ¶

// TargetModel represents a deployed model or a LoRA adapter.
type TargetModel struct {
	// Name is the name of the adapter or base model, as expected by the ModelServer.
	// It is required and limited to 253 characters.
	//
	// +kubebuilder:validation:MaxLength=253
	// +kubebuilder:validation:Required
	Name string `json:"name"`

	// Weight is used to determine the proportion of traffic that should be
	// sent to this model when multiple target models are specified.
	//
	// Weight defines the proportion of requests forwarded to the specified
	// model. This is computed as weight/(sum of all weights in this
	// TargetModels list). For non-zero values, there may be some epsilon from
	// the exact proportion defined here depending on the precision an
	// implementation supports. Weight is not a percentage and the sum of
	// weights does not need to equal 100.
	//
	// If a weight is set for any targetModel, it must be set for all targetModels.
	// Conversely, weights may be omitted entirely, so long as NO targetModel
	// specifies a weight.
	//
	// When set, the value must lie between 1 and 1000000 inclusive.
	//
	// +optional
	// +kubebuilder:validation:Minimum=1
	// +kubebuilder:validation:Maximum=1000000
	Weight *int32 `json:"weight,omitempty"`
}

TargetModel represents a deployed model or a LoRA adapter. The Name field is expected to match the name of the LoRA adapter (or base model) as it is registered within the model server. Inference Gateway assumes that the model exists on the model server, and it is the user's responsibility to validate a correct match. Should the model not exist at request time, the error is processed by the Inference Gateway and emitted on the appropriate InferenceModel object.

func (*TargetModel) DeepCopy ¶

func (in *TargetModel) DeepCopy() *TargetModel

DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TargetModel.

func (*TargetModel) DeepCopyInto ¶

func (in *TargetModel) DeepCopyInto(out *TargetModel)

DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL