mig

package
v0.0.1-alpha.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 15, 2022 License: Apache-2.0 Imports: 16 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ExtractMemoryGBFromMigFormat

func ExtractMemoryGBFromMigFormat(migFormatResourceName v1.ResourceName) (int64, error)

func GetGPUAnnotationsFromNode

func GetGPUAnnotationsFromNode(node v1.Node) (GPUStatusAnnotationList, GPUSpecAnnotationList)

func GetKnownGeometries

func GetKnownGeometries() map[GPUModel][]Geometry

func GetRequestedMigResources

func GetRequestedMigResources(pod v1.Pod) map[ProfileName]int

func IsNvidiaMigDevice

func IsNvidiaMigDevice(resourceName v1.ResourceName) bool

func SetKnownGeometries

func SetKnownGeometries(configs map[GPUModel][]Geometry) error

func SpecMatchesStatus

func SpecMatchesStatus(specAnnotations []GPUSpecAnnotation, statusAnnotations []GPUStatusAnnotation) bool

Types

type Client

type Client interface {
	GetMigDeviceResources(ctx context.Context) (DeviceResourceList, gpu.Error)
	GetUsedMigDeviceResources(ctx context.Context) (DeviceResourceList, gpu.Error)
	GetAllocatableMigDeviceResources(ctx context.Context) (DeviceResourceList, gpu.Error)
	CreateMigResources(ctx context.Context, profileList ProfileList) (ProfileList, error)
	DeleteMigResource(ctx context.Context, resource DeviceResource) gpu.Error
	DeleteAllExcept(ctx context.Context, resources DeviceResourceList) error
}

func NewClient

func NewClient(lister pdrv1.PodResourcesListerClient, nvmlClient nvml.Client) Client

type DeviceResource

type DeviceResource struct {
	resource.Device
	// GpuIndex is the index of the parent GPU to which the MIG device belongs
	GpuIndex int
}

func (DeviceResource) FullResourceName

func (m DeviceResource) FullResourceName() string

FullResourceName returns the full resource name of the MIG device, including the name of the resource corresponding to the MIG profile and the index of the GPU to which it belongs.

func (DeviceResource) GetMigProfileName

func (m DeviceResource) GetMigProfileName() ProfileName

GetMigProfileName returns the name of the MIG profile associated with the device.

Example:

Resource name: nvidia.com/mig-1g.10gb
GetMigProfileName() -> 1g.10gb

type DeviceResourceList

type DeviceResourceList []DeviceResource

func (DeviceResourceList) GetFree

func (DeviceResourceList) GetUsed

func (DeviceResourceList) GroupBy

func (l DeviceResourceList) GroupBy(keyFunc func(resource DeviceResource) string) map[string]DeviceResourceList

func (DeviceResourceList) GroupByGpuIndex

func (l DeviceResourceList) GroupByGpuIndex() map[int]DeviceResourceList

func (DeviceResourceList) GroupByMigProfile

func (l DeviceResourceList) GroupByMigProfile() map[Profile]DeviceResourceList

func (DeviceResourceList) SortByDeviceId

func (l DeviceResourceList) SortByDeviceId() DeviceResourceList

type GPU

type GPU struct {
	// contains filtered or unexported fields
}

func NewGPU

func NewGPU(model GPUModel, index int, usedMigDevices, freeMigDevices map[ProfileName]int) (GPU, error)

func NewGpuOrPanic

func NewGpuOrPanic(model GPUModel, index int, usedMigDevices, freeMigDevices map[ProfileName]int) GPU

func (*GPU) AddPod

func (g *GPU) AddPod(pod v1.Pod) error

AddPod adds a Pod to the GPU by updating the free and used MIG devices according to the MIG resources requested by the Pod.

AddPod returns an error if the GPU does not have enough free MIG resources for the Pod.

func (*GPU) AllowsGeometry

func (g *GPU) AllowsGeometry(geometry Geometry) bool

AllowsGeometry returns true if the geometry provided as argument is allowed by the GPU model

func (*GPU) ApplyGeometry

func (g *GPU) ApplyGeometry(geometry Geometry) error

ApplyGeometry applies the MIG geometry provided as argument by changing the free devices of the GPU. It returns an error if the provided geometry is not allowed or if applying it would require deleting any used device of the GPU.

func (*GPU) CanApplyGeometry

func (g *GPU) CanApplyGeometry(geometry Geometry) (bool, string)

CanApplyGeometry returns true if the geometry provided as argument can be applied to the GPU, otherwise it returns false and the reason why the geometry cannot be applied.

func (*GPU) Clone

func (g *GPU) Clone() GPU

func (*GPU) GetAllowedGeometries

func (g *GPU) GetAllowedGeometries() []Geometry

GetAllowedGeometries returns the MIG geometries allowed by the GPU model

func (*GPU) GetFreeMigDevices

func (g *GPU) GetFreeMigDevices() map[ProfileName]int

func (*GPU) GetGeometry

func (g *GPU) GetGeometry() Geometry

func (*GPU) GetIndex

func (g *GPU) GetIndex() int

func (*GPU) GetModel

func (g *GPU) GetModel() GPUModel

func (*GPU) GetUsedMigDevices

func (g *GPU) GetUsedMigDevices() map[ProfileName]int

func (*GPU) HasFreeMigDevices

func (g *GPU) HasFreeMigDevices() bool

func (*GPU) UpdateGeometryFor

func (g *GPU) UpdateGeometryFor(requiredProfiles map[ProfileName]int) bool

UpdateGeometryFor tries to update the geometry of the GPU in order to create the highest possible number of required profiles provided as argument, without deleting any of the used profiles.

The method returns true if the GPU geometry gets updated, false otherwise.

type GPUModel

type GPUModel string
const (
	GPUModel_A100_SXM4_40GB GPUModel = "NVIDIA-A100-40GB-SXM4"
	GPUModel_A100_PCIe_80GB GPUModel = "NVIDIA-A100-80GB-PCIe"
	GPUModel_A30            GPUModel = "A30"
)

type GPUSpecAnnotation

type GPUSpecAnnotation struct {
	Name     string
	Quantity int
}

func NewGPUSpecAnnotationFromNodeAnnotation

func NewGPUSpecAnnotationFromNodeAnnotation(key, value string) (GPUSpecAnnotation, error)

func NewGpuSpecAnnotation

func NewGpuSpecAnnotation(gpuIndex int, profile ProfileName, quantity int) GPUSpecAnnotation

func (GPUSpecAnnotation) GetGPUIndex

func (a GPUSpecAnnotation) GetGPUIndex() int

func (GPUSpecAnnotation) GetGPUIndexWithMigProfile

func (a GPUSpecAnnotation) GetGPUIndexWithMigProfile() string

GetGPUIndexWithMigProfile returns the GPU index included in the annotation together with the respective MIG profile. Example:

Annotation

"n8s.nebuly.ai/spec-gpu-0-1g.10gb"

Result

"0-1g.10gb"

func (GPUSpecAnnotation) GetMigProfileName

func (a GPUSpecAnnotation) GetMigProfileName() ProfileName

func (GPUSpecAnnotation) GetValue

func (a GPUSpecAnnotation) GetValue() string

type GPUSpecAnnotationList

type GPUSpecAnnotationList []GPUSpecAnnotation

func (GPUSpecAnnotationList) GroupByGpuIndex

func (l GPUSpecAnnotationList) GroupByGpuIndex() map[int]GPUSpecAnnotationList

func (GPUSpecAnnotationList) GroupByMigProfile

func (l GPUSpecAnnotationList) GroupByMigProfile() map[Profile]GPUSpecAnnotationList

type GPUStatusAnnotation

type GPUStatusAnnotation struct {
	Name     string
	Quantity int
}

func ComputeStatusAnnotations

func ComputeStatusAnnotations(used []DeviceResource, free []DeviceResource) []GPUStatusAnnotation

func NewGPUStatusAnnotation

func NewGPUStatusAnnotation(key, value string) (GPUStatusAnnotation, error)

func (GPUStatusAnnotation) GetGPUIndex

func (a GPUStatusAnnotation) GetGPUIndex() int

func (GPUStatusAnnotation) GetGPUIndexWithMigProfile

func (a GPUStatusAnnotation) GetGPUIndexWithMigProfile() string

GetGPUIndexWithMigProfile returns the GPU index included in the annotation together with the respective MIG profile. Example:

Annotation

"n8s.nebuly.ai/status-gpu-0-1g.10gb-used"

Result

"0-1g.10gb"

func (GPUStatusAnnotation) GetMigProfileName

func (a GPUStatusAnnotation) GetMigProfileName() ProfileName

func (GPUStatusAnnotation) GetValue

func (a GPUStatusAnnotation) GetValue() string

func (GPUStatusAnnotation) IsFree

func (a GPUStatusAnnotation) IsFree() bool

IsFree returns true if the annotation refers to a free device

func (GPUStatusAnnotation) IsUsed

func (a GPUStatusAnnotation) IsUsed() bool

IsUsed returns true if the annotation refers to a used device

type GPUStatusAnnotationList

type GPUStatusAnnotationList []GPUStatusAnnotation

func (GPUStatusAnnotationList) Equal

func (GPUStatusAnnotationList) Filter

func (l GPUStatusAnnotationList) Filter(filteringFunc func(annotation GPUStatusAnnotation) bool) GPUStatusAnnotationList

func (GPUStatusAnnotationList) GetFree

GetFree returns a new GPUStatusAnnotationList containing the annotations referring to free devices.

func (GPUStatusAnnotationList) GetUsed

GetUsed returns a new GPUStatusAnnotationList containing the annotations referring to used devices.

func (GPUStatusAnnotationList) GroupByGpuIndex

func (l GPUStatusAnnotationList) GroupByGpuIndex() map[int]GPUStatusAnnotationList

func (GPUStatusAnnotationList) GroupByMigProfile

func (l GPUStatusAnnotationList) GroupByMigProfile() map[Profile]GPUStatusAnnotationList

type Geometry

type Geometry map[ProfileName]int

Geometry corresponds to the MIG Geometry of a GPU, namely the MIG profiles of the GPU with the respective quantity.

func GetAllowedGeometries

func GetAllowedGeometries(model GPUModel) ([]Geometry, bool)

func (Geometry) AsResources

func (g Geometry) AsResources() map[v1.ResourceName]int

func (Geometry) Id

func (g Geometry) Id() string

func (Geometry) String

func (g Geometry) String() string

type Node

type Node struct {
	Name string
	GPUs []GPU
}

func NewNode

func NewNode(n v1.Node) (Node, error)

NewNode creates a new MIG Node starting from the node provided as argument.

The function constructs the MIG GPUs of the provided node using both the n8s.nebuly.ai MIG status annotations and the labels exposed by the NVIDIA gpu-feature-discovery tool. Specifically, the following labels are used: GPU product ("nvidia.com/gpu.product") and GPU count ("nvidia.com/gpu.count").

If the v1.Node provided as argument does not have the GPU Product label, the returned node will not contain any mig.GPU.

func (*Node) AddPod

func (n *Node) AddPod(pod v1.Pod) error

AddPod adds a Pod to the node by updating the free and used MIG devices of the Node GPUs according to the MIG resources requested by the Pod.

AddPod returns an error if the node does not have any GPU providing enough free MIG resources for the Pod.

func (*Node) Clone

func (n *Node) Clone() Node

func (*Node) GetGeometry

func (n *Node) GetGeometry() Geometry

GetGeometry returns the overall MIG geometry of the node, which corresponds to the sum of the MIG geometry of all the GPUs present in the Node.

func (*Node) HasFreeMigCapacity

func (n *Node) HasFreeMigCapacity() bool

HasFreeMigCapacity returns true if the Node has at least one GPU with free MIG capacity, namely it either has a free MIG device or its allowed MIG geometries allow creating at least one more MIG device.

func (*Node) UpdateGeometryFor

func (n *Node) UpdateGeometryFor(profiles map[ProfileName]int) bool

UpdateGeometryFor tries to update the MIG geometry of each single GPU of the node in order to create the MIG profiles provided as argument.

The method returns true if it updates the MIG geometry of any GPU, false otherwise.

type Profile

type Profile struct {
	GpuIndex int
	Name     ProfileName
}

type ProfileList

type ProfileList []Profile

func (ProfileList) GroupByGPU

func (p ProfileList) GroupByGPU() map[int]ProfileList

type ProfileName

type ProfileName string
const (
	Profile1g6gb  ProfileName = "1g.6gb"
	Profile2g12gb ProfileName = "2g.12gb"
	Profile4g24gb ProfileName = "4g.24gb"

	Profile1g5gb  ProfileName = "1g.5gb"
	Profile2g10gb ProfileName = "2g.10gb"
	Profile3g20gb ProfileName = "3g.20gb"
	Profile4g20gb ProfileName = "4g.20gb"
	Profile7g40gb ProfileName = "7g.40gb"

	Profile1g10gb ProfileName = "1g.10gb"
	Profile2g20gb ProfileName = "2g.20gb"
	Profile3g40gb ProfileName = "3g.40gb"
	Profile4g40gb ProfileName = "4g.40gb"
	Profile7g79gb ProfileName = "7g.79gb"
)

func ExtractMigProfile

func ExtractMigProfile(migFormatResourceName v1.ResourceName) (ProfileName, error)

ExtractMigProfile extracts the name of the MIG profile from the provided resource name, and returns an error if the resource name is not a valid NVIDIA MIG resource.

Example:

nvidia.com/mig-1g.10gb => 1g.10gb

func (ProfileName) AsResourceName

func (p ProfileName) AsResourceName() v1.ResourceName

func (ProfileName) AsString

func (p ProfileName) AsString() string

func (ProfileName) SmallerThan

func (p ProfileName) SmallerThan(other ProfileName) bool

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL