kubernetesrm

package
v0.0.0-...-3511abf Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 2, 2023 License: Apache-2.0 Imports: 63 Imported by: 0

Documentation

Index

Constants

View Source
const ResourceTypeNvidia = "nvidia.com/gpu"

ResourceTypeNvidia describes the GPU resource type.

Variables

This section is empty.

Functions

This section is empty.

Types

type ChangePosition

type ChangePosition struct {
	PodID cproto.ID
}

ChangePosition notifies the pods actor of a position change and tells it to resubmit the specified pod.

type ChangePriority

type ChangePriority struct {
	PodID cproto.ID
}

ChangePriority notifies the pods actor of a priority change and tells it to resubmit the specified pod.

type KillTaskPod

type KillTaskPod struct {
	PodID cproto.ID
}

KillTaskPod notifies the pods actor to kill a pod.

type PodsInfo

type PodsInfo struct {
	NumAgents      int
	SlotsAvailable int
}

PodsInfo contains information for pods.

type PreemptTaskPod

type PreemptTaskPod struct {
	PodName string
}

PreemptTaskPod notifies the pods actor to preempt a pod.

type ResourceManager

type ResourceManager struct {
	// contains filtered or unexported fields
}

ResourceManager is a resource manager that manages k8s resources.

func New

func New(
	system *actor.System,
	db *db.PgDB,
	rmConfigs *config.ResourceConfig,
	taskContainerDefaults *model.TaskContainerDefaultsConfig,
	opts *aproto.MasterSetAgentOptions,
	cert *tls.Certificate,
) *ResourceManager

New returns a new ResourceManager, which communicates with and submits work to a Kubernetes apiserver.

func (*ResourceManager) Allocate

Allocate implements rm.ResourceManager.

func (ResourceManager) DeleteJob

DeleteJob implements rm.ResourceManager.

func (*ResourceManager) DisableAgent

func (k *ResourceManager) DisableAgent(
	req *apiv1.DisableAgentRequest,
) (resp *apiv1.DisableAgentResponse, err error)

DisableAgent prevents scheduling on a node and has the option to kill running jobs.

func (ResourceManager) DisableSlot

func (k ResourceManager) DisableSlot(
	req *apiv1.DisableSlotRequest,
) (resp *apiv1.DisableSlotResponse, err error)

DisableSlot implements 'det slot disable...' functionality.

func (*ResourceManager) EnableAgent

func (k *ResourceManager) EnableAgent(
	req *apiv1.EnableAgentRequest,
) (resp *apiv1.EnableAgentResponse, err error)

EnableAgent allows scheduling on a node that has been disabled.

func (ResourceManager) EnableSlot

func (k ResourceManager) EnableSlot(
	req *apiv1.EnableSlotRequest,
) (resp *apiv1.EnableSlotResponse, err error)

EnableSlot implements 'det slot enable...' functionality.

func (ResourceManager) ExternalPreemptionPending

func (ResourceManager) ExternalPreemptionPending(sproto.PendingPreemption) error

ExternalPreemptionPending implements rm.ResourceManager.

func (ResourceManager) GetAgent

GetAgent implements rm.ResourceManager.

func (*ResourceManager) GetAgents

GetAgents implements rm.ResourceManager.

func (*ResourceManager) GetAllocationSummaries

GetAllocationSummaries implements rm.ResourceManager.

func (*ResourceManager) GetAllocationSummary

func (k *ResourceManager) GetAllocationSummary(msg sproto.GetAllocationSummary) (*sproto.AllocationSummary, error)

GetAllocationSummary implements rm.ResourceManager.

func (*ResourceManager) GetDefaultAuxResourcePool

GetDefaultAuxResourcePool implements rm.ResourceManager.

func (*ResourceManager) GetDefaultComputeResourcePool

GetDefaultComputeResourcePool implements rm.ResourceManager.

func (ResourceManager) GetExternalJobs

func (ResourceManager) GetExternalJobs(sproto.GetExternalJobs) ([]*jobv1.Job, error)

GetExternalJobs implements rm.ResourceManager.

func (*ResourceManager) GetJobQ

func (k *ResourceManager) GetJobQ(msg sproto.GetJobQ) (map[model.JobID]*sproto.RMJobInfo, error)

GetJobQ implements rm.ResourceManager.

func (*ResourceManager) GetJobQueueStatsRequest

GetJobQueueStatsRequest implements rm.ResourceManager.

func (*ResourceManager) GetResourcePools

GetResourcePools implements rm.ResourceManager.

func (ResourceManager) GetSlot

GetSlot implements rm.ResourceManager. TODO(DET-9919): Implement GetSlot for Kubernetes RM.

func (ResourceManager) GetSlots

GetSlots implements rm.ResourceManager. TODO(DET-9919): Implement GetSlots for Kubernetes RM.

func (ResourceManager) IsReattachableOnlyAfterStarted

func (k ResourceManager) IsReattachableOnlyAfterStarted() bool

IsReattachableOnlyAfterStarted always returns false for the k8s resource manager.

func (*ResourceManager) MoveJob

func (k *ResourceManager) MoveJob(msg sproto.MoveJob) error

MoveJob implements rm.ResourceManager. TODO(DET-9920): This should know which pool it wants.

func (ResourceManager) NotifyContainerRunning

func (k ResourceManager) NotifyContainerRunning(
	msg sproto.NotifyContainerRunning,
) error

NotifyContainerRunning receives a notification from the container to let the master know that the container is running.

func (*ResourceManager) RecoverJobPosition

func (k *ResourceManager) RecoverJobPosition(msg sproto.RecoverJobPosition)

RecoverJobPosition implements rm.ResourceManager.

func (*ResourceManager) Release

func (k *ResourceManager) Release(msg sproto.ResourcesReleased)

Release implements rm.ResourceManager. TODO(DET-9920): This should know which pool it wants.

func (ResourceManager) ResolveResourcePool

func (k ResourceManager) ResolveResourcePool(
	name string,
	workspaceID int,
	slots int,
) (string, error)

ResolveResourcePool resolves the resource pool completely.

func (*ResourceManager) SetAllocationName

func (k *ResourceManager) SetAllocationName(msg sproto.SetAllocationName)

SetAllocationName implements rm.ResourceManager. TODO(DET-9920): This should know which pool it wants.

func (*ResourceManager) SetGroupMaxSlots

func (k *ResourceManager) SetGroupMaxSlots(msg sproto.SetGroupMaxSlots)

SetGroupMaxSlots implements rm.ResourceManager. TODO(DET-9920): This should know which pool it wants.

func (*ResourceManager) SetGroupPriority

func (k *ResourceManager) SetGroupPriority(msg sproto.SetGroupPriority) error

SetGroupPriority implements rm.ResourceManager. TODO(DET-9920): This should know which pool it wants.

func (*ResourceManager) SetGroupWeight

func (k *ResourceManager) SetGroupWeight(msg sproto.SetGroupWeight) error

SetGroupWeight implements rm.ResourceManager. TODO(DET-9920): This should know which pool it wants.

func (ResourceManager) TaskContainerDefaults

func (k ResourceManager) TaskContainerDefaults(
	pool string,
	fallbackConfig model.TaskContainerDefaultsConfig,
) (result model.TaskContainerDefaultsConfig, err error)

TaskContainerDefaults returns TaskContainerDefaults for the specified pool.

func (*ResourceManager) ValidateCommandResources

ValidateCommandResources implements rm.ResourceManager.

func (ResourceManager) ValidateResourcePool

func (k ResourceManager) ValidateResourcePool(name string) error

ValidateResourcePool validates that the named resource pool exists.

func (ResourceManager) ValidateResourcePoolAvailability

func (k ResourceManager) ValidateResourcePoolAvailability(
	name string,
	slots int,
) ([]command.LaunchWarning, error)

ValidateResourcePoolAvailability checks the available resources for a given pool. This is a no-op for k8s.

func (ResourceManager) ValidateResources

func (k ResourceManager) ValidateResources(
	name string,
	slots int,
	command bool,
) error

ValidateResources ensures enough resources are available in the resource pool. This is a no-op for k8s.

type StartTaskPod

type StartTaskPod struct {
	Req          *sproto.AllocateRequest
	AllocationID model.AllocationID
	Spec         tasks.TaskSpec
	Slots        int
	Rank         int
	ResourcePool string
	Namespace    string

	LogContext logger.Context
}

StartTaskPod notifies the pods actor to start a pod with the task spec.

type SummarizeResources

type SummarizeResources struct {
	PoolName string
}

SummarizeResources summarizes pod resources.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL