Documentation ¶
Index ¶
- Constants
- func AddAllocationToApp(app *ApplicationInfo, alloc *AllocationInfo)
- func RemoveAllocationFromApp(app *ApplicationInfo, uuid string)
- func SetGuaranteedResource(info *QueueInfo, res *resources.Resource)
- func UpdateClusterInfoFromConfigFile(clusterInfo *ClusterInfo, rmID string) ([]*PartitionInfo, []*PartitionInfo, error)
- type AllocationInfo
- type ApplicationInfo
- func (ai *ApplicationInfo) GetAllAllocations() []*AllocationInfo
- func (ai *ApplicationInfo) GetAllocatedResource() *resources.Resource
- func (ai *ApplicationInfo) GetApplicationState() string
- func (ai *ApplicationInfo) GetTag(tag string) string
- func (ai *ApplicationInfo) GetUser() security.UserGroup
- func (ai *ApplicationInfo) HandleApplicationEvent(event applicationEvent) error
- func (ai *ApplicationInfo) IsAccepted() bool
- func (ai *ApplicationInfo) IsNew() bool
- func (ai *ApplicationInfo) IsRunning() bool
- func (ai *ApplicationInfo) IsStarting() bool
- func (ai *ApplicationInfo) IsWaiting() bool
- func (ai *ApplicationInfo) SetQueue(leaf *QueueInfo)
- func (ai *ApplicationInfo) String() string
- type ClusterInfo
- type NodeInfo
- func (ni *NodeInfo) AddAllocation(alloc *AllocationInfo)
- func (ni *NodeInfo) FitInNode(resRequest *resources.Resource) bool
- func (ni *NodeInfo) GetAllAllocations() []*AllocationInfo
- func (ni *NodeInfo) GetAllocatedResource() *resources.Resource
- func (ni *NodeInfo) GetAllocation(uuid string) *AllocationInfo
- func (ni *NodeInfo) GetAttribute(key string) string
- func (ni *NodeInfo) GetAvailableResource() *resources.Resource
- func (ni *NodeInfo) GetCapacity() *resources.Resource
- func (ni *NodeInfo) GetOccupiedResource() *resources.Resource
- func (ni *NodeInfo) IsSchedulable() bool
- func (ni *NodeInfo) RemoveAllocation(uuid string) *AllocationInfo
- func (ni *NodeInfo) SetSchedulable(schedulable bool)
- func (ni *NodeInfo) SyncAvailableResource() bool
- type PartitionInfo
- func (pi *PartitionInfo) CalculateNodesResourceUsage() map[string][]int
- func (pi *PartitionInfo) CopyNodeInfos() []*NodeInfo
- func (pi *PartitionInfo) CreateQueues(queueName string) error
- func (pi *PartitionInfo) GetApplications() []*ApplicationInfo
- func (pi *PartitionInfo) GetNode(nodeID string) *NodeInfo
- func (pi *PartitionInfo) GetNodeSortingPolicy() common.SortingPolicy
- func (pi *PartitionInfo) GetNodes() map[string]*NodeInfo
- func (pi *PartitionInfo) GetQueue(name string) *QueueInfo
- func (pi *PartitionInfo) GetRules() []configs.PlacementRule
- func (pi *PartitionInfo) GetTotalAllocationCount() int
- func (pi *PartitionInfo) GetTotalApplicationCount() int
- func (pi *PartitionInfo) GetTotalNodeCount() int
- func (pi *PartitionInfo) GetTotalPartitionResource() *resources.Resource
- func (pi *PartitionInfo) NeedPreemption() bool
- func (pi *PartitionInfo) Remove()
- func (pi *PartitionInfo) RemoveApplication(appID string) (*ApplicationInfo, []*AllocationInfo)
- func (pi *PartitionInfo) RemoveNode(nodeID string) []*AllocationInfo
- type QueueInfo
- func (qi *QueueInfo) CheckAdminAccess(user security.UserGroup) bool
- func (qi *QueueInfo) CheckSubmitAccess(user security.UserGroup) bool
- func (qi *QueueInfo) CurrentState() string
- func (qi *QueueInfo) GetAllocatedResource() *resources.Resource
- func (qi *QueueInfo) GetCopyOfChildren() map[string]*QueueInfo
- func (qi *QueueInfo) GetGuaranteedResource() *resources.Resource
- func (qi *QueueInfo) GetMaxResource() *resources.Resource
- func (qi *QueueInfo) GetProperties() map[string]string
- func (qi *QueueInfo) GetQueueInfos() dao.QueueDAOInfo
- func (qi *QueueInfo) GetQueuePath() string
- func (qi *QueueInfo) HandleQueueEvent(event SchedulingObjectEvent) error
- func (qi *QueueInfo) IncAllocatedResource(alloc *resources.Resource, nodeReported bool) error
- func (qi *QueueInfo) IsDraining() bool
- func (qi *QueueInfo) IsLeafQueue() bool
- func (qi *QueueInfo) IsManaged() bool
- func (qi *QueueInfo) IsRunning() bool
- func (qi *QueueInfo) IsStopped() bool
- func (qi *QueueInfo) MarkQueueForRemoval()
- func (qi *QueueInfo) RemoveQueue() bool
- func (qi *QueueInfo) UpdateUnManagedMaxResource(max *resources.Resource)
- type SchedulingObjectEvent
- type SchedulingObjectState
Constants ¶
const ( RunApplication applicationEvent = iota WaitApplication RejectApplication CompleteApplication KillApplication )
const ( New applicationState = iota Accepted Starting Running Waiting Rejected Completed Killed )
const ( DOT = "." DotReplace = "_dot_" )
Variables ¶
This section is empty.
Functions ¶
func AddAllocationToApp ¶
func AddAllocationToApp(app *ApplicationInfo, alloc *AllocationInfo)
Add allocation to cache app for tests
func RemoveAllocationFromApp ¶ added in v0.9.0
func RemoveAllocationFromApp(app *ApplicationInfo, uuid string)
Remove allocation from cache app for tests
func SetGuaranteedResource ¶
Utility function to allow tests to set the guaranteed resource that is not exported
func UpdateClusterInfoFromConfigFile ¶
func UpdateClusterInfoFromConfigFile(clusterInfo *ClusterInfo, rmID string) ([]*PartitionInfo, []*PartitionInfo, error)
Update the existing cluster info: - add new partitions - update existing partitions - remove deleted partitions. Updates and adds are processed differently internally; outside of this method they are the same.
Types ¶
type AllocationInfo ¶
type AllocationInfo struct { // Original protocol AllocationProto *si.Allocation // Other information ApplicationID string AllocatedResource *resources.Resource }
Related to Allocation
func CreateMockAllocationInfo ¶
func CreateMockAllocationInfo(appID string, res *resources.Resource, uuid string, queueName string, nodeID string) *AllocationInfo
AllocationInfo for tests inside and outside the cache
func NewAllocationInfo ¶
func NewAllocationInfo(uuid string, alloc *commonevents.AllocationProposal) *AllocationInfo
type ApplicationInfo ¶
type ApplicationInfo struct { ApplicationID string Partition string QueueName string SubmissionTime int64 sync.RWMutex // contains filtered or unexported fields }
Related to applications
func NewApplicationInfo ¶
func NewApplicationInfo(appID, partition, queueName string, ugi security.UserGroup, tags map[string]string) *ApplicationInfo
Create a new application
func (*ApplicationInfo) GetAllAllocations ¶
func (ai *ApplicationInfo) GetAllAllocations() []*AllocationInfo
Return the current allocations for the application.
func (*ApplicationInfo) GetAllocatedResource ¶
func (ai *ApplicationInfo) GetAllocatedResource() *resources.Resource
Return the total allocated resources for the application.
func (*ApplicationInfo) GetApplicationState ¶
func (ai *ApplicationInfo) GetApplicationState() string
Return the current state or a checked specific state for the application. The state machine handles the locking.
func (*ApplicationInfo) GetTag ¶
func (ai *ApplicationInfo) GetTag(tag string) string
Get a tag from the application. Note: tags are not case sensitive.
func (*ApplicationInfo) GetUser ¶
func (ai *ApplicationInfo) GetUser() security.UserGroup
get a copy of the user details for the application
func (*ApplicationInfo) HandleApplicationEvent ¶
func (ai *ApplicationInfo) HandleApplicationEvent(event applicationEvent) error
Handle the state event for the application. The state machine handles the locking.
func (*ApplicationInfo) IsAccepted ¶ added in v0.9.0
func (ai *ApplicationInfo) IsAccepted() bool
func (*ApplicationInfo) IsNew ¶ added in v0.9.0
func (ai *ApplicationInfo) IsNew() bool
func (*ApplicationInfo) IsRunning ¶ added in v0.9.0
func (ai *ApplicationInfo) IsRunning() bool
func (*ApplicationInfo) IsStarting ¶ added in v0.9.0
func (ai *ApplicationInfo) IsStarting() bool
func (*ApplicationInfo) IsWaiting ¶ added in v0.9.0
func (ai *ApplicationInfo) IsWaiting() bool
func (*ApplicationInfo) SetQueue ¶
func (ai *ApplicationInfo) SetQueue(leaf *QueueInfo)
Set the leaf queue the application runs in. Update the queue name also to match as this might be different from the queue that was given when submitting the application.
func (*ApplicationInfo) String ¶ added in v0.9.0
func (ai *ApplicationInfo) String() string
type ClusterInfo ¶
type ClusterInfo struct { // RM Event Handler EventHandlers handler.EventHandlers sync.RWMutex // contains filtered or unexported fields }
func NewClusterInfo ¶
func NewClusterInfo() (info *ClusterInfo)
func (*ClusterInfo) GetPartition ¶
func (m *ClusterInfo) GetPartition(name string) *PartitionInfo
Get the partition by name. Locked call, used outside of the cache.
func (*ClusterInfo) HandleEvent ¶
func (m *ClusterInfo) HandleEvent(ev interface{})
Implement methods for Cache events
func (*ClusterInfo) ListPartitions ¶
func (m *ClusterInfo) ListPartitions() []string
Get the list of partitions. Locked call, used outside of the cache.
func (*ClusterInfo) StartService ¶
func (m *ClusterInfo) StartService(handlers handler.EventHandlers)
Start service
type NodeInfo ¶
type NodeInfo struct { // Fields for fast access These fields are considered read only. // Values should only be set when creating a new node and never changed. NodeID string Hostname string Rackname string Partition string sync.RWMutex // contains filtered or unexported fields }
The node structure used throughout the system
func NewNodeForSort ¶
Node to test with sorters (setting available resources)
func NewNodeForTest ¶
Node to test with anything but the sorters (setting total resources)
func NewNodeInfo ¶
func NewNodeInfo(proto *si.NewNodeInfo) *NodeInfo
Create a new node from the protocol object. The object can only be nil if the si.NewNodeInfo is nil, otherwise a valid object is returned.
func (*NodeInfo) AddAllocation ¶
func (ni *NodeInfo) AddAllocation(alloc *AllocationInfo)
Add the allocation to the node. Used resources will increase, available will decrease. This cannot fail. A nil AllocationInfo makes no changes.
func (*NodeInfo) FitInNode ¶
Check if the allocation fits in the node's resources. Unlocked call as the totalResource can not be changed.
func (*NodeInfo) GetAllAllocations ¶
func (ni *NodeInfo) GetAllAllocations() []*AllocationInfo
Get a copy of the allocations on this node
func (*NodeInfo) GetAllocatedResource ¶
Return the currently allocated resource for the node. It returns a cloned object as we do not want to allow modifications to be made to the value of the node.
func (*NodeInfo) GetAllocation ¶
func (ni *NodeInfo) GetAllocation(uuid string) *AllocationInfo
Return the allocation based on the uuid of the allocation. returns nil if the allocation is not found
func (*NodeInfo) GetAttribute ¶
Get an attribute by name. The most used attributes can be directly accessed via the fields: Hostname, Rackname and Partition. This is a lock free call. All attributes are considered read only.
func (*NodeInfo) GetAvailableResource ¶
Return the currently available resource for the node. It returns a cloned object as we do not want to allow modifications to be made to the value of the node.
func (*NodeInfo) GetCapacity ¶
func (*NodeInfo) GetOccupiedResource ¶
func (*NodeInfo) IsSchedulable ¶
Can this node be used in scheduling.
func (*NodeInfo) RemoveAllocation ¶
func (ni *NodeInfo) RemoveAllocation(uuid string) *AllocationInfo
Remove the allocation from the node. Returns nil if the allocation was not found and no changes are made. If the allocation is found the AllocationInfo removed is returned. Used resources will decrease, available will increase as per the allocation removed.
func (*NodeInfo) SetSchedulable ¶
Set the node to unschedulable. This will cause the node to be skipped during the scheduling cycle. Visible for testing only
func (*NodeInfo) SyncAvailableResource ¶ added in v0.9.0
Return the fact that the available resource has changed for the node. This should only be called by the SchedulingNode when it checks to update its cached value. This handles two cases: - removal of allocations via events - update of the node capacity via events
type PartitionInfo ¶
type PartitionInfo struct { Name string Root *QueueInfo RmID string sync.RWMutex // contains filtered or unexported fields }
Related to partitions
func CreatePartitionInfo ¶
func CreatePartitionInfo(data []byte) (*PartitionInfo, error)
Create a partition for testing from a yaml configuration
func SetClusterInfoFromConfigFile ¶
func SetClusterInfoFromConfigFile(clusterInfo *ClusterInfo, rmID string, policyGroup string) ([]*PartitionInfo, error)
Create the new partition configuration and add all of them to the cluster. This function may only be called by the scheduler when a RM registers. It creates a new PartitionInfo from scratch and does not merge the configurations.
func (*PartitionInfo) CalculateNodesResourceUsage ¶
func (pi *PartitionInfo) CalculateNodesResourceUsage() map[string][]int
calculate overall nodes resource usage and returns a map as the result, where the key is the resource name, e.g memory, and the value is a []int, which is a slice with 10 elements, each element represents a range of resource usage, such as
0: 0%->10% 1: 10% -> 20% ... 9: 90% -> 100%
the element value represents the number of nodes that fall into this bucket. if slice[9] = 3, this means there are 3 nodes whose resource usage is in the range 90% to 100%.
func (*PartitionInfo) CopyNodeInfos ¶
func (pi *PartitionInfo) CopyNodeInfos() []*NodeInfo
Return a copy of all the nodes registered to this partition
func (*PartitionInfo) CreateQueues ¶
func (pi *PartitionInfo) CreateQueues(queueName string) error
Create the new queue that is returned from a rule. It creates a queue with all parents needed.
func (*PartitionInfo) GetApplications ¶
func (pi *PartitionInfo) GetApplications() []*ApplicationInfo
func (*PartitionInfo) GetNode ¶
func (pi *PartitionInfo) GetNode(nodeID string) *NodeInfo
Get the node object for the node ID as tracked by the partition. This will return nil if the node is not part of this partition. Visible by tests
func (*PartitionInfo) GetNodeSortingPolicy ¶
func (pi *PartitionInfo) GetNodeSortingPolicy() common.SortingPolicy
Is bin-packing scheduling enabled? TODO: a finer-grained, enum-based return model would be better here than a bool.
func (*PartitionInfo) GetNodes ¶
func (pi *PartitionInfo) GetNodes() map[string]*NodeInfo
func (*PartitionInfo) GetQueue ¶
func (pi *PartitionInfo) GetQueue(name string) *QueueInfo
Get the queue from the structure based on the fully qualified name. Wrapper around the unlocked version getQueue()
func (*PartitionInfo) GetRules ¶
func (pi *PartitionInfo) GetRules() []configs.PlacementRule
Return the config element for the placement rules
func (*PartitionInfo) GetTotalAllocationCount ¶
func (pi *PartitionInfo) GetTotalAllocationCount() int
func (*PartitionInfo) GetTotalApplicationCount ¶
func (pi *PartitionInfo) GetTotalApplicationCount() int
func (*PartitionInfo) GetTotalNodeCount ¶
func (pi *PartitionInfo) GetTotalNodeCount() int
func (*PartitionInfo) GetTotalPartitionResource ¶
func (pi *PartitionInfo) GetTotalPartitionResource() *resources.Resource
func (*PartitionInfo) NeedPreemption ¶
func (pi *PartitionInfo) NeedPreemption() bool
Does the partition allow pre-emption?
func (*PartitionInfo) Remove ¶
func (pi *PartitionInfo) Remove()
The partition has been removed from the configuration and must be removed. This is the cleanup triggered by the exiting scheduler partition. Just unlinking from the cluster should be enough. All other clean up is triggered from the scheduler
func (*PartitionInfo) RemoveApplication ¶
func (pi *PartitionInfo) RemoveApplication(appID string) (*ApplicationInfo, []*AllocationInfo)
Remove the application from the partition. This will also release all the allocations for application from the queue and nodes.
func (*PartitionInfo) RemoveNode ¶
func (pi *PartitionInfo) RemoveNode(nodeID string) []*AllocationInfo
Remove a node from the partition. This locks the partition and calls the internal unlocked version.
type QueueInfo ¶
type QueueInfo struct { Name string Parent *QueueInfo // link to the parent queue sync.RWMutex // lock for updating the queue // contains filtered or unexported fields }
The queue structure as used throughout the scheduler
func NewManagedQueue ¶
func NewManagedQueue(conf configs.QueueConfig, parent *QueueInfo) (*QueueInfo, error)
Create a new queue from the configuration object. The configuration is validated before we call this: we should not see any errors.
func NewUnmanagedQueue ¶
Create a new unmanaged queue. An unmanaged queue is created as result of a rule. Rule based queues which might not fit in the structure or fail parsing.
func (*QueueInfo) CheckAdminAccess ¶
Check if the user has access to the queue for admin actions recursively.
func (*QueueInfo) CheckSubmitAccess ¶
Check if the user has access to the queue to submit an application recursively. This will check the submit ACL and the admin ACL.
func (*QueueInfo) CurrentState ¶
Return the current state of the queue
func (*QueueInfo) GetAllocatedResource ¶
Return the currently allocated resource for the queue. It returns a cloned object as we do not want to allow modifications to be made to the value of the queue.
func (*QueueInfo) GetCopyOfChildren ¶
func (*QueueInfo) GetGuaranteedResource ¶
Return the guaranteed resource for the queue.
func (*QueueInfo) GetMaxResource ¶
Return the max resource for the queue. If not set the returned resource will be nil.
func (*QueueInfo) GetProperties ¶ added in v0.9.0
Return a copy of the properties for this queue
func (*QueueInfo) GetQueueInfos ¶ added in v0.9.0
func (qi *QueueInfo) GetQueueInfos() dao.QueueDAOInfo
func (*QueueInfo) GetQueuePath ¶
Get the fully qualified path name
func (*QueueInfo) HandleQueueEvent ¶
func (qi *QueueInfo) HandleQueueEvent(event SchedulingObjectEvent) error
Handle the state event for the queue. The state machine handles the locking.
func (*QueueInfo) IncAllocatedResource ¶
Increment the allocated resources for this queue (recursively). Guard against going over max resources if set.
func (*QueueInfo) IsDraining ¶
Is the queue marked for deletion and can only handle existing application requests. No new applications will be accepted.
func (*QueueInfo) IsLeafQueue ¶
Return if this is a leaf queue or not
func (*QueueInfo) MarkQueueForRemoval ¶
func (qi *QueueInfo) MarkQueueForRemoval()
Mark the managed queue for removal from the system. This can be executed multiple times and is only effective the first time. This is a noop on an unmanaged queue
func (*QueueInfo) RemoveQueue ¶
Remove the queue from the structure. Since nothing is allocated there shouldn't be anything referencing this queue any more. The real removal is removing the queue from the parent's child list
func (*QueueInfo) UpdateUnManagedMaxResource ¶ added in v0.9.0
Update the max resource for an unmanaged leaf queue.
type SchedulingObjectEvent ¶
type SchedulingObjectEvent int
---------------------------------- object events these events are used for: partitions and managed queues ----------------------------------
const ( Remove SchedulingObjectEvent = iota Start Stop )
func (SchedulingObjectEvent) String ¶
func (soe SchedulingObjectEvent) String() string
type SchedulingObjectState ¶
type SchedulingObjectState int
---------------------------------- object states these states are used by: partitions and managed queues ----------------------------------
const ( Active SchedulingObjectState = iota Draining Stopped )
func (SchedulingObjectState) String ¶
func (sos SchedulingObjectState) String() string