Package controller provides a Kubernetes controller for a PyTorchJob resource.

var (
	// KeyFunc is a short name for DeletionHandlingMetaNamespaceKeyFunc.
	// IndexerInformer uses a delta queue, so this key function is required for
	// delete events; it also works fine for non-delete events.
	KeyFunc = cache.DeletionHandlingMetaNamespaceKeyFunc

	// DefaultPyTorchControllerConfiguration is the suggested operator configuration for production.
	DefaultPyTorchControllerConfiguration = jobcontroller.JobControllerConfiguration{
		ReconcilerSyncLoopPeriod: metav1.Duration{Duration: 15 * time.Second},
		EnableGangScheduling:     false,
	}
)


func ContainMasterSpec

func ContainMasterSpec(job *v1beta2.PyTorchJob) bool

func GetPortFromPyTorchJob

func GetPortFromPyTorchJob(job *v1beta2.PyTorchJob, rtype v1beta2.PyTorchReplicaType) (int32, error)

GetPortFromPyTorchJob gets the port of the PyTorch container.

func NewUnstructuredPyTorchJobInformer

func NewUnstructuredPyTorchJobInformer(restConfig *restclientset.Config, namespace string) jobinformersv1beta2.PyTorchJobInformer


type PyTorchController

type PyTorchController struct {
	// contains filtered or unexported fields
}

PyTorchController is the type for PyTorchJob Controller, which manages the lifecycle of PyTorchJobs.

func NewPyTorchController

func NewPyTorchController(

	jobInformer jobinformersv1beta2.PyTorchJobInformer,
	kubeClientSet kubeclientset.Interface,
	kubeBatchClientSet kubebatchclient.Interface,
	jobClientSet jobclientset.Interface,
	kubeInformerFactory kubeinformers.SharedInformerFactory,

	jobInformerFactory jobinformers.SharedInformerFactory,
	option options.ServerOption) *PyTorchController

NewPyTorchController returns a new PyTorchJob controller.

func (*PyTorchController) ControllerName

func (pc *PyTorchController) ControllerName() string

func (*PyTorchController) GetAPIGroupVersion

func (pc *PyTorchController) GetAPIGroupVersion() schema.GroupVersion

func (*PyTorchController) GetAPIGroupVersionKind

func (pc *PyTorchController) GetAPIGroupVersionKind() schema.GroupVersionKind

func (*PyTorchController) GetGroupNameLabelKey

func (pc *PyTorchController) GetGroupNameLabelKey() string

func (*PyTorchController) GetGroupNameLabelValue

func (pc *PyTorchController) GetGroupNameLabelValue() string

func (*PyTorchController) GetJobFromAPIClient

func (pc *PyTorchController) GetJobFromAPIClient(namespace, name string) (metav1.Object, error)

func (*PyTorchController) GetJobFromInformerCache

func (pc *PyTorchController) GetJobFromInformerCache(namespace, name string) (metav1.Object, error)

func (*PyTorchController) GetJobNameLabelKey

func (pc *PyTorchController) GetJobNameLabelKey() string

func (*PyTorchController) GetJobRoleKey

func (pc *PyTorchController) GetJobRoleKey() string

func (*PyTorchController) GetReplicaIndexLabelKey

func (pc *PyTorchController) GetReplicaIndexLabelKey() string

func (*PyTorchController) GetReplicaTypeLabelKey

func (pc *PyTorchController) GetReplicaTypeLabelKey() string

func (*PyTorchController) NewPyTorchJobInformer

func (pc *PyTorchController) NewPyTorchJobInformer(jobInformerFactory jobinformers.SharedInformerFactory) jobinformersv1beta2.PyTorchJobInformer

NewPyTorchJobInformer returns PyTorchJobInformer from the given factory.

func (*PyTorchController) Run

func (pc *PyTorchController) Run(threadiness int, stopCh <-chan struct{}) error

Run sets up the event handlers for the types we are interested in, syncs the informer caches, and starts the workers. It blocks until stopCh is closed, at which point it shuts down the workqueue and waits for the workers to finish processing their current work items.