Documentation
¶
Index ¶
- Constants
- Variables
- func AddLabelsToObject(ctx context.Context, client crclient.Client, obj metav1.Object, ...) error
- func CopyLabelsOrAnnotations(target, source map[string]string) map[string]string
- func DPUCondition(condType provisioningv1.DPUConditionType, reason, message string) *metav1.Condition
- func DeleteObjects(ctx context.Context, client crclient.Client, objs ...crclient.Object) error
- func GenerateBFBCFGFilePath(filename string) string
- func GenerateBFBFilePath(filename string) string
- func GenerateBFBTMPFilePath(uid string) string
- func GenerateBFBTaskName(bfb provisioningv1.BFB) string
- func GenerateBFBVersionFromURL(bfbURL string) string
- func GenerateBFCFGFileName(dpuName string, uid string) string
- func GenerateDMSPodName(dpuNodeName string) string
- func GenerateDMSServerCertName(dpuName string) string
- func GenerateDMSServerSecretName(dpuName string) string
- func GenerateNodeName(dpu *provisioningv1.DPU) string
- func GetClientset(ctx context.Context, client crclient.Client, dc *provisioningv1.DPUCluster) (*kubernetes.Clientset, []byte, error)
- func GetDPUCondition(status *provisioningv1.DPUStatus, conditionType string) (int, *metav1.Condition)
- func GetNamespacedName(obj metav1.Object) types.NamespacedName
- func GetObjects(ctx context.Context, client crclient.Client, objects []crclient.Object) (existObjects []crclient.Object, err error)
- func GetPCIAddrFromDPU(dpu *provisioningv1.DPU, removePrefix bool) (string, error)
- func IsClusterCreated(conditions []metav1.Condition) bool
- func IsNodeReady(node *corev1.Node) bool
- func NeedUpdateLabels(label1 map[string]string, label2 map[string]string) bool
- func NewCondition(condType string, err error, reason, message string) *metav1.Condition
- func RemoteExec(ns, name, container, cmd string) (string, string, error)
- func ReplaceDaemonSetPodNodeNameNodeAffinity(affinity *corev1.Affinity, nodename string) *corev1.Affinity
- func SetDPUCondition(status *provisioningv1.DPUStatus, condition *metav1.Condition) bool
Constants ¶
const ( // RequeueInterval is the interval to requeue the request. RequeueInterval = 5 * time.Second // CFGExtension is the extension of the BFB configuration file. CFGExtension = "cfg" // DPUProvisioningLabelPrefix is the prefix for all DPU provisioning labels. DPUProvisioningLabelPrefix = "provisioning.dpu.nvidia.com/" // DPUNodeRebootMethodLabel is the label that specifies the reboot method DPUNodeRebootMethodLabel = DPUProvisioningLabelPrefix + "reboot-method" // DPUNodeScriptNameLabel is the label that specifies the script name for the custom script reboot method. DPUNodeScriptNameLabel = DPUProvisioningLabelPrefix + "script-name" // DPUSetNameLabel is the label that indicates the name of the DPUSet. DPUSetNameLabel = DPUProvisioningLabelPrefix + "dpuset-name" // DPUSetNamespaceLabel is the label that indicates the namespace of the DPUSet. DPUSetNamespaceLabel = DPUProvisioningLabelPrefix + "dpuset-namespace" // DPUNodeNameLabel is the label that indicates the name of the DPUNode the DPU is associated with. DPUNodeNameLabel = DPUProvisioningLabelPrefix + "dpunode-name" // DPUDeviceNameLabel is the label that indicates the name of the DPUDevice the DPU is associated with. DPUDeviceNameLabel = DPUProvisioningLabelPrefix + "dpudevice-name" // DPUDevicePCIAddressLabel is the label that indicates the PCI address of the DPU device. DPUDevicePCIAddressLabel = DPUProvisioningLabelPrefix + "dpudevice-pciAddress" // DPUDevicePSIDLabel is the label that indicates the PSID of the DPU device. DPUDevicePSIDLabel = DPUProvisioningLabelPrefix + "dpudevice-psid" // DPUDeviceOPNLabel is the label that indicates the OPN of the DPU device. DPUDeviceOPNLabel = DPUProvisioningLabelPrefix + "dpudevice-opn" // DPUDeviceNumOfPFsLabel is the label that indicates the number of PFs on the DPU device. DPUDeviceNumOfPFsLabel = DPUProvisioningLabelPrefix + "dpudevice-num-of-pfs" // DPUDevicePF0NameLabel is the label that indicates the name of the PF0 on the DPU device. 
DPUDevicePF0NameLabel = DPUProvisioningLabelPrefix + "dpudevice-pf0-name" // DPUDeviceBMCIPLabel is the label that indicates the BMC IP of the DPU device. DPUDeviceBMCIPLabel = DPUProvisioningLabelPrefix + "dpudevice-bmc-ip" // TolerationNotReadyKey is the key for the NotReady taint. TolerationNotReadyKey = "node.kubernetes.io/not-ready" // TolerationUnreachableyKey is the key for the Unreachable taint. TolerationUnreachableyKey = "node.kubernetes.io/unreachable" // TolerationUnschedulableKey is the key for the Unschedulable taint. TolerationUnschedulableKey = "node.kubernetes.io/unschedulable" // DPUOOBBridgeConfiguredLabel is the label that indicates that the DPU OOB bridge is configured. DPUOOBBridgeConfiguredLabel = "dpu-oob-bridge-configured" // NodeFeatureDiscoveryLabelPrefix is the prefix for all NodeFeatureDiscovery labels. NodeFeatureDiscoveryLabelPrefix = "feature.node.kubernetes.io/" // NodeSelectorLabel is a label for linking Node with DPU. NodeSelectorLabel = NodeFeatureDiscoveryLabelPrefix + "dpu-enabled" // NodeMaintenanceRequestorID is the requestor ID used for NodeMaintenance CRs NodeMaintenanceRequestorID = "dpu.nvidia.com" // ProvisioningGroupName is the provisioning group, used to identify provisioning as // additional Requestors in NodeMaintenance CR. ProvisioningGroupName = "provisioning.dpu.nvidia.com" // OverrideDMSPodNameAnnotationKey overrides the namespace and name of the pod used as DMS. // TODO: these annotations do not align with the new flow and should be removed. Here's how to simulate the new flow with mock dms: // 1. As we no longer retrieve the address of DMS server from the DMS pod, // the mock dms should set its IP and listener port in DPUNode.spec.DMSAddress // 2. No host network pod will be created in the new flow. // Instead, the provisioning controller calls DMS debug API to run the hostnetwork script on the host. 
// As a result, the annotation can be removed, and the Get service of the mock dms should be extended OverrideDMSPortAnnotationKey = "provisioning.dpu.nvidia.com/override-dms-port" OverrideHostNetworkAnnotationKey = "provisioning.dpu.nvidia.com/override-host-network-pod-name" HoldNodeEffectKey = DPUProvisioningLabelPrefix + "wait-for-external-nodeeffect" TrustedSFCount = DPUProvisioningLabelPrefix + "num-of-trusted-sfs" ProvisioningComponentLabelKey = DPUProvisioningLabelPrefix + "component" )
Variables ¶
var ( // Location of BFB binary files BFBBaseDir = "bfb" KubeconfigBaseDir = "kubeconfig" )
Functions ¶
func AddLabelsToObject ¶
func AddLabelsToObject(ctx context.Context, client crclient.Client, obj metav1.Object, labels map[string]string) error
AddLabelsToObject adds the given labels to any Kubernetes object implementing metav1.Object
func CopyLabelsOrAnnotations ¶
CopyLabelsOrAnnotations merges source labels/annotations into target. If target is nil, it will be initialized.
func DPUCondition ¶
func DPUCondition(condType provisioningv1.DPUConditionType, reason, message string) *metav1.Condition
func DeleteObjects ¶
func GenerateBFBCFGFilePath ¶
func GenerateBFBFilePath ¶
func GenerateBFBTMPFilePath ¶
func GenerateBFBTaskName ¶
func GenerateBFBTaskName(bfb provisioningv1.BFB) string
func GenerateBFCFGFileName ¶
func GenerateDMSPodName ¶
func GenerateNodeName ¶
func GenerateNodeName(dpu *provisioningv1.DPU) string
func GetClientset ¶
func GetClientset(ctx context.Context, client crclient.Client, dc *provisioningv1.DPUCluster) (*kubernetes.Clientset, []byte, error)
func GetDPUCondition ¶
func GetNamespacedName ¶
func GetNamespacedName(obj metav1.Object) types.NamespacedName
func GetObjects ¶
func GetPCIAddrFromDPU ¶
func GetPCIAddrFromDPU(dpu *provisioningv1.DPU, removePrefix bool) (string, error)
func IsClusterCreated ¶
func IsNodeReady ¶
func NeedUpdateLabels ¶
NeedUpdateLabels compares two label maps. It returns true if label2 does not contain all the key-value pairs of label1; otherwise it returns false.
func NewCondition ¶
NewCondition creates a new metav1.Condition with the given parameters. TODO: merge with DPUCondition().
func ReplaceDaemonSetPodNodeNameNodeAffinity ¶
func ReplaceDaemonSetPodNodeNameNodeAffinity(affinity *corev1.Affinity, nodename string) *corev1.Affinity
ReplaceDaemonSetPodNodeNameNodeAffinity replaces the RequiredDuringSchedulingIgnoredDuringExecution NodeAffinity of the given affinity with a new NodeAffinity that selects the given nodeName. Note that this function assumes that no NodeAffinity conflicts with the selected nodeName.
This method is copied from https://github.com/kubernetes/kubernetes/blob/dbc2b0a5c7acc349ea71a14e49913661eaf708d2/pkg/controller/daemon/util/daemonset_util.go#L176
func SetDPUCondition ¶
func SetDPUCondition(status *provisioningv1.DPUStatus, condition *metav1.Condition) bool
Types ¶
This section is empty.