Documentation
¶
Index ¶
- Constants
- Variables
- func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo TensorFusionInfo)
- func AddTFDefaultClientConfBeforePatch(ctx context.Context, pod *v1.Pod, pool *tfv1.GPUPool, tfInfo TensorFusionInfo, ...)
- func AddTFHypervisorConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, pool *tfv1.GPUPool)
- func AddTFNodeDiscoveryConfAfterTemplate(ctx context.Context, tmpl *v1.PodTemplateSpec, pool *tfv1.GPUPool, ...)
- func AddWorkerConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, workerConfig *tfv1.WorkerConfig, ...) string
- func AppendTFWorkerLabelsAndAnnotationsAfterTemplate(podTmpl *v1.PodTemplate, workload *tfv1.TensorFusionWorkload, ...) (map[string]string, map[string]string)
- func CalculateExponentialBackoffWithJitter(retryCount int64) time.Duration
- func CompareAndGetObjectHash(hash string, obj ...any) (bool, string)
- func CurrentIP() string
- func CurrentNamespace() string
- func EqualConditionsDisregardTransitionTime(a, b []metav1.Condition) bool
- func EscapeJSONPointer(s string) string
- func ExtractPoolNameFromNodeLabel(node *tfv1.GPUNode) string
- func FindFirstLevelOwnerReference(obj metav1.Object) *metav1.OwnerReference
- func FindRootOwnerReference(ctx context.Context, c client.Client, namespace string, obj metav1.Object) (*metav1.OwnerReference, error)
- func GetEnvOrDefault(key, defaultValue string) string
- func GetGPUResource(pod *corev1.Pod, isRequest bool) (tfv1.Resource, error)
- func GetInitialGPUNodeSelector() []string
- func GetObjectHash(objs ...any) string
- func GetSelfServiceAccountNameFull() string
- func GetSelfServiceAccountNameShort() string
- func HandleFinalizer[T client.Object](ctx context.Context, obj T, r client.Client, ...) (shouldReturn bool, err error)
- func HasGPUResourceRequest(pod *corev1.Pod) bool
- func InitServiceAccountConfig()
- func IsPodConditionTrue(conditions []corev1.PodCondition, conditionType corev1.PodConditionType) bool
- func IsPodStopped(pod *corev1.Pod) bool
- func IsProgressiveMigration() bool
- func IsTensorFusionPod(pod *corev1.Pod) bool
- func IsTensorFusionWorker(pod *corev1.Pod) bool
- func LoadConfigFromFile[T any](filename string, target *T) error
- func NewShortID(length int) string
- func ReadServiceAccountToken() string
- func SetProgressiveMigration(isProgressiveMigration bool)
- func WatchConfigFileChanges(ctx context.Context, filename string) (<-chan []byte, error)
- type TensorFusionInfo
Constants ¶
const ( WatchConfigFileChangesInterval = 15 * time.Second ServiceAccountTokenPath = "/var/run/secrets/kubernetes.io/serviceaccount/token" )
Variables ¶
var ErrNextLoop = errors.New("stop this loop and return the associated Result object")
ErrNextLoop is not a real error. It forces the current reconciliation loop to stop and return the associated Result object.
var ErrTerminateLoop = errors.New("stop this loop and do not requeue")
ErrTerminateLoop is not a real error. It forces the current reconciliation loop to stop without requeueing.
var GPUResourceNames = []corev1.ResourceName{
"nvidia.com/gpu",
"amd.com/gpu",
}
var IsTestMode = false
Functions ¶
func AddOrOverrideTFClientMissingAnnotationsBeforePatch ¶ added in v1.37.0
func AddOrOverrideTFClientMissingAnnotationsBeforePatch(pod *v1.Pod, tfInfo TensorFusionInfo)
func AddTFDefaultClientConfBeforePatch ¶ added in v1.37.0
func AddTFHypervisorConfAfterTemplate ¶ added in v1.37.0
func AddTFNodeDiscoveryConfAfterTemplate ¶ added in v1.37.0
func AddWorkerConfAfterTemplate ¶ added in v1.37.0
func AddWorkerConfAfterTemplate(ctx context.Context, spec *v1.PodSpec, workerConfig *tfv1.WorkerConfig, hypervisorConfig *tfv1.HypervisorConfig, workload *tfv1.TensorFusionWorkload) string
func AppendTFWorkerLabelsAndAnnotationsAfterTemplate ¶ added in v1.37.0
func AppendTFWorkerLabelsAndAnnotationsAfterTemplate( podTmpl *v1.PodTemplate, workload *tfv1.TensorFusionWorkload, containerName string, ) (map[string]string, map[string]string)
func CompareAndGetObjectHash ¶ added in v1.28.0
func CurrentNamespace ¶
func CurrentNamespace() string
func EqualConditionsDisregardTransitionTime ¶ added in v1.35.0
func EscapeJSONPointer ¶ added in v1.26.3
EscapeJSONPointer escapes a string according to the JSON Pointer spec (RFC 6901). It escapes '~' as '~0' and '/' as '~1'.
func ExtractPoolNameFromNodeLabel ¶ added in v1.33.1
func FindFirstLevelOwnerReference ¶ added in v1.35.0
func FindFirstLevelOwnerReference(obj metav1.Object) *metav1.OwnerReference
FindFirstLevelOwnerReference finds the first-level (direct) owner reference for a given object (e.g. Pod). Unlike FindRootOwnerReference, it does not walk further up the owner chain.
func FindRootOwnerReference ¶ added in v1.26.9
func FindRootOwnerReference(ctx context.Context, c client.Client, namespace string, obj metav1.Object) (*metav1.OwnerReference, error)
FindRootOwnerReference recursively finds the root owner reference for a given object (e.g. Pod).
func GetEnvOrDefault ¶ added in v1.34.0
func GetGPUResource ¶ added in v1.35.0
func GetInitialGPUNodeSelector ¶ added in v1.44.0
func GetInitialGPUNodeSelector() []string
func GetObjectHash ¶
GetObjectHash generates a short FNV-1a hash for one or more objects.
func GetSelfServiceAccountNameFull ¶ added in v1.37.0
func GetSelfServiceAccountNameFull() string
func GetSelfServiceAccountNameShort ¶ added in v1.37.0
func GetSelfServiceAccountNameShort() string
func HandleFinalizer ¶
func HandleFinalizer[T client.Object]( ctx context.Context, obj T, r client.Client, deleteHook func(context.Context, T) (bool, error), ) (shouldReturn bool, err error)
HandleFinalizer ensures proper finalizer management for Kubernetes resources. It automatically adds the finalizer when needed, and removes it after successful cleanup. Returns (shouldReturn, err):
- shouldReturn: true if the caller should immediately return and wait for the next reconcile.
- err: any error encountered during update or deleteHook.
func HasGPUResourceRequest ¶ added in v1.39.0
func InitServiceAccountConfig ¶ added in v1.36.1
func InitServiceAccountConfig()
func IsPodConditionTrue ¶
func IsPodConditionTrue(conditions []corev1.PodCondition, conditionType corev1.PodConditionType) bool
func IsPodStopped ¶ added in v1.37.0
func IsProgressiveMigration ¶ added in v1.39.0
func IsProgressiveMigration() bool
func IsTensorFusionPod ¶ added in v1.39.0
func IsTensorFusionWorker ¶ added in v1.39.1
func LoadConfigFromFile ¶ added in v1.34.0
func NewShortID ¶ added in v1.35.0
func ReadServiceAccountToken ¶ added in v1.36.1
func ReadServiceAccountToken() string
func SetProgressiveMigration ¶ added in v1.39.0
func SetProgressiveMigration(isProgressiveMigration bool)
For testing purposes only.
func WatchConfigFileChanges ¶ added in v1.34.0
WatchConfigFileChanges watches a file for changes and sends the file content through a channel when changes are detected. The channel will receive the raw file content as []byte whenever the file is modified. The watch interval is set to 15 seconds by default.
Types ¶
type TensorFusionInfo ¶ added in v1.37.0
type TensorFusionInfo struct { Profile *tfv1.WorkloadProfileSpec DynamicReplicas bool EnabledReplicas *int32 WorkloadName string ContainerNames []string GenWorkload bool // Pod mutating webhook can not get Pod UID sometimes, // thus need pod controller to set the owner reference PendingSetPodAsOwner bool }