utils

package

v1.4.2 Latest Latest Go to latest Published: Jul 16, 2025 License: Apache-2.0, BSD-2-Clause, MIT Imports: 33 Imported by: 27

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/ray-project/kuberay

Links

Open Source Insights

Documentation ¶

Index ¶

Constants
Variables
func CalculateAvailableReplicas(pods corev1.PodList) int32
func CalculateDesiredReplicas(ctx context.Context, cluster *rayv1.RayCluster) int32
func CalculateDesiredResources(cluster *rayv1.RayCluster) corev1.ResourceList
func CalculateMaxReplicas(cluster *rayv1.RayCluster) int32
func CalculateMinReplicas(cluster *rayv1.RayCluster) int32
func CalculateMinResources(cluster *rayv1.RayCluster) corev1.ResourceList
func CalculatePodResource(podSpec corev1.PodSpec) corev1.ResourceList
func CalculateReadyReplicas(pods corev1.PodList) int32
func CheckAllPodsRunning(ctx context.Context, runningPods corev1.PodList) bool
func CheckLabel(s string) string
func CheckName(s string) string
func CheckRouteName(ctx context.Context, s string, n string) string
func CompareJsonStruct(objA interface{}, objB interface{}) bool
func Contains(elems []string, searchTerm string) bool
func ConvertResourceListToMapString(resourceList corev1.ResourceList) map[string]resource.Quantity
func EnvVarByName(envName string, envVars []corev1.EnvVar) (corev1.EnvVar, bool)
func EnvVarExists(envName string, envVars []corev1.EnvVar) bool
func ExtractRayIPFromFQDN(fqdnRayIP string) string
func FetchHeadServiceURL(ctx context.Context, cli client.Client, rayCluster *rayv1.RayCluster, ...) (string, error)
func FindContainerPort(container *corev1.Container, portName string, defaultPort int) int
func FindHeadPodReadyCondition(headPod *corev1.Pod) metav1.Condition
func FindRayClusterSuspendStatus(instance *rayv1.RayCluster) rayv1.RayClusterConditionType
func FormatInt32(n int32) string
func GenerateFQDNServiceName(ctx context.Context, cluster rayv1.RayCluster, namespace string) string
func GenerateHeadServiceName(crdType CRDType, clusterSpec rayv1.RayClusterSpec, ownerName string) (string, error)
func GenerateIdentifier(clusterName string, nodeType rayv1.RayNodeType) string
func GenerateIngressName(clusterName string) string
func GenerateJsonHash(obj interface{}) (string, error)
func GenerateRayClusterName(serviceName string) string
func GenerateRayJobId(rayjob string) string
func GenerateRouteName(clusterName string) string
func GenerateServeServiceLabel(serviceName string) string
func GenerateServeServiceName(serviceName string) string
func GetClusterDomainName() string
func GetClusterType() bool
func GetContainerCommand(additionalOptions []string) []string
func GetHeadGroupServiceAccountName(cluster *rayv1.RayCluster) string
func GetNamespace(metaData metav1.ObjectMeta) string
func GetRayClusterNameFromService(svc *corev1.Service) string
func GetRayDashboardClientFunc(mgr ctrl.Manager, useKubernetesProxy bool) func() RayDashboardClientInterface
func GetRayHttpProxyClientFunc(mgr ctrl.Manager, useKubernetesProxy bool) func() RayHttpProxyClientInterface
func GetWorkerGroupDesiredReplicas(ctx context.Context, workerGroupSpec rayv1.WorkerGroupSpec) int32
func IsAutoscalingEnabled(spec *rayv1.RayClusterSpec) bool
func IsAutoscalingV2Enabled(spec *rayv1.RayClusterSpec) bool
func IsCreated(pod *corev1.Pod) bool
func IsGCSFaultToleranceEnabled(spec *rayv1.RayClusterSpec, annotations map[string]string) bool
func IsJobFinished(j *batchv1.Job) (batchv1.JobConditionType, bool)
func IsRunningAndReady(pod *corev1.Pod) bool
func ManagedByExternalController(controllerName *string) *string
func PodName(prefix string, nodeType rayv1.RayNodeType, isGenerateName bool) string
func RayClusterReplicaFailureReason(err error) string
func RayOriginatedFromCRDLabelValue(crdType CRDType) string
func SafeUint64ToInt64(n uint64) int64
func TrimJobName(jobName string) string
func ValidateRayClusterMetadata(metadata metav1.ObjectMeta) error
func ValidateRayClusterSpec(spec *rayv1.RayClusterSpec, annotations map[string]string) error
func ValidateRayClusterStatus(instance *rayv1.RayCluster) error
func ValidateRayJobMetadata(metadata metav1.ObjectMeta) error
func ValidateRayJobSpec(rayJob *rayv1.RayJob) error
func ValidateRayJobStatus(rayJob *rayv1.RayJob) error
func ValidateRayServiceMetadata(metadata metav1.ObjectMeta) error
func ValidateRayServiceSpec(rayService *rayv1.RayService) error
type BaseDashboardClient
type CRDType
- func GetCRDType(key string) CRDType
type ClientProvider
type FakeRayDashboardClient
- func (r *FakeRayDashboardClient) DeleteJob(_ context.Context, _ string) error
- func (r *FakeRayDashboardClient) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)
- func (r *FakeRayDashboardClient) GetJobLog(_ context.Context, _ string) (*string, error)
- func (r *FakeRayDashboardClient) GetMultiApplicationStatus(_ context.Context) (map[string]*ServeApplicationStatus, error)
- func (r *FakeRayDashboardClient) GetServeDetails(_ context.Context) (*ServeDetails, error)
- func (r *FakeRayDashboardClient) InitClient(_ context.Context, url string, _ *rayv1.RayCluster) error
- func (r *FakeRayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, error)
- func (r *FakeRayDashboardClient) SetMultiApplicationStatuses(statuses map[string]*ServeApplicationStatus)
- func (r *FakeRayDashboardClient) StopJob(_ context.Context, _ string) (err error)
- func (r *FakeRayDashboardClient) SubmitJob(_ context.Context, _ *rayv1.RayJob) (jobId string, err error)
- func (r *FakeRayDashboardClient) SubmitJobReq(_ context.Context, _ *RayJobRequest, _ *string) (string, error)
- func (r *FakeRayDashboardClient) UpdateDeployments(_ context.Context, _ []byte) error
type FakeRayHttpProxyClient
- func (fc *FakeRayHttpProxyClient) CheckProxyActorHealth(_ context.Context) error
- func (fc *FakeRayHttpProxyClient) InitClient()
- func (fc *FakeRayHttpProxyClient) SetHostIp(_, _, _ string, _ int)
type K8sEventType
type RayDashboardClient
- func (r *RayDashboardClient) ConvertServeDetailsToApplicationStatuses(serveDetails *ServeDetails) (map[string]*ServeApplicationStatus, error)
- func (r *RayDashboardClient) DeleteJob(ctx context.Context, jobName string) error
- func (r *RayDashboardClient) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)
- func (r *RayDashboardClient) GetJobLog(ctx context.Context, jobName string) (*string, error)
- func (r *RayDashboardClient) GetMultiApplicationStatus(ctx context.Context) (map[string]*ServeApplicationStatus, error)
- func (r *RayDashboardClient) GetServeDetails(ctx context.Context) (*ServeDetails, error)
- func (r *RayDashboardClient) InitClient(ctx context.Context, url string, rayCluster *rayv1.RayCluster) error
- func (r *RayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, error)
- func (r *RayDashboardClient) StopJob(ctx context.Context, jobName string) (err error)
- func (r *RayDashboardClient) SubmitJob(ctx context.Context, rayJob *rayv1.RayJob) (jobId string, err error)
- func (r *RayDashboardClient) SubmitJobReq(ctx context.Context, request *RayJobRequest, name *string) (jobId string, err error)
- func (r *RayDashboardClient) UpdateDeployments(ctx context.Context, configJson []byte) error
type RayDashboardClientInterface
type RayHttpProxyClient
- func (r *RayHttpProxyClient) CheckProxyActorHealth(ctx context.Context) error
- func (r *RayHttpProxyClient) InitClient()
- func (r *RayHttpProxyClient) SetHostIp(hostIp, podNamespace, podName string, port int)
type RayHttpProxyClientInterface
type RayJobInfo
type RayJobLogsResponse
type RayJobRequest
- func ConvertRayJobToReq(rayJob *rayv1.RayJob) (*RayJobRequest, error)
type RayJobResponse
type RayJobStopResponse
type RuntimeEnvType
- func UnmarshalRuntimeEnvYAML(runtimeEnvYAML string) (RuntimeEnvType, error)
type ServeApplicationDetails
type ServeApplicationStatus
type ServeDeploymentDetails
type ServeDeploymentStatus
type ServeDetails
type ServiceType

Constants ¶

View Source

const (

	// DefaultServeAppName is the default application name.
	DefaultServeAppName = "default"

	// RayOriginatedFromCRNameLabelKey and RayOriginatedFromCRDLabelKey are the labels used to associate the root KubeRay Custom Resource.
	// [Example 1] If we create a RayJob named `myjob`, then (1) the RayCluster and (2) the submitter K8s Job will have a
	// `ray.io/originated-from-cr-name=myjob` and a `ray.io/originated-from-crd=RayJob` label.
	//
	// [Example 2] If we create a RayService named `mysvc`, then (1) the RayCluster and (2) the Kubernetes services managed by the RayService
	// will have a `ray.io/originated-from-cr-name=mysvc` and a `ray.io/originated-from-crd=RayService` label.
	RayOriginatedFromCRNameLabelKey          = "ray.io/originated-from-cr-name"
	RayOriginatedFromCRDLabelKey             = "ray.io/originated-from-crd"
	RayClusterLabelKey                       = "ray.io/cluster"
	RayNodeTypeLabelKey                      = "ray.io/node-type"
	RayNodeGroupLabelKey                     = "ray.io/group"
	RayNodeLabelKey                          = "ray.io/is-ray-node"
	RayIDLabelKey                            = "ray.io/identifier"
	RayClusterServingServiceLabelKey         = "ray.io/serve"
	RayClusterHeadlessServiceLabelKey        = "ray.io/headless-worker-svc"
	HashWithoutReplicasAndWorkersToDeleteKey = "ray.io/hash-without-replicas-and-workers-to-delete"
	NumWorkerGroupsKey                       = "ray.io/num-worker-groups"
	KubeRayVersion                           = "ray.io/kuberay-version"

	// In KubeRay, the Ray container must be the first application container in a head or worker Pod.
	RayContainerIndex = 0

	// Batch scheduling labels
	// TODO(tgaddair): consider making these part of the CRD
	RaySchedulerName                = "ray.io/scheduler-name"
	RayPriorityClassName            = "ray.io/priority-class-name"
	RayClusterGangSchedulingEnabled = "ray.io/gang-scheduling-enabled"

	// Ray GCS FT related annotations
	RayFTEnabledAnnotationKey         = "ray.io/ft-enabled"
	RayExternalStorageNSAnnotationKey = "ray.io/external-storage-namespace"

	// If this annotation is set to "true", the KubeRay operator will not modify the container's command.
	// However, the generated `ray start` command will still be stored in the container's environment variable
	// `KUBERAY_GEN_RAY_START_CMD`.
	RayOverwriteContainerCmdAnnotationKey = "ray.io/overwrite-container-cmd"

	// Finalizers for GCS fault tolerance
	GCSFaultToleranceRedisCleanupFinalizer = "ray.io/gcs-ft-redis-cleanup-finalizer"

	// EnableServeServiceKey is exclusively utilized to indicate if a RayCluster is directly used for serving.
	// See https://github.com/ray-project/kuberay/pull/1672 for more details.
	EnableServeServiceKey  = "ray.io/enable-serve-service"
	EnableServeServiceTrue = "true"

	EnableRayClusterServingServiceTrue  = "true"
	EnableRayClusterServingServiceFalse = "false"

	KubernetesApplicationNameLabelKey = "app.kubernetes.io/name"
	KubernetesCreatedByLabelKey       = "app.kubernetes.io/created-by"

	// Use as separator for pod name, for example, raycluster-small-size-worker-0
	DashSymbol = "-"

	// Use as default port
	DefaultClientPort               = 10001
	DefaultGcsServerPort            = 6379
	DefaultDashboardPort            = 8265
	DefaultMetricsPort              = 8080
	DefaultDashboardAgentListenPort = 52365
	DefaultServingPort              = 8000

	ClientPortName    = "client"
	GcsServerPortName = "gcs-server"
	DashboardPortName = "dashboard"
	MetricsPortName   = "metrics"
	ServingPortName   = "serve"

	// The default AppProtocol for Kubernetes service
	DefaultServiceAppProtocol = "tcp"

	// The default application name
	ApplicationName = "kuberay"

	// The default name for kuberay operator
	ComponentName = "kuberay-operator"

	// The default suffix for Headless Service for multi-host worker groups.
	// The full name will be of the form "${RayCluster_Name}-headless".
	HeadlessServiceSuffix = "headless"

	// Use as container env variable
	RAY_CLUSTER_NAME                        = "RAY_CLUSTER_NAME"
	RAY_IP                                  = "RAY_IP"
	FQ_RAY_IP                               = "FQ_RAY_IP"
	RAY_PORT                                = "RAY_PORT"
	RAY_ADDRESS                             = "RAY_ADDRESS"
	RAY_REDIS_ADDRESS                       = "RAY_REDIS_ADDRESS"
	REDIS_PASSWORD                          = "REDIS_PASSWORD"
	REDIS_USERNAME                          = "REDIS_USERNAME"
	RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE     = "RAY_DASHBOARD_ENABLE_K8S_DISK_USAGE"
	RAY_EXTERNAL_STORAGE_NS                 = "RAY_external_storage_namespace"
	RAY_GCS_RPC_SERVER_RECONNECT_TIMEOUT_S  = "RAY_gcs_rpc_server_reconnect_timeout_s"
	RAY_TIMEOUT_MS_TASK_WAIT_FOR_DEATH_INFO = "RAY_timeout_ms_task_wait_for_death_info"
	RAY_GCS_SERVER_REQUEST_TIMEOUT_SECONDS  = "RAY_gcs_server_request_timeout_seconds"
	RAY_SERVE_KV_TIMEOUT_S                  = "RAY_SERVE_KV_TIMEOUT_S"
	RAY_USAGE_STATS_KUBERAY_IN_USE          = "RAY_USAGE_STATS_KUBERAY_IN_USE"
	RAY_USAGE_STATS_EXTRA_TAGS              = "RAY_USAGE_STATS_EXTRA_TAGS"
	RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV  = "RAYCLUSTER_DEFAULT_REQUEUE_SECONDS_ENV"
	RAYCLUSTER_DEFAULT_REQUEUE_SECONDS      = 300
	KUBERAY_GEN_RAY_START_CMD               = "KUBERAY_GEN_RAY_START_CMD"

	// Environment variables for RayJob submitter Kubernetes Job.
	// Example: ray job submit --address=http://$RAY_DASHBOARD_ADDRESS --submission-id=$RAY_JOB_SUBMISSION_ID ...
	RAY_DASHBOARD_ADDRESS = "RAY_DASHBOARD_ADDRESS"
	RAY_JOB_SUBMISSION_ID = "RAY_JOB_SUBMISSION_ID"

	// Environment variables for Ray Autoscaler V2.
	// The value of RAY_CLOUD_INSTANCE_ID is the Pod name for Autoscaler V2 alpha. This may change in the future.
	RAY_CLOUD_INSTANCE_ID = "RAY_CLOUD_INSTANCE_ID"
	// The value of RAY_NODE_TYPE_NAME is the name of the node group (i.e., the value of the "ray.io/group" label).
	RAY_NODE_TYPE_NAME       = "RAY_NODE_TYPE_NAME"
	RAY_ENABLE_AUTOSCALER_V2 = "RAY_enable_autoscaler_v2"

	// This KubeRay operator environment variable is used to determine if random Pod
	// deletion should be enabled. Note that this only takes effect when autoscaling
	// is enabled for the RayCluster. This is a feature flag for v0.6.0, and will be
	// removed if the default behavior is stable enough.
	ENABLE_RANDOM_POD_DELETE = "ENABLE_RANDOM_POD_DELETE"

	// This KubeRay operator environment variable is used to determine if the Redis
	// cleanup Job should be enabled. This is a feature flag for v1.0.0.
	ENABLE_GCS_FT_REDIS_CLEANUP = "ENABLE_GCS_FT_REDIS_CLEANUP"

	// This environment variable for the KubeRay operator is used to determine whether to enable
	// the injection of readiness and liveness probes into Ray head and worker containers.
	// Enabling this feature contributes to the robustness of Ray clusters. It is currently a feature
	// flag for v1.1.0 and will be removed if the behavior proves to be stable enough.
	ENABLE_PROBES_INJECTION = "ENABLE_PROBES_INJECTION"

	// This KubeRay operator environment variable is used to determine
	// if operator should treat OpenShift cluster as Vanilla Kubernetes.
	USE_INGRESS_ON_OPENSHIFT = "USE_INGRESS_ON_OPENSHIFT"

	// If set to true, kuberay creates a normal ClusterIP service for a Ray Head instead of a Headless service.
	ENABLE_RAY_HEAD_CLUSTER_IP_SERVICE = "ENABLE_RAY_HEAD_CLUSTER_IP_SERVICE"

	// If set to true, the RayJob CR itself will be deleted if shutdownAfterJobFinishes is set to true. Note that all resources created by the RayJob CR will be deleted, including the K8s Job.
	DELETE_RAYJOB_CR_AFTER_JOB_FINISHES = "DELETE_RAYJOB_CR_AFTER_JOB_FINISHES"

	// If this occurs, it is likely due to a system-level issue (e.g., a Ray bug) that prevents the
	// `ray job submit` process in the Kubernetes Job submitter from exiting.
	// NOTE(review): the condition that "this" refers to is not stated here. Judging by the constant
	// names below, it presumably concerns a RayJob deployment status that fails to transition within
	// the grace period (300 seconds by default) — confirm against the RayJob controller.
	RAYJOB_DEPLOYMENT_STATUS_TRANSITION_GRACE_PERIOD_SECONDS         = "RAYJOB_DEPLOYMENT_STATUS_TRANSITION_GRACE_PERIOD_SECONDS"
	DEFAULT_RAYJOB_DEPLOYMENT_STATUS_TRANSITION_GRACE_PERIOD_SECONDS = 300

	// This environment variable for the KubeRay operator determines whether to enable
	// a login shell by passing the -l option to the container command /bin/bash.
	// The -l flag was added by default before KubeRay v1.4.0, but it is no longer added
	// by default starting with v1.4.0.
	ENABLE_LOGIN_SHELL = "ENABLE_LOGIN_SHELL"

	// Ray core default configurations
	DefaultWorkerRayGcsReconnectTimeoutS = "600"

	LOCAL_HOST = "127.0.0.1"
	// Ray FT default readiness probe values
	DefaultReadinessProbeInitialDelaySeconds = 10
	DefaultReadinessProbeTimeoutSeconds      = 2
	// Probe timeout for Head pod needs to be longer as it queries two endpoints (api/local_raylet_healthz & api/gcs_healthz)
	DefaultHeadReadinessProbeTimeoutSeconds = 5
	DefaultReadinessProbePeriodSeconds      = 5
	DefaultReadinessProbeSuccessThreshold   = 1
	DefaultReadinessProbeFailureThreshold   = 10
	ServeReadinessProbeFailureThreshold     = 1

	// Ray FT default liveness probe values
	DefaultLivenessProbeInitialDelaySeconds = 30
	DefaultLivenessProbeTimeoutSeconds      = 2
	// Probe timeout for Head pod needs to be longer as it queries two endpoints (api/local_raylet_healthz & api/gcs_healthz)
	DefaultHeadLivenessProbeTimeoutSeconds = 5
	DefaultLivenessProbePeriodSeconds      = 5
	DefaultLivenessProbeSuccessThreshold   = 1
	DefaultLivenessProbeFailureThreshold   = 120

	// Ray health check related configurations
	// Note: Since the Raylet process and the dashboard agent process are fate-sharing,
	// only one of them needs to be checked. So, RayAgentRayletHealthPath accesses the dashboard agent's API endpoint
	// to check the health of the Raylet process.
	// TODO (kevin85421): Should we take the dashboard process into account?
	RayAgentRayletHealthPath  = "api/local_raylet_healthz"
	RayDashboardGCSHealthPath = "api/gcs_healthz"
	RayServeProxyHealthPath   = "-/healthz"
	BaseWgetHealthCommand     = "wget --tries 1 -T %d -q -O- http://localhost:%d/%s | grep success"

	// Finalizers for RayJob
	RayJobStopJobFinalizer = "ray.io/rayjob-finalizer"

	// RayNodeHeadGroupLabelValue is the value for the RayNodeGroupLabelKey label on a head node
	RayNodeHeadGroupLabelValue = "headgroup"

	// KUBERAY_VERSION is the build version of KubeRay.
	// The version is included in the RAY_USAGE_STATS_EXTRA_TAGS environment variable
	// as well as the user-agent. This constant is updated before release.
	// TODO: Update KUBERAY_VERSION to be a build-time variable.
	KUBERAY_VERSION = "v1.4.2"

	// KubeRayController represents the value of the default job controller
	KubeRayController = "ray.io/kuberay-operator"

	ServeConfigLRUSize = 1000

	// MaxRayClusterNameLength is the maximum RayCluster name length, chosen so that the derived
	// k8s service names are not truncated. Currently, "-serve-svc" is the longest service suffix:
	// 63 - len("-serve-svc") == 53, so the name should not be longer than 53 characters.
	MaxRayClusterNameLength = 53
	// MaxRayServiceNameLength is the maximum RayService name length that still passes the RayCluster validation.
	// Minus 6 since we append 6 characters to the RayService name to create the cluster (GenerateRayClusterName).
	MaxRayServiceNameLength = MaxRayClusterNameLength - 6
	// MaxRayJobNameLength is the maximum RayJob name length that still passes the RayCluster validation.
	// Minus 6 since we append 6 characters to the RayJob name to create the cluster (GenerateRayClusterName).
	MaxRayJobNameLength = MaxRayClusterNameLength - 6
)

View Source

// ServeName is the literal "serve". ClusterDomainEnvKey ("CLUSTER_DOMAIN") is presumably the
// environment variable consulted for the cluster's domain name, with DefaultDomainName
// ("cluster.local") as the fallback — TODO confirm against GetClusterDomainName.
const (
	ServeName           = "serve"
	ClusterDomainEnvKey = "CLUSTER_DOMAIN"
	DefaultDomainName   = "cluster.local"
)

Variables ¶

View Source

// Sentinel errors, one per replica failure reason. Each is a pointer to an
// errRayClusterReplicaFailure (unexported type declared elsewhere) whose reason
// field carries the same text as the variable name without the "Err" prefix.
var (
	ErrFailedDeleteAllPods   = &errRayClusterReplicaFailure{reason: "FailedDeleteAllPods"}
	ErrFailedDeleteHeadPod   = &errRayClusterReplicaFailure{reason: "FailedDeleteHeadPod"}
	ErrFailedCreateHeadPod   = &errRayClusterReplicaFailure{reason: "FailedCreateHeadPod"}
	ErrFailedDeleteWorkerPod = &errRayClusterReplicaFailure{reason: "FailedDeleteWorkerPod"}
	ErrFailedCreateWorkerPod = &errRayClusterReplicaFailure{reason: "FailedCreateWorkerPod"}
)

These are markers used by calculateStatus() to set the RayClusterReplicaFailure condition.

View Source

// URL path fragments, declared as variables rather than constants.
// Note that ServeDetailsPath and DeployPathV2 currently hold the same path string.
// NOTE(review): presumably these are appended to the Ray dashboard base URL — confirm against the clients.
var (
	// Multi-application URL paths
	ServeDetailsPath = "/api/serve/applications/"
	DeployPathV2     = "/api/serve/applications/"
	// Job URL paths
	JobPath = "/api/jobs/"
)

Functions ¶

func CalculateAvailableReplicas ¶

func CalculateAvailableReplicas(pods corev1.PodList) int32

CalculateAvailableReplicas calculates available worker replicas at the cluster level. A worker is available if its Pod is running.

func CalculateDesiredReplicas ¶

func CalculateDesiredReplicas(ctx context.Context, cluster *rayv1.RayCluster) int32

CalculateDesiredReplicas calculates desired worker replicas at the cluster level.

func CalculateDesiredResources ¶

func CalculateDesiredResources(cluster *rayv1.RayCluster) corev1.ResourceList

func CalculateMaxReplicas ¶

func CalculateMaxReplicas(cluster *rayv1.RayCluster) int32

CalculateMaxReplicas calculates max worker replicas at the cluster level

func CalculateMinReplicas ¶

func CalculateMinReplicas(cluster *rayv1.RayCluster) int32

CalculateMinReplicas calculates min worker replicas at the cluster level

func CalculateMinResources ¶

func CalculateMinResources(cluster *rayv1.RayCluster) corev1.ResourceList

func CalculatePodResource ¶ added in v1.2.2

func CalculatePodResource(podSpec corev1.PodSpec) corev1.ResourceList

CalculatePodResource returns the total resources of a pod. Request values take precedence over limit values.

func CalculateReadyReplicas ¶ added in v1.2.0

func CalculateReadyReplicas(pods corev1.PodList) int32

CalculateReadyReplicas calculates ready worker replicas at the cluster level. A worker is ready if its Pod has a PodCondition with type == Ready and status == True.

func CheckAllPodsRunning ¶ added in v0.6.0

func CheckAllPodsRunning(ctx context.Context, runningPods corev1.PodList) bool

CheckAllPodsRunning returns true if all the RayCluster's Pods are running, false otherwise

func CheckLabel ¶

func CheckLabel(s string) string

CheckLabel makes sure the label value does not start with a punctuation and the total length is < 63 char

func CheckName ¶

func CheckName(s string) string

CheckName makes sure the name does not start with a numeric value and the total length is < 63 char

func CheckRouteName ¶ added in v1.1.0

func CheckRouteName(ctx context.Context, s string, n string) string

func CompareJsonStruct ¶

func CompareJsonStruct(objA interface{}, objB interface{}) bool

CompareJsonStruct is a better way to compare whether two objects are the same when they are json/yaml structs. reflect.DeepEqual will fail in some cases.

func Contains ¶

func Contains(elems []string, searchTerm string) bool

func ConvertResourceListToMapString ¶ added in v1.2.2

func ConvertResourceListToMapString(resourceList corev1.ResourceList) map[string]resource.Quantity

func EnvVarByName ¶ added in v1.1.0

func EnvVarByName(envName string, envVars []corev1.EnvVar) (corev1.EnvVar, bool)

EnvVarByName returns an entry in []corev1.EnvVar that matches a name. Also returns a bool for whether the env var exists.

func EnvVarExists ¶ added in v1.1.0

func EnvVarExists(envName string, envVars []corev1.EnvVar) bool

func ExtractRayIPFromFQDN ¶

func ExtractRayIPFromFQDN(fqdnRayIP string) string

ExtractRayIPFromFQDN extracts the head service name (i.e., RAY_IP, deprecated) from a fully qualified domain name (FQDN). This function is provided for backward compatibility purposes only.

func FetchHeadServiceURL ¶ added in v0.6.0

func FetchHeadServiceURL(ctx context.Context, cli client.Client, rayCluster *rayv1.RayCluster, defaultPortName string) (string, error)

FetchHeadServiceURL fetches the URL that consists of the FQDN for the RayCluster's head service and the port with the given port name (defaultPortName).

func FindContainerPort ¶ added in v0.6.0

func FindContainerPort(container *corev1.Container, portName string, defaultPort int) int

FindContainerPort searches for a specific port $portName in the container. If the port is found in the container, the corresponding port is returned. If the port is not found, the $defaultPort is returned instead.

func FindHeadPodReadyCondition ¶ added in v1.2.0

func FindHeadPodReadyCondition(headPod *corev1.Pod) metav1.Condition

func FindRayClusterSuspendStatus ¶ added in v1.3.0

func FindRayClusterSuspendStatus(instance *rayv1.RayCluster) rayv1.RayClusterConditionType

FindRayClusterSuspendStatus returns the current suspend status from two conditions:

rayv1.RayClusterSuspending
rayv1.RayClusterSuspended

The two conditions should not be both True at the same time. The transition logic should be the following:

rayv1.RayClusterSuspending:
  False by default
  False -> True: when `spec.Suspend` is true.
  True -> False: when all Pods are deleted, set rayv1.RayClusterSuspended from False to True.
rayv1.RayClusterSuspended
  False by default
  False -> True: when suspending transitions from True to False
  True -> False: when `spec.Suspend` is false.

If both rayv1.RayClusterSuspending and rayv1.RayClusterSuspended are False, FindRayClusterSuspendStatus returns "".

func FormatInt32 ¶

func FormatInt32(n int32) string

FormatInt32 returns the string representation of n. Unlike strconv.FormatInt, it takes no base argument; the result is presumably in base 10 (to be confirmed against the implementation).

func GenerateFQDNServiceName ¶

func GenerateFQDNServiceName(ctx context.Context, cluster rayv1.RayCluster, namespace string) string

GenerateFQDNServiceName generates a Fully Qualified Domain Name.

func GenerateHeadServiceName ¶ added in v1.0.0

func GenerateHeadServiceName(crdType CRDType, clusterSpec rayv1.RayClusterSpec, ownerName string) (string, error)

GenerateHeadServiceName generates a Ray head service name. Note that there are two types of head services:

(1) For RayCluster: If `HeadService.Name` in the cluster spec is not empty, it will be used as the head service name. Otherwise, the name is generated based on the RayCluster CR's name. (2) For RayService: It's important to note that the RayService CR not only possesses a head service owned by its RayCluster CR but also maintains a separate head service for itself to facilitate zero-downtime upgrades. The name of the head service owned by the RayService CR is generated based on the RayService CR's name.

@param crdType: The type of the CRD that owns the head service. @param clusterSpec: `RayClusterSpec` @param ownerName: The name of the CR that owns the head service.

func GenerateIdentifier ¶

func GenerateIdentifier(clusterName string, nodeType rayv1.RayNodeType) string

GenerateIdentifier generates identifier of same group pods

func GenerateIngressName ¶

func GenerateIngressName(clusterName string) string

GenerateIngressName generates an ingress name from cluster name

func GenerateJsonHash ¶

func GenerateJsonHash(obj interface{}) (string, error)

GenerateJsonHash JSON-serializes obj and returns its hash string.

func GenerateRayClusterName ¶

func GenerateRayClusterName(serviceName string) string

GenerateRayClusterName generates a ray cluster name from ray service name

func GenerateRayJobId ¶

func GenerateRayJobId(rayjob string) string

GenerateRayJobId generates a ray job id for submission

func GenerateRouteName ¶ added in v1.0.0

func GenerateRouteName(clusterName string) string

GenerateRouteName generates a route name from cluster name

func GenerateServeServiceLabel ¶

func GenerateServeServiceLabel(serviceName string) string

GenerateServeServiceLabel generates label value for serve service selector.

func GenerateServeServiceName ¶

func GenerateServeServiceName(serviceName string) string

GenerateServeServiceName generates name for serve service.

func GetClusterDomainName ¶

func GetClusterDomainName() string

GetClusterDomainName returns cluster's domain name

func GetClusterType ¶ added in v1.4.0

func GetClusterType() bool

GetClusterType checks where we are running. It tries to distinguish whether this is a vanilla Kubernetes cluster or OpenShift.

func GetContainerCommand ¶ added in v1.4.0

func GetContainerCommand(additionalOptions []string) []string

func GetHeadGroupServiceAccountName ¶

func GetHeadGroupServiceAccountName(cluster *rayv1.RayCluster) string

GetHeadGroupServiceAccountName returns the head group service account if it exists. Otherwise, it returns the name of the cluster itself.

func GetNamespace ¶

func GetNamespace(metaData metav1.ObjectMeta) string

GetNamespace returns the namespace.

func GetRayClusterNameFromService ¶ added in v1.3.0

func GetRayClusterNameFromService(svc *corev1.Service) string

GetRayClusterNameFromService returns the name of the RayCluster that the service points to

func GetRayDashboardClientFunc ¶

func GetRayDashboardClientFunc(mgr ctrl.Manager, useKubernetesProxy bool) func() RayDashboardClientInterface

func GetRayHttpProxyClientFunc ¶

func GetRayHttpProxyClientFunc(mgr ctrl.Manager, useKubernetesProxy bool) func() RayHttpProxyClientInterface

func GetWorkerGroupDesiredReplicas ¶ added in v1.0.0

func GetWorkerGroupDesiredReplicas(ctx context.Context, workerGroupSpec rayv1.WorkerGroupSpec) int32

func IsAutoscalingEnabled ¶ added in v1.3.0

func IsAutoscalingEnabled(spec *rayv1.RayClusterSpec) bool

func IsAutoscalingV2Enabled ¶ added in v1.4.0

func IsAutoscalingV2Enabled(spec *rayv1.RayClusterSpec) bool

func IsCreated ¶

func IsCreated(pod *corev1.Pod) bool

IsCreated returns true if pod has been created and is maintained by the API server

func IsGCSFaultToleranceEnabled ¶ added in v1.3.0

func IsGCSFaultToleranceEnabled(spec *rayv1.RayClusterSpec, annotations map[string]string) bool

Check if the RayCluster has GCS fault tolerance enabled.

func IsJobFinished ¶ added in v1.1.0

func IsJobFinished(j *batchv1.Job) (batchv1.JobConditionType, bool)

IsJobFinished checks whether the given Job has finished execution. It does not discriminate between successful and failed terminations. src: https://github.com/kubernetes/kubernetes/blob/a8a1abc25cad87333840cd7d54be2efaf31a3177/pkg/controller/job/utils.go#L26

func IsRunningAndReady ¶

func IsRunningAndReady(pod *corev1.Pod) bool

IsRunningAndReady returns true if pod is in the PodRunning Phase and it has a condition of PodReady.

func ManagedByExternalController ¶ added in v1.3.0

func ManagedByExternalController(controllerName *string) *string

func PodName ¶ added in v1.4.0

func PodName(prefix string, nodeType rayv1.RayNodeType, isGenerateName bool) string

PodName returns the value that should be used for a Pod's Name or GenerateName based on the RayCluster name and node type (head or worker).

func RayClusterReplicaFailureReason ¶ added in v1.2.0

func RayClusterReplicaFailureReason(err error) string

func RayOriginatedFromCRDLabelValue ¶ added in v1.1.0

func RayOriginatedFromCRDLabelValue(crdType CRDType) string

RayOriginatedFromCRDLabelValue generates a value for the label RayOriginatedFromCRDLabelKey. This is also the only function to construct label filter of resources originated from a given CRDType.

func SafeUint64ToInt64 ¶ added in v1.4.0

func SafeUint64ToInt64(n uint64) int64

SafeUint64ToInt64 safely converts a uint64 to int64. If the uint64 value exceeds the maximum int64 value, the function will panic.

func TrimJobName ¶ added in v1.3.0

func TrimJobName(jobName string) string

TrimJobName uses CheckLabel to trim a Kubernetes Job name so that it satisfies the label constraints.

func ValidateRayClusterMetadata ¶ added in v1.4.0

func ValidateRayClusterMetadata(metadata metav1.ObjectMeta) error

func ValidateRayClusterSpec ¶ added in v1.3.0

func ValidateRayClusterSpec(spec *rayv1.RayClusterSpec, annotations map[string]string) error

Validation for invalid Ray Cluster configurations.

func ValidateRayClusterStatus ¶ added in v1.3.0

func ValidateRayClusterStatus(instance *rayv1.RayCluster) error

func ValidateRayJobMetadata ¶ added in v1.4.0

func ValidateRayJobMetadata(metadata metav1.ObjectMeta) error

func ValidateRayJobSpec ¶ added in v1.3.0

func ValidateRayJobSpec(rayJob *rayv1.RayJob) error

func ValidateRayJobStatus ¶ added in v1.3.0

func ValidateRayJobStatus(rayJob *rayv1.RayJob) error

func ValidateRayServiceMetadata ¶ added in v1.4.0

func ValidateRayServiceMetadata(metadata metav1.ObjectMeta) error

func ValidateRayServiceSpec ¶ added in v1.3.0

func ValidateRayServiceSpec(rayService *rayv1.RayService) error

Types ¶

type BaseDashboardClient ¶ added in v0.6.0

// BaseDashboardClient exposes no exported fields. It is embedded by both
// RayDashboardClient and FakeRayDashboardClient.
type BaseDashboardClient struct {
	// contains filtered or unexported fields
}

type CRDType ¶ added in v1.0.0

// CRDType is a string type naming a custom resource kind; its values are the
// RayClusterCRD, RayJobCRD and RayServiceCRD constants.
type CRDType string

TODO (kevin85421): Define CRDType here rather than constant.go to avoid circular dependency.

// CRDType values. Each string value is the constant's identifier without the
// "CRD" suffix.
const (
	RayClusterCRD CRDType = "RayCluster"
	RayJobCRD     CRDType = "RayJob"
	RayServiceCRD CRDType = "RayService"
)

func GetCRDType ¶ added in v1.1.0

func GetCRDType(key string) CRDType

type ClientProvider ¶ added in v1.2.0

// ClientProvider supplies factories for the dashboard and HTTP proxy clients.
// Each method takes a controller manager and returns a function that, when
// called, yields a client behind the corresponding interface.
type ClientProvider interface {
	// GetDashboardClient returns a factory producing RayDashboardClientInterface values.
	GetDashboardClient(mgr manager.Manager) func() RayDashboardClientInterface
	// GetHttpProxyClient returns a factory producing RayHttpProxyClientInterface values.
	GetHttpProxyClient(mgr manager.Manager) func() RayHttpProxyClientInterface
}

type FakeRayDashboardClient ¶

// FakeRayDashboardClient is a dashboard client stand-in that embeds
// BaseDashboardClient and declares the same method names as
// RayDashboardClientInterface.
type FakeRayDashboardClient struct {
	// GetJobInfoMock atomically holds a pointer to a function with the same
	// parameters and results as GetJobInfo.
	// NOTE(review): presumably GetJobInfo delegates to it when set — confirm.
	GetJobInfoMock atomic.Pointer[func(context.Context, string) (*RayJobInfo, error)]
	BaseDashboardClient
	// contains filtered or unexported fields
}

func (*FakeRayDashboardClient) DeleteJob ¶ added in v1.1.0

func (r *FakeRayDashboardClient) DeleteJob(_ context.Context, _ string) error

func (*FakeRayDashboardClient) GetJobInfo ¶

func (r *FakeRayDashboardClient) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)

func (*FakeRayDashboardClient) GetJobLog ¶ added in v1.1.0

func (r *FakeRayDashboardClient) GetJobLog(_ context.Context, _ string) (*string, error)

func (*FakeRayDashboardClient) GetMultiApplicationStatus ¶ added in v0.6.0

func (r *FakeRayDashboardClient) GetMultiApplicationStatus(_ context.Context) (map[string]*ServeApplicationStatus, error)

func (*FakeRayDashboardClient) GetServeDetails ¶ added in v0.6.0

func (r *FakeRayDashboardClient) GetServeDetails(_ context.Context) (*ServeDetails, error)

func (*FakeRayDashboardClient) InitClient ¶

func (r *FakeRayDashboardClient) InitClient(_ context.Context, url string, _ *rayv1.RayCluster) error

func (*FakeRayDashboardClient) ListJobs ¶ added in v1.1.0

func (r *FakeRayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, error)

func (*FakeRayDashboardClient) SetMultiApplicationStatuses ¶ added in v0.6.0

func (r *FakeRayDashboardClient) SetMultiApplicationStatuses(statuses map[string]*ServeApplicationStatus)

func (*FakeRayDashboardClient) StopJob ¶

func (r *FakeRayDashboardClient) StopJob(_ context.Context, _ string) (err error)

func (*FakeRayDashboardClient) SubmitJob ¶

func (r *FakeRayDashboardClient) SubmitJob(_ context.Context, _ *rayv1.RayJob) (jobId string, err error)

func (*FakeRayDashboardClient) SubmitJobReq ¶ added in v1.1.0

func (r *FakeRayDashboardClient) SubmitJobReq(_ context.Context, _ *RayJobRequest, _ *string) (string, error)

func (*FakeRayDashboardClient) UpdateDeployments ¶

func (r *FakeRayDashboardClient) UpdateDeployments(_ context.Context, _ []byte) error

type FakeRayHttpProxyClient ¶

// FakeRayHttpProxyClient is an HTTP proxy client stand-in that declares the
// same method names as RayHttpProxyClientInterface.
type FakeRayHttpProxyClient struct {
	// IsHealthy is the only exported state of the fake.
	// NOTE(review): presumably it controls the result of CheckProxyActorHealth — confirm.
	IsHealthy bool
}

func (*FakeRayHttpProxyClient) CheckProxyActorHealth ¶ added in v1.2.0

func (fc *FakeRayHttpProxyClient) CheckProxyActorHealth(_ context.Context) error

func (*FakeRayHttpProxyClient) InitClient ¶

func (fc *FakeRayHttpProxyClient) InitClient()

func (*FakeRayHttpProxyClient) SetHostIp ¶

func (fc *FakeRayHttpProxyClient) SetHostIp(_, _, _ string, _ int)

type K8sEventType ¶ added in v1.2.2

// K8sEventType is a string type naming an event; its values are the
// K8sEventType constants declared in this package.
type K8sEventType string

Currently, KubeRay fires events when failures occur during the creation or deletion of resources.

// K8sEventType values, grouped by the kind of resource they concern.
// Each constant's string value is identical to its identifier.
const (
	// RayCluster event list
	InvalidRayClusterStatus   K8sEventType = "InvalidRayClusterStatus"
	InvalidRayClusterSpec     K8sEventType = "InvalidRayClusterSpec"
	InvalidRayClusterMetadata K8sEventType = "InvalidRayClusterMetadata"
	// Head Pod event list
	CreatedHeadPod        K8sEventType = "CreatedHeadPod"
	FailedToCreateHeadPod K8sEventType = "FailedToCreateHeadPod"
	DeletedHeadPod        K8sEventType = "DeletedHeadPod"
	FailedToDeleteHeadPod K8sEventType = "FailedToDeleteHeadPod"

	// Worker Pod event list
	CreatedWorkerPod                  K8sEventType = "CreatedWorkerPod"
	FailedToCreateWorkerPod           K8sEventType = "FailedToCreateWorkerPod"
	DeletedWorkerPod                  K8sEventType = "DeletedWorkerPod"
	FailedToDeleteWorkerPod           K8sEventType = "FailedToDeleteWorkerPod"
	FailedToDeleteWorkerPodCollection K8sEventType = "FailedToDeleteWorkerPodCollection"

	// Redis Cleanup Job event list
	CreatedRedisCleanupJob        K8sEventType = "CreatedRedisCleanupJob"
	FailedToCreateRedisCleanupJob K8sEventType = "FailedToCreateRedisCleanupJob"

	// RayJob event list
	InvalidRayJobSpec             K8sEventType = "InvalidRayJobSpec"
	InvalidRayJobMetadata         K8sEventType = "InvalidRayJobMetadata"
	InvalidRayJobStatus           K8sEventType = "InvalidRayJobStatus"
	CreatedRayJobSubmitter        K8sEventType = "CreatedRayJobSubmitter"
	DeletedRayJobSubmitter        K8sEventType = "DeletedRayJobSubmitter"
	FailedToCreateRayJobSubmitter K8sEventType = "FailedToCreateRayJobSubmitter"
	FailedToDeleteRayJobSubmitter K8sEventType = "FailedToDeleteRayJobSubmitter"
	CreatedRayCluster             K8sEventType = "CreatedRayCluster"
	UpdatedRayCluster             K8sEventType = "UpdatedRayCluster"
	DeletedRayCluster             K8sEventType = "DeletedRayCluster"
	FailedToCreateRayCluster      K8sEventType = "FailedToCreateRayCluster"
	FailedToDeleteRayCluster      K8sEventType = "FailedToDeleteRayCluster"
	FailedToUpdateRayCluster      K8sEventType = "FailedToUpdateRayCluster"

	// RayService event list
	InvalidRayServiceSpec           K8sEventType = "InvalidRayServiceSpec"
	InvalidRayServiceMetadata       K8sEventType = "InvalidRayServiceMetadata"
	UpdatedHeadPodServeLabel        K8sEventType = "UpdatedHeadPodServeLabel"
	UpdatedServeApplications        K8sEventType = "UpdatedServeApplications"
	FailedToUpdateHeadPodServeLabel K8sEventType = "FailedToUpdateHeadPodServeLabel"
	FailedToUpdateServeApplications K8sEventType = "FailedToUpdateServeApplications"

	// Generic Pod event list
	DeletedPod                  K8sEventType = "DeletedPod"
	FailedToDeletePod           K8sEventType = "FailedToDeletePod"
	FailedToDeletePodCollection K8sEventType = "FailedToDeletePodCollection"

	// Ingress event list
	CreatedIngress        K8sEventType = "CreatedIngress"
	FailedToCreateIngress K8sEventType = "FailedToCreateIngress"

	// Route event list
	CreatedRoute        K8sEventType = "CreatedRoute"
	FailedToCreateRoute K8sEventType = "FailedToCreateRoute"

	// Service event list
	CreatedService        K8sEventType = "CreatedService"
	UpdatedService        K8sEventType = "UpdatedService"
	FailedToCreateService K8sEventType = "FailedToCreateService"
	FailedToUpdateService K8sEventType = "FailedToUpdateService"

	// ServiceAccount event list
	CreatedServiceAccount            K8sEventType = "CreatedServiceAccount"
	FailedToCreateServiceAccount     K8sEventType = "FailedToCreateServiceAccount"
	AutoscalerServiceAccountNotFound K8sEventType = "AutoscalerServiceAccountNotFound"

	// Role event list
	CreatedRole        K8sEventType = "CreatedRole"
	FailedToCreateRole K8sEventType = "FailedToCreateRole"

	// RoleBinding event list
	CreatedRoleBinding        K8sEventType = "CreatedRoleBinding"
	FailedToCreateRoleBinding K8sEventType = "FailedToCreateRoleBinding"
)

type RayDashboardClient ¶

// RayDashboardClient embeds BaseDashboardClient and declares the same method
// names as RayDashboardClientInterface; it has no exported fields of its own.
type RayDashboardClient struct {
	BaseDashboardClient
	// contains filtered or unexported fields
}

func (*RayDashboardClient) ConvertServeDetailsToApplicationStatuses ¶ added in v0.6.0

func (r *RayDashboardClient) ConvertServeDetailsToApplicationStatuses(serveDetails *ServeDetails) (map[string]*ServeApplicationStatus, error)

func (*RayDashboardClient) DeleteJob ¶ added in v1.1.0

func (r *RayDashboardClient) DeleteJob(ctx context.Context, jobName string) error

func (*RayDashboardClient) GetJobInfo ¶

func (r *RayDashboardClient) GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)

Note that RayJobInfo and error can't both be nil at the same time. If the Ray job with the given JobId can't be found, make sure a BadRequest error is returned.

func (*RayDashboardClient) GetJobLog ¶ added in v1.1.0

func (r *RayDashboardClient) GetJobLog(ctx context.Context, jobName string) (*string, error)

GetJobLog gets the log of the Ray job identified by jobName.

func (*RayDashboardClient) GetMultiApplicationStatus ¶ added in v0.6.0

func (r *RayDashboardClient) GetMultiApplicationStatus(ctx context.Context) (map[string]*ServeApplicationStatus, error)

func (*RayDashboardClient) GetServeDetails ¶ added in v0.6.0

func (r *RayDashboardClient) GetServeDetails(ctx context.Context) (*ServeDetails, error)

GetServeDetails gets details on all live applications on the Ray cluster.

func (*RayDashboardClient) InitClient ¶

func (r *RayDashboardClient) InitClient(ctx context.Context, url string, rayCluster *rayv1.RayCluster) error

func (*RayDashboardClient) ListJobs ¶ added in v1.1.0

func (r *RayDashboardClient) ListJobs(ctx context.Context) (*[]RayJobInfo, error)

func (*RayDashboardClient) StopJob ¶

func (r *RayDashboardClient) StopJob(ctx context.Context, jobName string) (err error)

func (*RayDashboardClient) SubmitJob ¶

func (r *RayDashboardClient) SubmitJob(ctx context.Context, rayJob *rayv1.RayJob) (jobId string, err error)

func (*RayDashboardClient) SubmitJobReq ¶ added in v1.1.0

func (r *RayDashboardClient) SubmitJobReq(ctx context.Context, request *RayJobRequest, name *string) (jobId string, err error)

func (*RayDashboardClient) UpdateDeployments ¶

func (r *RayDashboardClient) UpdateDeployments(ctx context.Context, configJson []byte) error

UpdateDeployments updates the deployments in the Ray cluster.

type RayDashboardClientInterface ¶

// RayDashboardClientInterface is the set of dashboard operations covering
// client initialization, Serve deployments/applications, and Ray jobs.
// Both *RayDashboardClient and *FakeRayDashboardClient declare methods with
// these names in this package.
type RayDashboardClientInterface interface {
	// InitClient prepares the client for the given dashboard URL and RayCluster.
	InitClient(ctx context.Context, url string, rayCluster *rayv1.RayCluster) error
	// UpdateDeployments takes the deployment configuration as raw JSON bytes.
	UpdateDeployments(ctx context.Context, configJson []byte) error
	// V2/multi-app Rest API
	GetServeDetails(ctx context.Context) (*ServeDetails, error)
	// GetMultiApplicationStatus returns statuses in a map keyed by string.
	// NOTE(review): presumably keyed by application name — confirm.
	GetMultiApplicationStatus(context.Context) (map[string]*ServeApplicationStatus, error)
	// Job operations. GetJobInfo takes a jobId, while GetJobLog, StopJob and
	// DeleteJob take a jobName.
	GetJobInfo(ctx context.Context, jobId string) (*RayJobInfo, error)
	ListJobs(ctx context.Context) (*[]RayJobInfo, error)
	SubmitJob(ctx context.Context, rayJob *rayv1.RayJob) (string, error)
	SubmitJobReq(ctx context.Context, request *RayJobRequest, name *string) (string, error)
	GetJobLog(ctx context.Context, jobName string) (*string, error)
	StopJob(ctx context.Context, jobName string) error
	DeleteJob(ctx context.Context, jobName string) error
}

type RayHttpProxyClient ¶

// RayHttpProxyClient has no exported fields; it declares the same method
// names as RayHttpProxyClientInterface.
type RayHttpProxyClient struct {
	// contains filtered or unexported fields
}

func (*RayHttpProxyClient) CheckProxyActorHealth ¶ added in v1.2.0

func (r *RayHttpProxyClient) CheckProxyActorHealth(ctx context.Context) error

CheckProxyActorHealth checks the health status of the Ray Serve proxy actor.

func (*RayHttpProxyClient) InitClient ¶

func (r *RayHttpProxyClient) InitClient()

func (*RayHttpProxyClient) SetHostIp ¶

func (r *RayHttpProxyClient) SetHostIp(hostIp, podNamespace, podName string, port int)

type RayHttpProxyClientInterface ¶

// RayHttpProxyClientInterface is the set of operations for the HTTP proxy
// client. Both *RayHttpProxyClient and *FakeRayHttpProxyClient declare
// methods with these names in this package.
type RayHttpProxyClientInterface interface {
	// InitClient takes no arguments and returns nothing.
	InitClient()
	// CheckProxyActorHealth reports the health check result as an error value.
	CheckProxyActorHealth(ctx context.Context) error
	// SetHostIp sets the target from a host IP, the pod's namespace and name, and a port.
	SetHostIp(hostIp, podNamespace, podName string, port int)
}

type RayJobInfo ¶

// RayJobInfo describes a single Ray job as JSON. Every field is tagged
// omitempty, so zero-valued fields are left out when marshaling.
type RayJobInfo struct {
	// ErrorType is a pointer, so an absent value is represented as nil.
	ErrorType    *string           `json:"error_type,omitempty"`
	Metadata     map[string]string `json:"metadata,omitempty"`
	RuntimeEnv   RuntimeEnvType    `json:"runtime_env,omitempty"`
	// JobStatus uses the JSON key "status", not "job_status".
	JobStatus    rayv1.JobStatus   `json:"status,omitempty"`
	Entrypoint   string            `json:"entrypoint,omitempty"`
	JobId        string            `json:"job_id,omitempty"`
	SubmissionId string            `json:"submission_id,omitempty"`
	Message      string            `json:"message,omitempty"`
	// StartTime and EndTime are unsigned integers.
	// NOTE(review): units are not visible here; presumably epoch timestamps — confirm.
	StartTime    uint64            `json:"start_time,omitempty"`
	EndTime      uint64            `json:"end_time,omitempty"`
}

RayJobInfo is the response of the "ray job status" API. References: https://docs.ray.io/en/latest/cluster/running-applications/job-submission/rest.html#ray-job-rest-api-spec and https://github.com/ray-project/ray/blob/cfbf98c315cfb2710c56039a3c96477d196de049/dashboard/modules/job/pydantic_models.py#L38-L107

type RayJobLogsResponse ¶ added in v1.1.0

// RayJobLogsResponse holds the "logs" string of a JSON payload.
// NOTE(review): presumably the response body decoded by GetJobLog — confirm.
type RayJobLogsResponse struct {
	Logs string `json:"logs,omitempty"`
}

type RayJobRequest ¶

// RayJobRequest is the JSON payload describing a job to submit. It is the
// result type of ConvertRayJobToReq and the request argument of SubmitJobReq.
type RayJobRequest struct {
	RuntimeEnv   RuntimeEnvType     `json:"runtime_env,omitempty"`
	Metadata     map[string]string  `json:"metadata,omitempty"`
	// Resources, NumCpus and NumGpus use "entrypoint_"-prefixed JSON keys.
	Resources    map[string]float32 `json:"entrypoint_resources,omitempty"`
	// Entrypoint is the only field without omitempty, so it is always emitted.
	Entrypoint   string             `json:"entrypoint"`
	SubmissionId string             `json:"submission_id,omitempty"`
	NumCpus      float32            `json:"entrypoint_num_cpus,omitempty"`
	NumGpus      float32            `json:"entrypoint_num_gpus,omitempty"`
}

RayJobRequest is the request body used to submit a Ray job. References: https://docs.ray.io/en/latest/cluster/running-applications/job-submission/rest.html#ray-job-rest-api-spec and https://github.com/ray-project/ray/blob/cfbf98c315cfb2710c56039a3c96477d196de049/dashboard/modules/job/common.py#L325-L353

func ConvertRayJobToReq ¶

func ConvertRayJobToReq(rayJob *rayv1.RayJob) (*RayJobRequest, error)

type RayJobResponse ¶

// RayJobResponse holds the "job_id" field of a JSON payload.
// NOTE(review): presumably the response to a job submission — confirm.
type RayJobResponse struct {
	JobId string `json:"job_id"`
}

type RayJobStopResponse ¶

// RayJobStopResponse holds the boolean "stopped" field of a JSON payload.
// NOTE(review): presumably the response decoded by StopJob — confirm.
type RayJobStopResponse struct {
	Stopped bool `json:"stopped"`
}

type RuntimeEnvType ¶ added in v1.1.0

// RuntimeEnvType is a free-form, string-keyed map used for the "runtime_env"
// field of RayJobInfo and RayJobRequest; UnmarshalRuntimeEnvYAML returns one.
type RuntimeEnvType map[string]interface{}

func UnmarshalRuntimeEnvYAML ¶ added in v1.1.0

func UnmarshalRuntimeEnvYAML(runtimeEnvYAML string) (RuntimeEnvType, error)

type ServeApplicationDetails ¶ added in v0.6.0

// ServeApplicationDetails is ServeApplicationStatus plus a route prefix, a
// docs path, and per-deployment details.
type ServeApplicationDetails struct {
	// Deployments sits at a shallower depth than the embedded
	// ServeApplicationStatus.Deployments, so it takes precedence both for the
	// Go selector and for the JSON key "deployments".
	Deployments map[string]ServeDeploymentDetails `json:"deployments"`
	ServeApplicationStatus
	RoutePrefix string `json:"route_prefix,omitempty"`
	DocsPath    string `json:"docs_path,omitempty"`
}

type ServeApplicationStatus ¶ added in v0.6.0

// ServeApplicationStatus holds an application's name, status, message, and a
// string-keyed map of its deployment statuses.
type ServeApplicationStatus struct {
	// Deployments and Status have no omitempty, so they are always emitted.
	Deployments map[string]ServeDeploymentStatus `json:"deployments"`
	Name        string                           `json:"name,omitempty"`
	Status      string                           `json:"status"`
	Message     string                           `json:"message,omitempty"`
}

ServeApplicationStatus describes the status of an application.

type ServeDeploymentDetails ¶ added in v0.6.0

// ServeDeploymentDetails is ServeDeploymentStatus (embedded, so its fields
// are promoted) plus the deployment's route prefix.
type ServeDeploymentDetails struct {
	ServeDeploymentStatus
	RoutePrefix string `json:"route_prefix,omitempty"`
}

V2 Serve API response format. These types extend the ServeDeploymentStatus and ServeApplicationStatus structs, but contain more information, such as the route prefix, because the V2/multi-app GET API fetches general metadata, not just statuses.

type ServeDeploymentStatus ¶ added in v0.6.0

// ServeDeploymentStatus holds a deployment's name, status, and message.
// All three fields are tagged omitempty.
type ServeDeploymentStatus struct {
	Name    string `json:"name,omitempty"`
	Status  string `json:"status,omitempty"`
	Message string `json:"message,omitempty"`
}

ServeDeploymentStatus and ServeApplicationStatus describe the format of the status(es) returned by the GetMultiApplicationStatus method of the dashboard client. ServeDeploymentStatus describes the status of a deployment.

type ServeDetails ¶ added in v0.6.0

// ServeDetails is the result type of GetServeDetails: a string-keyed map of
// application details plus the deploy mode.
type ServeDetails struct {
	// Applications has no omitempty, so it is always emitted.
	// NOTE(review): presumably keyed by application name — confirm.
	Applications map[string]ServeApplicationDetails `json:"applications"`
	DeployMode   string                             `json:"deploy_mode,omitempty"`
}

type ServiceType ¶ added in v1.1.0

// ServiceType is a string type naming a kind of service; its values are the
// HeadService and ServingService constants.
type ServiceType string

// ServiceType values. Note that ServingService's string value is
// "serveService", which does not mirror its identifier.
const (
	HeadService    ServiceType = "headService"
	ServingService ServiceType = "serveService"
)

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL