ray

package
v1.0.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 12, 2026 License: Apache-2.0 Imports: 8 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func ActorLogsEndpoint

func ActorLogsEndpoint(actorID string) string

ActorLogsEndpoint returns the path for fetching stdout logs for a specific actor.

func ActorsStateEndpoint

func ActorsStateEndpoint() string

ActorsStateEndpoint returns the path for the State API actors endpoint.

func ClusterEventsEndpoint

func ClusterEventsEndpoint() string

ClusterEventsEndpoint returns the path for the cluster events State API endpoint.

func JobLogsEndpoint

func JobLogsEndpoint(submissionID string) string

JobLogsEndpoint returns the path for fetching logs for a specific job. Uses submission_id (e.g., "raysubmit_XXX"), NOT the internal job_id.

func JobsRESTEndpoint

func JobsRESTEndpoint() string

JobsRESTEndpoint returns the path for the Jobs REST API endpoint. Note: this endpoint requires a trailing slash.

func JobsStateEndpoint

func JobsStateEndpoint() string

JobsStateEndpoint returns the path for the State API jobs endpoint.

func NodeLogFileEndpoint

func NodeLogFileEndpoint(nodeID, filename string) string

NodeLogFileEndpoint returns the path for fetching a specific log file from a node.

func NodeLogsEndpoint

func NodeLogsEndpoint(nodeID string) string

NodeLogsEndpoint returns the path for listing log files for a specific node.

func NodesStateEndpoint

func NodesStateEndpoint() string

NodesStateEndpoint returns the path for the State API nodes endpoint.

func ServeApplicationsEndpoint

func ServeApplicationsEndpoint() string

ServeApplicationsEndpoint returns the path for the Serve applications endpoint. CRITICAL: This endpoint requires a trailing slash. Without it, you get a 404.

func TasksSummarizeEndpoint

func TasksSummarizeEndpoint(jobID string) string

TasksSummarizeEndpoint returns the path for the task summary endpoint filtered by job_id. Uses the internal job_id (e.g., "02000000"), NOT submission_id.

func VersionEndpoint

func VersionEndpoint() string

VersionEndpoint returns the path for the version/ping endpoint.

Types

type Actor

// Actor represents a Ray actor from /api/v0/actors.
//
// The fields after the "detail=true fields" marker all carry omitempty in
// their JSON tags. PlacementGroupID, DeathCause and CallSite are pointers and
// are marked nullable.
//
// NOTE(review): NumRestarts is declared as a string rather than an integer —
// presumably the API serializes this counter as a JSON string; confirm against
// a real response before changing the type.
type Actor struct {
	State        string `json:"state"`
	ActorID      string `json:"actor_id"`
	ClassName    string `json:"class_name"`
	JobID        string `json:"job_id"`
	RayNamespace string `json:"ray_namespace"`
	PID          int    `json:"pid"`
	NodeID       string `json:"node_id"`
	Name         string `json:"name"`
	// detail=true fields
	IsDetached        bool           `json:"is_detached,omitempty"`
	PlacementGroupID  *string        `json:"placement_group_id,omitempty"` // nullable
	ReprName          string         `json:"repr_name,omitempty"`
	RequiredResources map[string]any `json:"required_resources,omitempty"`
	DeathCause        *DeathCause    `json:"death_cause,omitempty"` // nullable
	NumRestarts       string         `json:"num_restarts,omitempty"`
	CallSite          *string        `json:"call_site,omitempty"` // nullable
}

Actor represents a Ray actor from /api/v0/actors.

type ActorDiedErrorContext

// ActorDiedErrorContext contains detailed error information for a dead actor.
//
// It is referenced from DeathCause under the "actor_died_error_context" JSON
// key. All fields are plain (non-pointer) values without omitempty.
type ActorDiedErrorContext struct {
	ErrorMessage   string `json:"error_message"`
	OwnerID        string `json:"owner_id"`
	OwnerIPAddress string `json:"owner_ip_address"`
	NodeIPAddress  string `json:"node_ip_address"`
	PID            int    `json:"pid"`
	Name           string `json:"name"`
	RayNamespace   string `json:"ray_namespace"`
	ClassName      string `json:"class_name"`
	ActorID        string `json:"actor_id"`
	Reason         string `json:"reason"`
	NeverStarted   bool   `json:"never_started"`
}

ActorDiedErrorContext contains detailed error information for a dead actor.

type ApplicationDetails

// ApplicationDetails represents a single Serve application.
//
// RoutePrefix and DocsPath are pointers.
// NOTE(review): presumably so a JSON null can be told apart from an empty
// string — confirm against the Serve API schema.
//
// Deployments maps a string key to the DeploymentDetails for that deployment.
// NOTE(review): the key is presumably the deployment name — confirm.
type ApplicationDetails struct {
	Name              string                       `json:"name"`
	RoutePrefix       *string                      `json:"route_prefix"`
	DocsPath          *string                      `json:"docs_path"`
	Status            string                       `json:"status"`
	Message           string                       `json:"message"`
	LastDeployedTimeS float64                      `json:"last_deployed_time_s"`
	DeployedAppConfig map[string]any               `json:"deployed_app_config"`
	Deployments       map[string]DeploymentDetails `json:"deployments"`
}

ApplicationDetails represents a single Serve application.

type Client

// Client defines the Ray Dashboard API interface.
//
// Every method takes a context.Context as its first argument and returns an
// error as its last result. HTTPClient is the implementation declared in this
// package.
//
// Two different job identifiers are in play: GetJobLogs takes a submissionID,
// while GetTaskSummary takes a jobID. They are separate parameters and are not
// interchangeable.
type Client interface {
	// Ping returns version information about the cluster.
	Ping(ctx context.Context) (*VersionInfo, error)
	// ListJobs returns jobs as Job values.
	ListJobs(ctx context.Context) ([]Job, error)
	// ListNodes returns the cluster nodes.
	ListNodes(ctx context.Context) ([]Node, error)
	// ListActors returns the actors.
	ListActors(ctx context.Context) ([]Actor, error)
	// ListJobDetails returns jobs as JobDetail values.
	ListJobDetails(ctx context.Context) ([]JobDetail, error)
	// GetJobLogs returns the logs of the job identified by submissionID.
	GetJobLogs(ctx context.Context, submissionID string) (string, error)
	// GetTaskSummary returns the task summary for the job identified by jobID.
	GetTaskSummary(ctx context.Context, jobID string) (*TaskSummaryResponse, error)
	// ListNodeLogs returns the log file listing for the node identified by nodeID.
	ListNodeLogs(ctx context.Context, nodeID string) (*NodeLogListing, error)
	// GetNodeLogFile returns the content of the named log file on a node.
	GetNodeLogFile(ctx context.Context, nodeID, filename string) (string, error)
	// GetActorLogs returns the logs of the actor identified by actorID.
	GetActorLogs(ctx context.Context, actorID string) (string, error)
	// GetServeApplications returns the Serve instance details.
	GetServeApplications(ctx context.Context) (*ServeInstanceDetails, error)
	// ListClusterEvents returns the cluster events.
	ListClusterEvents(ctx context.Context) ([]ClusterEvent, error)
}

Client defines the Ray Dashboard API interface.

type ClusterEvent

// ClusterEvent represents a cluster event from /api/v0/cluster_events.
//
// CustomFields is the only field tagged omitempty.
// NOTE(review): Time is declared as a string; its format (epoch vs. formatted
// timestamp) is not visible here — confirm before parsing it.
type ClusterEvent struct {
	Severity     string         `json:"severity"`
	Time         string         `json:"time"`
	SourceType   string         `json:"source_type"`
	Message      string         `json:"message"`
	EventID      string         `json:"event_id"`
	CustomFields map[string]any `json:"custom_fields,omitempty"`
}

ClusterEvent represents a cluster event from /api/v0/cluster_events.

type DeathCause

// DeathCause contains the reason an actor died.
//
// The only variant modelled is ActorDiedErrorContext. It is a pointer tagged
// omitempty, so it may be nil.
type DeathCause struct {
	ActorDiedErrorContext *ActorDiedErrorContext `json:"actor_died_error_context,omitempty"`
}

DeathCause contains the reason an actor died.

type DeploymentDetails

// DeploymentDetails represents a single deployment within a Serve application.
//
// Replicas holds one ReplicaDetails per replica. TargetNumReplicas is a
// separate integer field and is not derived from len(Replicas).
type DeploymentDetails struct {
	Name              string           `json:"name"`
	Status            string           `json:"status"`
	StatusTrigger     string           `json:"status_trigger"`
	Message           string           `json:"message"`
	TargetNumReplicas int              `json:"target_num_replicas"`
	Replicas          []ReplicaDetails `json:"replicas"`
}

DeploymentDetails represents a single deployment within a Serve application.

type DriverInfo

// DriverInfo contains driver process information for a job.
//
// NOTE(review): PID is declared as a string here, whereas Actor.PID and
// ActorDiedErrorContext.PID are ints — presumably the jobs API serializes the
// driver PID as a JSON string; confirm before unifying the types.
type DriverInfo struct {
	ID            string `json:"id"`
	NodeIPAddress string `json:"node_ip_address"`
	PID           string `json:"pid"`
}

DriverInfo contains driver process information for a job.

type HTTPClient

// HTTPClient implements Client using the Ray Dashboard REST API.
//
// All of its fields are unexported; obtain an instance through NewClient.
type HTTPClient struct {
	// contains filtered or unexported fields
}

HTTPClient implements Client using the Ray Dashboard REST API.

func NewClient

func NewClient(baseURL string, timeout time.Duration) *HTTPClient

NewClient creates a new Ray API client. The baseURL should be the Ray Dashboard URL (e.g., "http://localhost:8265"). Any trailing slash on baseURL is stripped to avoid a double slash in constructed URLs.

func (*HTTPClient) GetActorLogs

func (c *HTTPClient) GetActorLogs(ctx context.Context, actorID string) (string, error)

GetActorLogs fetches the stdout logs for a specific actor by actor_id. The response is raw text (Content-Type: text/plain), not JSON.

func (*HTTPClient) GetJobLogs

func (c *HTTPClient) GetJobLogs(ctx context.Context, submissionID string) (string, error)

GetJobLogs fetches the logs for a specific job by submission_id.

func (*HTTPClient) GetNodeLogFile

func (c *HTTPClient) GetNodeLogFile(ctx context.Context, nodeID, filename string) (string, error)

GetNodeLogFile fetches the raw content of a specific log file from a node. The response is raw text (Content-Type: text/plain), not JSON.

func (*HTTPClient) GetServeApplications

func (c *HTTPClient) GetServeApplications(ctx context.Context) (*ServeInstanceDetails, error)

GetServeApplications fetches the Serve instance details from /api/serve/applications/. When Serve is not running or not deployed, it returns a nil result rather than an error.

func (*HTTPClient) GetTaskSummary

func (c *HTTPClient) GetTaskSummary(ctx context.Context, jobID string) (*TaskSummaryResponse, error)

GetTaskSummary fetches the task summary for a specific job by job_id.

func (*HTTPClient) ListActors

func (c *HTTPClient) ListActors(ctx context.Context) ([]Actor, error)

ListActors fetches all actors from the State API (/api/v0/actors).

func (*HTTPClient) ListClusterEvents

func (c *HTTPClient) ListClusterEvents(ctx context.Context) ([]ClusterEvent, error)

ListClusterEvents fetches all cluster events from the State API (/api/v0/cluster_events).

func (*HTTPClient) ListJobDetails

func (c *HTTPClient) ListJobDetails(ctx context.Context) ([]JobDetail, error)

ListJobDetails fetches all jobs from the Jobs REST API (/api/jobs/). This endpoint returns a bare JSON array (not wrapped in StateAPIResponse).

func (*HTTPClient) ListJobs

func (c *HTTPClient) ListJobs(ctx context.Context) ([]Job, error)

ListJobs fetches all jobs from the State API (/api/v0/jobs).

func (*HTTPClient) ListNodeLogs

func (c *HTTPClient) ListNodeLogs(ctx context.Context, nodeID string) (*NodeLogListing, error)

ListNodeLogs fetches the categorized log file listing for a specific node. The response format is {result: bool, msg: string, data: {result: map[string][]string}}.

func (*HTTPClient) ListNodes

func (c *HTTPClient) ListNodes(ctx context.Context) ([]Node, error)

ListNodes fetches all nodes from the State API (/api/v0/nodes).

func (*HTTPClient) Ping

func (c *HTTPClient) Ping(ctx context.Context) (*VersionInfo, error)

Ping checks connectivity to the Ray cluster by calling /api/version.

type Job

// Job represents a Ray job from the State API (/api/v0/jobs).
//
// JobID, ErrorType and DriverInfo are pointers and are marked nullable. The
// fields after the "detail=true fields" marker all carry omitempty in their
// JSON tags.
//
// JobDetail has a matching set of JSON keys but is the type used for the Jobs
// REST API.
//
// NOTE(review): the unit of StartTime/EndTime is not stated here — confirm
// before converting to time.Time.
type Job struct {
	Type         string      `json:"type"`
	SubmissionID string      `json:"submission_id"`
	JobID        *string     `json:"job_id"`     // nullable
	ErrorType    *string     `json:"error_type"` // nullable
	Entrypoint   string      `json:"entrypoint"`
	Status       string      `json:"status"`
	DriverInfo   *DriverInfo `json:"driver_info"` // nullable
	Message      string      `json:"message"`
	// detail=true fields
	StartTime              int64             `json:"start_time,omitempty"`
	EndTime                int64             `json:"end_time,omitempty"`
	DriverExitCode         *int              `json:"driver_exit_code,omitempty"`
	DriverAgentHTTPAddress string            `json:"driver_agent_http_address,omitempty"`
	DriverNodeID           string            `json:"driver_node_id,omitempty"`
	Metadata               map[string]string `json:"metadata,omitempty"`
	RuntimeEnv             map[string]any    `json:"runtime_env,omitempty"`
}

Job represents a Ray job from the State API (/api/v0/jobs).

type JobDetail

// JobDetail represents a job from the Jobs REST API (/api/jobs/). This
// endpoint returns a bare JSON array (no StateAPIResponse wrapper) and includes
// richer fields like start_time, end_time, metadata, etc.
//
// JobID, DriverInfo and ErrorType are pointers and are marked nullable.
// DriverExitCode is also a pointer. Unlike Job, none of the fields are tagged
// omitempty.
//
// NOTE(review): the unit of StartTime/EndTime is not stated here — confirm
// before converting to time.Time.
type JobDetail struct {
	Type                   string            `json:"type"`
	JobID                  *string           `json:"job_id"` // nullable
	SubmissionID           string            `json:"submission_id"`
	DriverInfo             *DriverInfo       `json:"driver_info"` // nullable
	Status                 string            `json:"status"`
	Entrypoint             string            `json:"entrypoint"`
	Message                string            `json:"message"`
	ErrorType              *string           `json:"error_type"` // nullable
	StartTime              int64             `json:"start_time"`
	EndTime                int64             `json:"end_time"`
	Metadata               map[string]string `json:"metadata"`
	RuntimeEnv             map[string]any    `json:"runtime_env"`
	DriverAgentHTTPAddress string            `json:"driver_agent_http_address"`
	DriverNodeID           string            `json:"driver_node_id"`
	DriverExitCode         *int              `json:"driver_exit_code"`
}

JobDetail represents a job from the Jobs REST API (/api/jobs/). This endpoint returns a bare JSON array (no StateAPIResponse wrapper) and includes richer fields like start_time, end_time, metadata, etc.

type Node

// Node represents a Ray cluster node from /api/v0/nodes.
//
// ResourcesTotal and ResourcesAvailable map a string key to a float64 quantity.
// NOTE(review): the keys are presumably resource names — confirm.
//
// StateMessage is a pointer and is marked nullable. The fields after the
// "detail=true fields" marker carry omitempty in their JSON tags.
type Node struct {
	State              string             `json:"state"`
	ResourcesTotal     map[string]float64 `json:"resources_total"`
	ResourcesAvailable map[string]float64 `json:"resources_available"`
	StateMessage       *string            `json:"state_message"` // nullable
	NodeIP             string             `json:"node_ip"`
	IsHeadNode         bool               `json:"is_head_node"`
	Labels             map[string]string  `json:"labels"`
	NodeID             string             `json:"node_id"`
	NodeName           string             `json:"node_name"`
	// detail=true fields
	StartTimeMs int64 `json:"start_time_ms,omitempty"`
	EndTimeMs   int64 `json:"end_time_ms,omitempty"`
}

Node represents a Ray cluster node from /api/v0/nodes.

type NodeLogListing

// NodeLogListing contains categorized log file names for a node.
//
// NOTE(review): Categories has no JSON tag, unlike the fields of the other
// response types in this package — presumably the client fills it in from the
// decoded response rather than decoding into this struct directly; confirm.
type NodeLogListing struct {
	Categories map[string][]string // category name -> list of filenames
}

NodeLogListing contains categorized log file names for a node.

type NodeTaskSummary

// NodeTaskSummary contains task summary data for a node (or "cluster"
// aggregate).
//
// Values of this type are the entries of TaskSummaryResponse.NodeIDToSummary.
// Summary maps a string key to a TaskFuncSummary.
// NOTE(review): the Summary key is presumably the function or class name —
// confirm against a real response.
type NodeTaskSummary struct {
	Summary             map[string]TaskFuncSummary `json:"summary"`
	TotalTasks          int                        `json:"total_tasks"`
	TotalActorTasks     int                        `json:"total_actor_tasks"`
	TotalActorScheduled int                        `json:"total_actor_scheduled"`
	SummaryBy           string                     `json:"summary_by"`
}

NodeTaskSummary contains task summary data for a node (or "cluster" aggregate).

type ProxyDetails

// ProxyDetails contains Serve proxy information.
//
// Values of this type are the entries of ServeInstanceDetails.Proxies. Every
// field except Status is a pointer, so each may be nil after decoding.
// ServeActorDetails declares the same set of fields in a different order.
type ProxyDetails struct {
	Status      string  `json:"status"`
	NodeID      *string `json:"node_id"`
	NodeIP      *string `json:"node_ip"`
	ActorID     *string `json:"actor_id"`
	ActorName   *string `json:"actor_name"`
	WorkerID    *string `json:"worker_id"`
	LogFilePath *string `json:"log_file_path"`
}

ProxyDetails contains Serve proxy information.

type ReplicaDetails

// ReplicaDetails represents a single replica within a deployment.
//
// ReplicaID, State and StartTimeS are plain values. PID and the remaining
// string fields are pointers, so each may be nil after decoding.
// NOTE(review): presumably these are null until the replica's actor has been
// placed — confirm against the Serve API schema.
type ReplicaDetails struct {
	ReplicaID   string  `json:"replica_id"`
	State       string  `json:"state"`
	PID         *int    `json:"pid"`
	ActorName   *string `json:"actor_name"`
	ActorID     *string `json:"actor_id"`
	NodeID      *string `json:"node_id"`
	NodeIP      *string `json:"node_ip"`
	StartTimeS  float64 `json:"start_time_s"`
	LogFilePath *string `json:"log_file_path"`
	WorkerID    *string `json:"worker_id"`
}

ReplicaDetails represents a single replica within a deployment.

type ServeActorDetails

// ServeActorDetails contains Serve controller/proxy actor info.
//
// It is the type of ServeInstanceDetails.ControllerInfo. Every field except
// Status is a pointer, so each may be nil after decoding. ProxyDetails declares
// the same set of fields in a different order.
type ServeActorDetails struct {
	NodeID      *string `json:"node_id"`
	NodeIP      *string `json:"node_ip"`
	ActorID     *string `json:"actor_id"`
	ActorName   *string `json:"actor_name"`
	WorkerID    *string `json:"worker_id"`
	LogFilePath *string `json:"log_file_path"`
	Status      string  `json:"status"`
}

ServeActorDetails contains Serve controller/proxy actor info.

type ServeInstanceDetails

// ServeInstanceDetails is the top-level response from
// GET /api/serve/applications/. This is NOT wrapped in StateAPIResponse -- it's
// parsed directly.
//
// ControllerInfo and TargetCapacity are pointers and may be nil. Proxies and
// Applications are maps with string keys.
// NOTE(review): Proxies is presumably keyed by node ID and Applications by
// application name — confirm against the Serve API schema.
type ServeInstanceDetails struct {
	ControllerInfo *ServeActorDetails            `json:"controller_info"`
	ProxyLocation  string                        `json:"proxy_location"`
	HTTPOptions    map[string]any                `json:"http_options"`
	GRPCOptions    map[string]any                `json:"grpc_options"`
	Proxies        map[string]ProxyDetails       `json:"proxies"`
	DeployMode     string                        `json:"deploy_mode"`
	Applications   map[string]ApplicationDetails `json:"applications"`
	TargetCapacity *float64                      `json:"target_capacity"`
}

ServeInstanceDetails is the top-level response from GET /api/serve/applications/. This is NOT wrapped in StateAPIResponse -- it's parsed directly.

type StateAPIResponse

// StateAPIResponse is the generic wrapper for /api/v0/* endpoints.
//
// T is the element type of the listing. The payload is nested two levels deep:
// Data.Result is a StateAPIResult[T], whose own Result field holds the []T
// items (JSON path data.result.result).
//
// NOTE(review): Result (bool) and Msg presumably report request success and an
// accompanying message — confirm how the client treats Result == false.
type StateAPIResponse[T any] struct {
	Result bool   `json:"result"`
	Msg    string `json:"msg"`
	Data   struct {
		Result StateAPIResult[T] `json:"result"`
	} `json:"data"`
}

StateAPIResponse is the generic wrapper for /api/v0/* endpoints.

type StateAPIResult

// StateAPIResult contains the inner result data from State API responses.
//
// Result holds the decoded items of type T. PartialFailureWarn is decoded from
// the "partial_failure_warning" JSON key (the Go name is abbreviated).
//
// NOTE(review): Total, NumAfterTruncation and NumFiltered are presumably the
// server-side counts before truncation, after truncation and after filtering —
// confirm against the Ray State API documentation.
type StateAPIResult[T any] struct {
	Total              int    `json:"total"`
	NumAfterTruncation int    `json:"num_after_truncation"`
	NumFiltered        int    `json:"num_filtered"`
	Result             []T    `json:"result"`
	PartialFailureWarn string `json:"partial_failure_warning"`
}

StateAPIResult contains the inner result data from State API responses.

type TaskFuncSummary

// TaskFuncSummary summarizes tasks by function/class name with state counts.
//
// StateCounts maps a string key to an integer count.
// NOTE(review): the key is presumably the task state name — confirm.
type TaskFuncSummary struct {
	FuncOrClassName string         `json:"func_or_class_name"`
	Type            string         `json:"type"`
	StateCounts     map[string]int `json:"state_counts"`
}

TaskFuncSummary summarizes tasks by function/class name with state counts.

type TaskSummaryResponse

// TaskSummaryResponse is the inner result from /api/v0/tasks/summarize.
// Access via: data.result.result.node_id_to_summary["cluster"].summary
//
// NOTE(review): that access path implies data.result.result is a JSON object
// for this endpoint, whereas StateAPIResult.Result is declared as a slice —
// presumably this type is decoded through a different wrapper; confirm.
type TaskSummaryResponse struct {
	NodeIDToSummary map[string]NodeTaskSummary `json:"node_id_to_summary"`
}

TaskSummaryResponse is the inner result from /api/v0/tasks/summarize. Access via: data.result.result.node_id_to_summary["cluster"].summary

type VersionInfo

// VersionInfo is returned by GET /api/version.
//
// It is the result type of Client.Ping. All four fields are plain strings
// decoded from the correspondingly named snake_case JSON keys.
type VersionInfo struct {
	Version     string `json:"version"`
	RayVersion  string `json:"ray_version"`
	RayCommit   string `json:"ray_commit"`
	SessionName string `json:"session_name"`
}

VersionInfo is returned by GET /api/version.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL