Documentation ¶
Index ¶
- func ActorLogsEndpoint(actorID string) string
- func ActorsStateEndpoint() string
- func ClusterEventsEndpoint() string
- func JobLogsEndpoint(submissionID string) string
- func JobsRESTEndpoint() string
- func JobsStateEndpoint() string
- func NodeLogFileEndpoint(nodeID, filename string) string
- func NodeLogsEndpoint(nodeID string) string
- func NodesStateEndpoint() string
- func ServeApplicationsEndpoint() string
- func TasksSummarizeEndpoint(jobID string) string
- func VersionEndpoint() string
- type Actor
- type ActorDiedErrorContext
- type ApplicationDetails
- type Client
- type ClusterEvent
- type DeathCause
- type DeploymentDetails
- type DriverInfo
- type HTTPClient
- func NewClient(baseURL string, timeout time.Duration) *HTTPClient
- func (c *HTTPClient) GetActorLogs(ctx context.Context, actorID string) (string, error)
- func (c *HTTPClient) GetJobLogs(ctx context.Context, submissionID string) (string, error)
- func (c *HTTPClient) GetNodeLogFile(ctx context.Context, nodeID, filename string) (string, error)
- func (c *HTTPClient) GetServeApplications(ctx context.Context) (*ServeInstanceDetails, error)
- func (c *HTTPClient) GetTaskSummary(ctx context.Context, jobID string) (*TaskSummaryResponse, error)
- func (c *HTTPClient) ListActors(ctx context.Context) ([]Actor, error)
- func (c *HTTPClient) ListClusterEvents(ctx context.Context) ([]ClusterEvent, error)
- func (c *HTTPClient) ListJobDetails(ctx context.Context) ([]JobDetail, error)
- func (c *HTTPClient) ListJobs(ctx context.Context) ([]Job, error)
- func (c *HTTPClient) ListNodeLogs(ctx context.Context, nodeID string) (*NodeLogListing, error)
- func (c *HTTPClient) ListNodes(ctx context.Context) ([]Node, error)
- func (c *HTTPClient) Ping(ctx context.Context) (*VersionInfo, error)
- type Job
- type JobDetail
- type Node
- type NodeLogListing
- type NodeTaskSummary
- type ProxyDetails
- type ReplicaDetails
- type ServeActorDetails
- type ServeInstanceDetails
- type StateAPIResponse
- type StateAPIResult
- type TaskFuncSummary
- type TaskSummaryResponse
- type VersionInfo
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func ActorLogsEndpoint ¶
func ActorLogsEndpoint(actorID string) string
ActorLogsEndpoint returns the path for fetching stdout logs for a specific actor.
func ActorsStateEndpoint ¶
func ActorsStateEndpoint() string
ActorsStateEndpoint returns the path for the State API actors endpoint.
func ClusterEventsEndpoint ¶
func ClusterEventsEndpoint() string
ClusterEventsEndpoint returns the path for the cluster events State API endpoint.
func JobLogsEndpoint ¶
func JobLogsEndpoint(submissionID string) string
JobLogsEndpoint returns the path for fetching logs for a specific job. It uses the submission_id (e.g., "raysubmit_XXX"), NOT the internal job_id.
func JobsRESTEndpoint ¶
func JobsRESTEndpoint() string
JobsRESTEndpoint returns the path for the Jobs REST API endpoint. Note: this endpoint requires a trailing slash.
func JobsStateEndpoint ¶
func JobsStateEndpoint() string
JobsStateEndpoint returns the path for the State API jobs endpoint.
func NodeLogFileEndpoint ¶
func NodeLogFileEndpoint(nodeID, filename string) string
NodeLogFileEndpoint returns the path for fetching a specific log file from a node.
func NodeLogsEndpoint ¶
func NodeLogsEndpoint(nodeID string) string
NodeLogsEndpoint returns the path for listing log files for a specific node.
func NodesStateEndpoint ¶
func NodesStateEndpoint() string
NodesStateEndpoint returns the path for the State API nodes endpoint.
func ServeApplicationsEndpoint ¶
func ServeApplicationsEndpoint() string
ServeApplicationsEndpoint returns the path for the Serve applications endpoint. CRITICAL: This endpoint requires a trailing slash. Without it, you get a 404.
func TasksSummarizeEndpoint ¶
func TasksSummarizeEndpoint(jobID string) string
TasksSummarizeEndpoint returns the path for the task summary endpoint filtered by job_id. It uses the internal job_id (e.g., "02000000"), NOT the submission_id.
func VersionEndpoint ¶
func VersionEndpoint() string
VersionEndpoint returns the path for the version/ping endpoint.
Types ¶
type Actor ¶
// Actor is the decoded form of a single actor record; the package docs
// describe it as a Ray actor from /api/v0/actors.
type Actor struct {
// Fields below have no omitempty, so they are always emitted on marshal.
State string `json:"state"`
ActorID string `json:"actor_id"`
ClassName string `json:"class_name"`
JobID string `json:"job_id"`
RayNamespace string `json:"ray_namespace"`
PID int `json:"pid"`
NodeID string `json:"node_id"`
Name string `json:"name"`
// detail=true fields. Every field in this group carries omitempty, so a
// zero/nil value is dropped when an Actor is marshaled back to JSON.
// Presumably these are only populated when the request asks for
// detail=true — TODO confirm against the caller.
IsDetached bool `json:"is_detached,omitempty"`
PlacementGroupID *string `json:"placement_group_id,omitempty"` // nullable
ReprName string `json:"repr_name,omitempty"`
RequiredResources map[string]any `json:"required_resources,omitempty"`
DeathCause *DeathCause `json:"death_cause,omitempty"` // nullable
// NOTE(review): declared as string although PID above is an int;
// presumably the API serializes this value as a JSON string — confirm
// against a real response before changing the type.
NumRestarts string `json:"num_restarts,omitempty"`
CallSite *string `json:"call_site,omitempty"` // nullable
}
Actor represents a Ray actor from /api/v0/actors.
type ActorDiedErrorContext ¶
// ActorDiedErrorContext carries the detailed error information for a dead
// actor. It is referenced from DeathCause via the
// "actor_died_error_context" JSON key.
//
// No field uses omitempty, so every key is emitted on marshal even when the
// value is the zero value.
type ActorDiedErrorContext struct {
ErrorMessage string `json:"error_message"`
OwnerID string `json:"owner_id"`
OwnerIPAddress string `json:"owner_ip_address"`
NodeIPAddress string `json:"node_ip_address"`
PID int `json:"pid"`
Name string `json:"name"`
RayNamespace string `json:"ray_namespace"`
ClassName string `json:"class_name"`
ActorID string `json:"actor_id"`
Reason string `json:"reason"`
NeverStarted bool `json:"never_started"`
}
ActorDiedErrorContext contains detailed error information for a dead actor.
type ApplicationDetails ¶
// ApplicationDetails is the decoded form of a single Serve application. It
// appears as the value type of ServeInstanceDetails.Applications.
type ApplicationDetails struct {
Name string `json:"name"`
// RoutePrefix and DocsPath are pointers, so a JSON null (or a missing key)
// decodes to nil rather than "". Presumably both are optional in the API
// response — TODO confirm.
RoutePrefix *string `json:"route_prefix"`
DocsPath *string `json:"docs_path"`
Status string `json:"status"`
Message string `json:"message"`
// NOTE(review): the "_s" suffix suggests a timestamp in seconds; the unit
// is not established anywhere in this file — confirm before doing
// arithmetic on it.
LastDeployedTimeS float64 `json:"last_deployed_time_s"`
// DeployedAppConfig is decoded as an untyped JSON object.
DeployedAppConfig map[string]any `json:"deployed_app_config"`
// Deployments maps a string key to each deployment's details (presumably
// keyed by deployment name — verify against a real response).
Deployments map[string]DeploymentDetails `json:"deployments"`
}
ApplicationDetails represents a single Serve application.
type Client ¶
// Client defines the Ray Dashboard API interface. HTTPClient is the
// implementation documented in this package; the per-method notes below
// describe that implementation's documented behavior.
//
// Every method takes a context.Context as its first argument and returns an
// error as its last result.
type Client interface {
// Ping returns version information; HTTPClient uses it as a connectivity
// check against /api/version.
Ping(ctx context.Context) (*VersionInfo, error)
// ListJobs returns jobs from the State API (/api/v0/jobs).
ListJobs(ctx context.Context) ([]Job, error)
// ListNodes returns nodes from the State API (/api/v0/nodes).
ListNodes(ctx context.Context) ([]Node, error)
// ListActors returns actors from the State API (/api/v0/actors).
ListActors(ctx context.Context) ([]Actor, error)
// ListJobDetails returns jobs from the Jobs REST API (/api/jobs/), which
// responds with a bare JSON array rather than a StateAPIResponse wrapper.
ListJobDetails(ctx context.Context) ([]JobDetail, error)
// GetJobLogs returns the logs for a job identified by its submission_id
// (e.g. "raysubmit_XXX"), not the internal job_id.
GetJobLogs(ctx context.Context, submissionID string) (string, error)
// GetTaskSummary returns the task summary for a job identified by its
// internal job_id (e.g. "02000000"), not the submission_id.
GetTaskSummary(ctx context.Context, jobID string) (*TaskSummaryResponse, error)
// ListNodeLogs returns the categorized log file listing for one node.
ListNodeLogs(ctx context.Context, nodeID string) (*NodeLogListing, error)
// GetNodeLogFile returns the raw text content of one log file on a node.
GetNodeLogFile(ctx context.Context, nodeID, filename string) (string, error)
// GetActorLogs returns the raw-text stdout logs for one actor.
GetActorLogs(ctx context.Context, actorID string) (string, error)
// GetServeApplications returns the Serve instance details. HTTPClient is
// documented to return nil (not an error) when Serve is not running or
// not deployed, so callers must nil-check the result.
GetServeApplications(ctx context.Context) (*ServeInstanceDetails, error)
// ListClusterEvents returns cluster events from the State API
// (/api/v0/cluster_events).
ListClusterEvents(ctx context.Context) ([]ClusterEvent, error)
}
Client defines the Ray Dashboard API interface.
type ClusterEvent ¶
// ClusterEvent is the decoded form of a single cluster event; the package
// docs describe it as coming from /api/v0/cluster_events.
type ClusterEvent struct {
Severity string `json:"severity"`
// Time is kept as the raw string from the response; its format is not
// established in this file, so parse it defensively.
Time string `json:"time"`
SourceType string `json:"source_type"`
Message string `json:"message"`
EventID string `json:"event_id"`
// CustomFields is decoded as an untyped JSON object and is dropped on
// marshal when nil or empty (omitempty).
CustomFields map[string]any `json:"custom_fields,omitempty"`
}
ClusterEvent represents a cluster event from /api/v0/cluster_events.
type DeathCause ¶
// DeathCause contains the reason an actor died. It is referenced from
// Actor.DeathCause.
type DeathCause struct {
// ActorDiedErrorContext is a pointer with omitempty: it is nil when the
// key is absent or null, and is dropped on marshal when nil.
ActorDiedErrorContext *ActorDiedErrorContext `json:"actor_died_error_context,omitempty"`
}
DeathCause contains the reason an actor died.
type DeploymentDetails ¶
// DeploymentDetails is the decoded form of a single deployment within a
// Serve application. It appears as the value type of
// ApplicationDetails.Deployments.
type DeploymentDetails struct {
Name string `json:"name"`
Status string `json:"status"`
StatusTrigger string `json:"status_trigger"`
Message string `json:"message"`
TargetNumReplicas int `json:"target_num_replicas"`
// Replicas holds one entry per replica reported in the response.
Replicas []ReplicaDetails `json:"replicas"`
}
DeploymentDetails represents a single deployment within a Serve application.
type DriverInfo ¶
// DriverInfo contains driver process information for a job. It is referenced
// (as a nullable pointer) from Job.DriverInfo and JobDetail.DriverInfo.
type DriverInfo struct {
ID string `json:"id"`
NodeIPAddress string `json:"node_ip_address"`
// NOTE(review): PID is a string here, whereas Actor.PID is an int.
// Presumably the jobs endpoints serialize the pid as a JSON string —
// confirm against a real response before changing the type.
PID string `json:"pid"`
}
DriverInfo contains driver process information for a job.
type HTTPClient ¶
// HTTPClient implements Client using the Ray Dashboard REST API.
// Construct one with NewClient.
type HTTPClient struct {
// contains filtered or unexported fields
}
HTTPClient implements Client using the Ray Dashboard REST API.
func NewClient ¶
func NewClient(baseURL string, timeout time.Duration) *HTTPClient
NewClient creates a new Ray API client. The baseURL should be the Ray Dashboard URL (e.g., "http://localhost:8265"). Any trailing slash on baseURL is stripped to avoid double-slash in URL construction.
func (*HTTPClient) GetActorLogs ¶
func (c *HTTPClient) GetActorLogs(ctx context.Context, actorID string) (string, error)
GetActorLogs fetches the stdout logs for a specific actor by actor_id. The response is raw text (Content-Type: text/plain), not JSON.
func (*HTTPClient) GetJobLogs ¶
func (c *HTTPClient) GetJobLogs(ctx context.Context, submissionID string) (string, error)
GetJobLogs fetches the logs for a specific job by submission_id.
func (*HTTPClient) GetNodeLogFile ¶
func (c *HTTPClient) GetNodeLogFile(ctx context.Context, nodeID, filename string) (string, error)
GetNodeLogFile fetches the raw content of a specific log file from a node. The response is raw text (Content-Type: text/plain), not JSON.
func (*HTTPClient) GetServeApplications ¶
func (c *HTTPClient) GetServeApplications(ctx context.Context) (*ServeInstanceDetails, error)
GetServeApplications fetches the Serve instance details from /api/serve/applications/. Returns nil (not error) when Serve is not running or not deployed.
func (*HTTPClient) GetTaskSummary ¶
func (c *HTTPClient) GetTaskSummary(ctx context.Context, jobID string) (*TaskSummaryResponse, error)
GetTaskSummary fetches the task summary for a specific job by job_id.
func (*HTTPClient) ListActors ¶
func (c *HTTPClient) ListActors(ctx context.Context) ([]Actor, error)
ListActors fetches all actors from the State API (/api/v0/actors).
func (*HTTPClient) ListClusterEvents ¶
func (c *HTTPClient) ListClusterEvents(ctx context.Context) ([]ClusterEvent, error)
ListClusterEvents fetches all cluster events from the State API (/api/v0/cluster_events).
func (*HTTPClient) ListJobDetails ¶
func (c *HTTPClient) ListJobDetails(ctx context.Context) ([]JobDetail, error)
ListJobDetails fetches all jobs from the Jobs REST API (/api/jobs/). This endpoint returns a bare JSON array (not wrapped in StateAPIResponse).
func (*HTTPClient) ListJobs ¶
func (c *HTTPClient) ListJobs(ctx context.Context) ([]Job, error)
ListJobs fetches all jobs from the State API (/api/v0/jobs).
func (*HTTPClient) ListNodeLogs ¶
func (c *HTTPClient) ListNodeLogs(ctx context.Context, nodeID string) (*NodeLogListing, error)
ListNodeLogs fetches the categorized log file listing for a specific node. The response format is {result: bool, msg: string, data: {result: map[string][]string}}.
func (*HTTPClient) ListNodes ¶
func (c *HTTPClient) ListNodes(ctx context.Context) ([]Node, error)
ListNodes fetches all nodes from the State API (/api/v0/nodes).
func (*HTTPClient) Ping ¶
func (c *HTTPClient) Ping(ctx context.Context) (*VersionInfo, error)
Ping checks connectivity to the Ray cluster by calling /api/version.
type Job ¶
// Job is the decoded form of a single job record from the State API
// (/api/v0/jobs). See JobDetail for the Jobs REST API (/api/jobs/) shape.
type Job struct {
Type string `json:"type"`
// SubmissionID is the submission identifier (e.g. "raysubmit_XXX"); it is
// distinct from the internal JobID below.
SubmissionID string `json:"submission_id"`
// JobID is the internal job id (e.g. "02000000"); nil when the response
// carries null.
JobID *string `json:"job_id"` // nullable
ErrorType *string `json:"error_type"` // nullable
Entrypoint string `json:"entrypoint"`
Status string `json:"status"`
DriverInfo *DriverInfo `json:"driver_info"` // nullable
Message string `json:"message"`
// detail=true fields. Every field in this group carries omitempty, so a
// zero/nil value is dropped when a Job is marshaled back to JSON.
// NOTE(review): the unit of StartTime/EndTime is not established in this
// file — confirm before converting to time.Time.
StartTime int64 `json:"start_time,omitempty"`
EndTime int64 `json:"end_time,omitempty"`
// DriverExitCode is a pointer so that "absent" (nil) is distinguishable
// from an exit code of 0.
DriverExitCode *int `json:"driver_exit_code,omitempty"`
DriverAgentHTTPAddress string `json:"driver_agent_http_address,omitempty"`
DriverNodeID string `json:"driver_node_id,omitempty"`
Metadata map[string]string `json:"metadata,omitempty"`
// RuntimeEnv is decoded as an untyped JSON object.
RuntimeEnv map[string]any `json:"runtime_env,omitempty"`
}
Job represents a Ray job from the State API (/api/v0/jobs).
type JobDetail ¶
// JobDetail is the decoded form of a single job from the Jobs REST API
// (/api/jobs/). That endpoint returns a bare JSON array (no StateAPIResponse
// wrapper). Unlike Job, no field here uses omitempty, so every key is
// emitted on marshal.
type JobDetail struct {
Type string `json:"type"`
// JobID is the internal job id (e.g. "02000000"); nil when the response
// carries null.
JobID *string `json:"job_id"` // nullable
// SubmissionID is the submission identifier (e.g. "raysubmit_XXX").
SubmissionID string `json:"submission_id"`
DriverInfo *DriverInfo `json:"driver_info"` // nullable
Status string `json:"status"`
Entrypoint string `json:"entrypoint"`
Message string `json:"message"`
ErrorType *string `json:"error_type"` // nullable
// NOTE(review): the unit of StartTime/EndTime is not established in this
// file — confirm before converting to time.Time.
StartTime int64 `json:"start_time"`
EndTime int64 `json:"end_time"`
Metadata map[string]string `json:"metadata"`
// RuntimeEnv is decoded as an untyped JSON object.
RuntimeEnv map[string]any `json:"runtime_env"`
DriverAgentHTTPAddress string `json:"driver_agent_http_address"`
DriverNodeID string `json:"driver_node_id"`
// DriverExitCode is a pointer so that null/absent (nil) is
// distinguishable from an exit code of 0.
DriverExitCode *int `json:"driver_exit_code"`
}
JobDetail represents a job from the Jobs REST API (/api/jobs/). This endpoint returns a bare JSON array (no StateAPIResponse wrapper) and includes richer fields like start_time, end_time, metadata, etc.
type Node ¶
// Node is the decoded form of a single cluster node record; the package docs
// describe it as coming from /api/v0/nodes.
type Node struct {
State string `json:"state"`
// ResourcesTotal and ResourcesAvailable map a resource name to a numeric
// quantity. Which resource names appear is determined by the API
// response and is not fixed by this type.
ResourcesTotal map[string]float64 `json:"resources_total"`
ResourcesAvailable map[string]float64 `json:"resources_available"`
StateMessage *string `json:"state_message"` // nullable
NodeIP string `json:"node_ip"`
IsHeadNode bool `json:"is_head_node"`
Labels map[string]string `json:"labels"`
NodeID string `json:"node_id"`
NodeName string `json:"node_name"`
// detail=true fields. Both carry omitempty, so a zero value is dropped on
// marshal. The "Ms" suffix suggests milliseconds — presumably epoch
// milliseconds; TODO confirm.
StartTimeMs int64 `json:"start_time_ms,omitempty"`
EndTimeMs int64 `json:"end_time_ms,omitempty"`
}
Node represents a Ray cluster node from /api/v0/nodes.
type NodeLogListing ¶
NodeLogListing contains categorized log file names for a node.
type NodeTaskSummary ¶
// NodeTaskSummary contains task summary data for a node (or the "cluster"
// aggregate). It is the value type of TaskSummaryResponse.NodeIDToSummary.
type NodeTaskSummary struct {
// Summary maps a string key to the per-function/class summary
// (presumably keyed by function or class name — verify against a real
// response).
Summary map[string]TaskFuncSummary `json:"summary"`
TotalTasks int `json:"total_tasks"`
TotalActorTasks int `json:"total_actor_tasks"`
TotalActorScheduled int `json:"total_actor_scheduled"`
SummaryBy string `json:"summary_by"`
}
NodeTaskSummary contains task summary data for a node (or "cluster" aggregate).
type ProxyDetails ¶
// ProxyDetails contains Serve proxy information. It is the value type of
// ServeInstanceDetails.Proxies.
//
// Every field except Status is a *string, so a JSON null (or missing key)
// decodes to nil; callers must nil-check before dereferencing.
type ProxyDetails struct {
Status string `json:"status"`
NodeID *string `json:"node_id"`
NodeIP *string `json:"node_ip"`
ActorID *string `json:"actor_id"`
ActorName *string `json:"actor_name"`
WorkerID *string `json:"worker_id"`
LogFilePath *string `json:"log_file_path"`
}
ProxyDetails contains Serve proxy information.
type ReplicaDetails ¶
// ReplicaDetails is the decoded form of a single replica within a
// deployment. It is the element type of DeploymentDetails.Replicas.
//
// Pointer-typed fields decode to nil when the JSON value is null or the key
// is missing; callers must nil-check before dereferencing.
type ReplicaDetails struct {
ReplicaID string `json:"replica_id"`
State string `json:"state"`
PID *int `json:"pid"`
ActorName *string `json:"actor_name"`
ActorID *string `json:"actor_id"`
NodeID *string `json:"node_id"`
NodeIP *string `json:"node_ip"`
// NOTE(review): the "_s" suffix suggests a timestamp in seconds; the unit
// is not established in this file — confirm before converting.
StartTimeS float64 `json:"start_time_s"`
LogFilePath *string `json:"log_file_path"`
WorkerID *string `json:"worker_id"`
}
ReplicaDetails represents a single replica within a deployment.
type ServeActorDetails ¶
// ServeActorDetails contains Serve controller/proxy actor info. It is
// referenced from ServeInstanceDetails.ControllerInfo.
//
// Every field except Status is a *string, so a JSON null (or missing key)
// decodes to nil; callers must nil-check before dereferencing.
type ServeActorDetails struct {
NodeID *string `json:"node_id"`
NodeIP *string `json:"node_ip"`
ActorID *string `json:"actor_id"`
ActorName *string `json:"actor_name"`
WorkerID *string `json:"worker_id"`
LogFilePath *string `json:"log_file_path"`
Status string `json:"status"`
}
ServeActorDetails contains Serve controller/proxy actor info.
type ServeInstanceDetails ¶
// ServeInstanceDetails is the top-level response from
// GET /api/serve/applications/. It is NOT wrapped in StateAPIResponse; the
// response body is decoded directly into this type.
type ServeInstanceDetails struct {
// ControllerInfo is nil when the key is null or missing.
ControllerInfo *ServeActorDetails `json:"controller_info"`
ProxyLocation string `json:"proxy_location"`
// HTTPOptions and GRPCOptions are decoded as untyped JSON objects.
HTTPOptions map[string]any `json:"http_options"`
GRPCOptions map[string]any `json:"grpc_options"`
// Proxies maps a string key to each proxy's details (presumably keyed by
// node id — verify against a real response).
Proxies map[string]ProxyDetails `json:"proxies"`
DeployMode string `json:"deploy_mode"`
// Applications maps a string key to each application's details
// (presumably keyed by application name — verify against a real
// response).
Applications map[string]ApplicationDetails `json:"applications"`
// TargetCapacity is a pointer so that null/absent (nil) is
// distinguishable from an explicit 0.
TargetCapacity *float64 `json:"target_capacity"`
}
ServeInstanceDetails is the top-level response from GET /api/serve/applications/. This is NOT wrapped in StateAPIResponse -- it's parsed directly.
type StateAPIResponse ¶
type StateAPIResponse[T any] struct { Result bool `json:"result"` Msg string `json:"msg"` Data struct { Result StateAPIResult[T] `json:"result"` } `json:"data"` }
StateAPIResponse is the generic wrapper for /api/v0/* endpoints.
type StateAPIResult ¶
// StateAPIResult contains the inner result data from State API responses.
// T is the element type of the Result list.
//
// The exact semantics of the count fields are defined by the Ray State API
// and are not established in this file; they are decoded verbatim from the
// JSON keys named in the struct tags.
type StateAPIResult[T any] struct {
	Total              int `json:"total"`
	NumAfterTruncation int `json:"num_after_truncation"`
	NumFiltered        int `json:"num_filtered"`
	// Result is the list of decoded records.
	Result []T `json:"result"`
	// PartialFailureWarn is decoded from the "partial_failure_warning" key.
	PartialFailureWarn string `json:"partial_failure_warning"`
}
StateAPIResult contains the inner result data from State API responses.
type TaskFuncSummary ¶
// TaskFuncSummary summarizes tasks by function/class name with state counts.
// It is the value type of NodeTaskSummary.Summary.
type TaskFuncSummary struct {
FuncOrClassName string `json:"func_or_class_name"`
Type string `json:"type"`
// StateCounts maps a task state name to the number of tasks in that
// state. The set of state names comes from the API response and is not
// fixed by this type.
StateCounts map[string]int `json:"state_counts"`
}
TaskFuncSummary summarizes tasks by function/class name with state counts.
type TaskSummaryResponse ¶
// TaskSummaryResponse is the inner result from /api/v0/tasks/summarize.
// In the raw response it sits at data.result.result; the cluster-wide
// aggregate is found under the "cluster" key of NodeIDToSummary.
type TaskSummaryResponse struct {
// NodeIDToSummary maps a node id (or the literal "cluster") to that
// scope's task summary.
NodeIDToSummary map[string]NodeTaskSummary `json:"node_id_to_summary"`
}
TaskSummaryResponse is the inner result from /api/v0/tasks/summarize. Access via: data.result.result.node_id_to_summary["cluster"].summary