Documentation
¶
Index ¶
- type ArgSpec
- type ArgType
- type Collector
- type MCPClientSnapshot
- type MCPSessionSnapshot
- type MCPState
- type Metrics
- type MetricsRegistry
- type Middleware
- type ModelInfo
- type Options
- type PartitionJob
- type PartitionObserver
- type Plugin
- type PluginDescriptor
- type Router
- type Scheduler
- type ServerState
- type StateElement
- type StateRegistry
- type WorkerProvider
- type WorkerRef
- type WorkerRegistry
- type WorkerStatus
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ArgSpec ¶
// ArgSpec describes a single configurable parameter for an extension.
// The Flag, Env and YAML fields are suggested names for the same parameter
// across the three configuration sources.
type ArgSpec struct {
	ID          string  // stable identifier within the plugin options map
	Flag        string  // suggested command-line flag name (e.g. --llm-max-parallel-embeddings)
	Env         string  // suggested environment variable (e.g. LLM_MAX_PARALLEL_EMBEDDINGS)
	YAML        string  // suggested YAML path (e.g. plugin_options.llm.max_parallel_embeddings)
	Type        ArgType // type hint for formatting and validation
	Default     string  // human-readable default value
	Example     string  // optional example value
	Description string  // one-line description
	Deprecated  bool    // true if deprecated
	Replacement string  // optional replacement guidance
	Secret      bool    // true if value is secret and should be masked in UIs
}
ArgSpec describes a single configurable parameter for an extension.
type MCPClientSnapshot ¶
type MCPSessionSnapshot ¶
type MCPState ¶
// MCPState groups a list of client snapshots and a list of session snapshots.
// When encoded as JSON the lists appear under the keys "clients" and
// "sessions"; neither tag uses omitempty, so a nil slice encodes as null.
type MCPState struct {
	Clients  []MCPClientSnapshot  `json:"clients"`
	Sessions []MCPSessionSnapshot `json:"sessions"`
}
type Metrics ¶
// Metrics is the recording interface for job, worker and model measurements.
//
// NOTE(review): no implementation is visible here. The per-method notes below
// are read off the method and parameter names only and should be confirmed
// against the implementing package.
type Metrics interface {
	// RecordJobStart takes a job id; presumably called when the job begins.
	RecordJobStart(id string)
	// RecordJobEnd takes the job id and model together with a duration, input
	// and output token counts, an embeddings count, a success flag and an
	// error message; presumably called once when the job finishes.
	RecordJobEnd(id, model string, dur time.Duration, tokensIn, tokensOut, embeddings uint64, success bool, errMsg string)
	// SetWorkerStatus associates a WorkerStatus with the worker identified by id.
	SetWorkerStatus(id string, status WorkerStatus)
	// ObserveRequestDuration takes a request duration labelled by worker and model.
	ObserveRequestDuration(workerID, model string, dur time.Duration)
	// RecordWorkerProcessingTime takes a processing duration for one worker.
	RecordWorkerProcessingTime(workerID string, dur time.Duration)
	// RecordWorkerTokens takes a token count n of the given kind for one worker.
	// NOTE(review): valid values of kind are not shown here — confirm.
	RecordWorkerTokens(workerID, kind string, n uint64)
	// RecordModelTokens takes a token count n of the given kind for one model.
	// NOTE(review): valid values of kind are not shown here — confirm.
	RecordModelTokens(model, kind string, n uint64)
	// RecordModelRequest takes a model name and whether the request succeeded.
	RecordModelRequest(model string, success bool)
	// RecordModelEmbeddings takes an embeddings count n for one model.
	RecordModelEmbeddings(model string, n uint64)
	// RecordWorkerEmbeddings takes an embeddings count n for one worker.
	RecordWorkerEmbeddings(workerID string, n uint64)
	// RecordWorkerEmbeddingProcessingTime takes an embedding processing
	// duration for one worker.
	RecordWorkerEmbeddingProcessingTime(workerID string, dur time.Duration)
}
type MetricsRegistry ¶
// MetricsRegistry abstracts the Prometheus registry used by plugins.
type MetricsRegistry interface {
	// MustRegister accepts any number of Collectors.
	// NOTE(review): by Go convention a Must-prefixed method panics on
	// failure rather than returning an error — confirm with the implementation.
	MustRegister(...Collector)
}
MetricsRegistry abstracts the Prometheus registry used by plugins.
type Middleware ¶
Middleware represents an HTTP middleware function.
type Options ¶
// Options represents common server options available to all extensions.
// It includes global settings and a dictionary of per-plugin options.
type Options struct {
	// RequestTimeout is the global time to wait for worker activity before timing out a request.
	RequestTimeout time.Duration
	// ClientKey is the shared key clients must present when registering.
	ClientKey string
	// ClientHTTPRoles lists the roles that grant client connect access when present in X-User-Roles.
	ClientHTTPRoles []string
	// AgentHeartbeatInterval controls how often connected agents are expected to send heartbeats.
	// If zero, defaults are used by the server.
	AgentHeartbeatInterval time.Duration
	// AgentHeartbeatExpiry controls how long the server waits without a heartbeat before evicting an agent.
	// If zero, defaults are used by the server.
	AgentHeartbeatExpiry time.Duration
	// PluginOptions holds extension-specific options keyed by plugin ID (e.g., "llm", "mcp").
	// Each inner map carries that plugin's option values as strings.
	// NOTE(review): inner keys are presumably ArgSpec.ID values — confirm.
	PluginOptions map[string]map[string]string
}
Options represents common server options available to all extensions. It includes global settings and a dictionary of per-plugin options.
type PartitionJob ¶
// PartitionJob describes a request that can be split into multiple
// independent chunks and recombined. Implemented by extensions that support
// partitioning.
type PartitionJob interface {
	// Size returns the total number of elements in the job.
	Size() int
	// MakeChunk builds a request body for the subrange [start, start+count).
	// It may return a smaller count if fewer elements remain; that count is
	// reported through the actual result.
	MakeChunk(start, count int) (body []byte, actual int)
	// Append merges a completed worker response for the subrange starting at start.
	Append(resp []byte, start int) error
	// Result returns the final assembled response body.
	Result() []byte
	// Path returns the HTTP path on the worker that handles this job (e.g., "/embeddings").
	Path() string
	// DesiredChunkSize optionally specifies the ideal chunk size for a given worker.
	// Return <= 0 to defer to the worker's preferred size.
	DesiredChunkSize(w WorkerRef) int
	// Observer optionally provides hooks for recording domain-specific metrics.
	// NOTE(review): whether a nil PartitionObserver is an acceptable return
	// value is not shown here — confirm with the caller.
	Observer() PartitionObserver
}
PartitionJob describes a request that can be split into multiple independent chunks and recombined. Implemented by extensions that support partitioning.
type PartitionObserver ¶
// PartitionObserver receives per-chunk and per-job results for partitioned work.
type PartitionObserver interface {
	// OnChunkResult is called after each worker chunk completes.
	// It receives the worker ID, the model, the duration, an element count
	// and whether the chunk succeeded.
	OnChunkResult(workerID, model string, dur time.Duration, elements int, success bool)
	// OnJobResult is called once after all chunks complete or on first failure.
	// It receives the model, the duration, an element count and whether the
	// job succeeded.
	OnJobResult(model string, dur time.Duration, elements int, success bool)
}
PartitionObserver receives per-chunk and per-job results for partitioned work.
type Plugin ¶
// Plugin is implemented by all plugins.
type Plugin interface {
	// ID returns the plugin's identifier as a string.
	// NOTE(review): presumably the same ID used as the key in
	// Options.PluginOptions (e.g. "llm") — confirm.
	ID() string
	// RegisterRoutes is handed the Router abstraction for the plugin to use.
	RegisterRoutes(r Router)
	// RegisterMetrics is handed the MetricsRegistry abstraction for the plugin to use.
	RegisterMetrics(reg MetricsRegistry)
	// RegisterState is handed the StateRegistry for the plugin to use.
	RegisterState(reg StateRegistry)
}
Plugin is implemented by all plugins.
type PluginDescriptor ¶
// PluginDescriptor provides human-readable metadata for an extension and its
// options.
type PluginDescriptor struct {
	ID      string // plugin ID (e.g., "llm")
	Name    string // friendly name (e.g., "LLM Gateway")
	Summary string // short description
	// Args lists one ArgSpec per configurable parameter of the extension.
	Args []ArgSpec
}
PluginDescriptor provides human-readable metadata for an extension and its options.
type Router ¶
// Router abstracts the HTTP router used by plugins.
//
// NOTE(review): the concrete router behind this interface is not visible
// here. The notes below restate the signatures; pattern syntax and
// middleware scoping rules must be confirmed against the implementation.
type Router interface {
	// Handle takes a pattern and the http.Handler to serve it.
	Handle(pattern string, h http.Handler)
	// Group calls for a function that receives a Router.
	// NOTE(review): presumably a scoped sub-router — confirm.
	Group(fn func(r Router))
	// Route takes a pattern and a function that receives a Router.
	// NOTE(review): presumably a sub-router mounted at pattern — confirm.
	Route(pattern string, fn func(r Router))
	// Use takes any number of Middleware values.
	Use(mw ...Middleware)
	// Get takes a pattern and the http.Handler to serve it.
	// NOTE(review): presumably restricted to HTTP GET — confirm.
	Get(pattern string, h http.Handler)
	// Post takes a pattern and the http.Handler to serve it.
	// NOTE(review): presumably restricted to HTTP POST — confirm.
	Post(pattern string, h http.Handler)
}
Router abstracts the HTTP router used by plugins.
type ServerState ¶
type StateElement ¶
type StateRegistry ¶
// StateRegistry accepts StateElement values through Add. It is the type
// handed to Plugin.RegisterState.
type StateRegistry interface {
	// Add takes a single StateElement.
	// NOTE(review): how added elements are stored or exposed is not shown
	// here — confirm with the implementation.
	Add(StateElement)
}
type WorkerProvider ¶
// WorkerProvider is implemented by plugins that handle load-balanced workers.
type WorkerProvider interface {
	// Scheduler returns the plugin's Scheduler.
	Scheduler() Scheduler
}
WorkerProvider is implemented by plugins that handle load-balanced workers.
type WorkerRegistry ¶
type WorkerStatus ¶
// WorkerStatus is a string label for the state of a worker. It is the status
// type accepted by Metrics.SetWorkerStatus.
type WorkerStatus string

// Defined WorkerStatus values. Each constant's string form is the quoted
// literal on its line.
// NOTE(review): the allowed transitions between these states are not shown
// here — confirm with the code that sets them.
const (
	StatusConnected WorkerStatus = "connected"
	StatusWorking   WorkerStatus = "working"
	StatusIdle      WorkerStatus = "idle"
	StatusNotReady  WorkerStatus = "not_ready"
	StatusDraining  WorkerStatus = "draining"
	StatusGone      WorkerStatus = "gone"
)