Documentation
¶
Index ¶
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type Config ¶
type Config struct {
// Runtime is the runtime configuration.
Runtime RuntimeConfig `yaml:"runtime"`
// Ollama is the Ollama configuration.
Ollama OllamaConfig `yaml:"ollama"`
// VLLM is the configuration for vLLM.
VLLM VLLMConfig `yaml:"vllm"`
// LLMEngine is an llmkind.K value; presumably it selects which LLM engine
// (e.g. Ollama or vLLM) is used. TODO: confirm against the llmkind package.
LLMEngine llmkind.K `yaml:"llmEngine"`
// LLMPort is the port the LLM listens on.
LLMPort int `yaml:"llmPort"`
// HealthPort is presumably the port serving health checks. TODO: confirm.
HealthPort int `yaml:"healthPort"`
// ObjectStore is the object store configuration.
ObjectStore ObjectStoreConfig `yaml:"objectStore"`
// PreloadedModelIDs is a list of model IDs to preload. These models are downloaded locally
// at startup.
PreloadedModelIDs []string `yaml:"preloadedModelIds"`
// ModelContextLengths is a map of model ID to context length. If a model ID is not
// present in the map, the default context length is used.
ModelContextLengths map[string]int `yaml:"modelContextLengths"`
// Debug is the debug configuration.
Debug DebugConfig `yaml:"debug"`
// InferenceManagerServerWorkerServiceAddr is presumably the address of the worker
// service exposed by inference-manager-server. TODO: confirm.
InferenceManagerServerWorkerServiceAddr string `yaml:"inferenceManagerServerWorkerServiceAddr"`
// ModelManagerServerWorkerServiceAddr is presumably the address of the worker
// service exposed by model-manager-server. TODO: confirm.
ModelManagerServerWorkerServiceAddr string `yaml:"modelManagerServerWorkerServiceAddr"`
// Worker is the worker configuration.
Worker WorkerConfig `yaml:"worker"`
}
Config is the configuration.
type DebugConfig ¶ added in v0.2.0
type DebugConfig struct {
// Standalone is true if the service is running in standalone mode (except for the
// dependency on inference-manager-server).
Standalone bool `yaml:"standalone"`
}
DebugConfig is the debug configuration.
type ObjectStoreConfig ¶ added in v0.2.0
type ObjectStoreConfig struct {
// S3 is the S3 configuration.
S3 S3Config `yaml:"s3"`
}
ObjectStoreConfig is the object store configuration.
func (*ObjectStoreConfig) Validate ¶ added in v0.2.0
func (c *ObjectStoreConfig) Validate() error
Validate validates the object store configuration.
type OllamaConfig ¶ added in v0.180.0
type OllamaConfig struct {
// KeepAlive is the keep-alive duration for Ollama.
// This controls how long Ollama keeps models in GPU memory.
KeepAlive time.Duration `yaml:"keepAlive"`
// NumParallel is the maximum number of requests processed in parallel.
NumParallel int `yaml:"numParallel"`
// ForceSpreading is true if the models should be spread across all GPUs.
ForceSpreading bool `yaml:"forceSpreading"`
// Debug presumably enables debug output for Ollama. TODO: confirm.
Debug bool `yaml:"debug"`
}
OllamaConfig is the Ollama configuration.
type PersistentVolume ¶ added in v0.240.0
type PersistentVolume struct {
// StorageClassName is presumably the name of the Kubernetes storage class used
// for the volume. TODO: confirm.
StorageClassName string `yaml:"storageClassName"`
// Size is the volume size as a string; presumably a Kubernetes quantity such
// as "100Gi". TODO: confirm the expected format.
Size string `yaml:"size"`
// AccessMode is the volume access mode as a string; presumably a Kubernetes
// access mode such as "ReadWriteOnce". TODO: confirm the accepted values.
AccessMode string `yaml:"accessMode"`
}
PersistentVolume is the persistent volume configuration.
type Resources ¶ added in v0.240.0
type Resources struct {
// Requests maps a resource name to a requested amount; presumably Kubernetes
// resource requests (e.g. "cpu", "memory"). TODO: confirm.
Requests map[string]string `yaml:"requests"`
// Limits maps a resource name to a limit; presumably Kubernetes resource
// limits. TODO: confirm.
Limits map[string]string `yaml:"limits"`
// Volume is the persistent volume configuration. It is a pointer, so it is nil
// when no volume is configured.
Volume *PersistentVolume `yaml:"volume"`
}
Resources is the resources configuration.
type RuntimeConfig ¶ added in v0.240.0
type RuntimeConfig struct {
// PullerImage is presumably the container image of the model puller. TODO: confirm.
PullerImage string `yaml:"pullerImage"`
// RuntimeImage is presumably the container image of the inference runtime. TODO: confirm.
RuntimeImage string `yaml:"runtimeImage"`
// PullerImagePullPolicy is presumably the image pull policy for PullerImage. TODO: confirm.
PullerImagePullPolicy string `yaml:"pullerImagePullPolicy"`
// RuntimeImagePullPolicy is presumably the image pull policy for RuntimeImage. TODO: confirm.
RuntimeImagePullPolicy string `yaml:"runtimeImagePullPolicy"`
// ConfigMapName is the name of a config map; which config map it refers to is
// not visible here. TODO: confirm.
ConfigMapName string `yaml:"configMapName"`
// AWSSecretName is presumably the name of the secret holding AWS credentials. TODO: confirm.
AWSSecretName string `yaml:"awsSecretName"`
// AWSKeyIDEnvKey is presumably the key within the AWS secret for the access key ID. TODO: confirm.
AWSKeyIDEnvKey string `yaml:"awsKeyIdEnvKey"`
// AWSAccessKeyEnvKey is presumably the key within the AWS secret for the secret access key. TODO: confirm.
AWSAccessKeyEnvKey string `yaml:"awsAccessKeyEnvKey"`
// LLMOWorkerSecretName is presumably the name of the secret holding the LLMO worker key. TODO: confirm.
LLMOWorkerSecretName string `yaml:"llmoWorkerSecretName"`
// LLMOKeyEnvKey is presumably the key within the LLMO worker secret. TODO: confirm.
LLMOKeyEnvKey string `yaml:"llmoKeyEnvKey"`
// ModelResources maps a string key (presumably a model ID) to the resources
// configuration for that model. TODO: confirm the key format.
ModelResources map[string]Resources `yaml:"modelResources"`
// DefaultResources is presumably the resources configuration used for models
// without an entry in ModelResources. TODO: confirm.
DefaultResources Resources `yaml:"defaultResources"`
}
RuntimeConfig is the runtime configuration.
type S3Config ¶ added in v0.2.0
type S3Config struct {
// EndpointURL is the URL of the S3 endpoint.
EndpointURL string `yaml:"endpointUrl"`
// Region is presumably the S3 region name. TODO: confirm.
Region string `yaml:"region"`
// Bucket is presumably the name of the S3 bucket. TODO: confirm.
Bucket string `yaml:"bucket"`
}
S3Config is the S3 configuration.
type VLLMConfig ¶ added in v0.190.0
VLLMConfig is the configuration for vLLM.
type WorkerConfig ¶ added in v0.103.0
type WorkerConfig struct {
// TLS is the worker TLS configuration.
TLS WorkerTLSConfig `yaml:"tls"`
}
WorkerConfig is the worker configuration.
type WorkerTLSConfig ¶ added in v0.103.0
type WorkerTLSConfig struct {
// Enable presumably turns TLS on for worker connections when true. TODO: confirm.
Enable bool `yaml:"enable"`
}
WorkerTLSConfig is the worker TLS configuration.
Click to show internal directories.
Click to hide internal directories.