config

package
v0.247.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 26, 2024 License: Apache-2.0 Imports: 6 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type Config

// Config is the top-level configuration. Each field's struct tag gives the
// YAML key it is associated with.
type Config struct {
	// Runtime, Ollama, and VLLM hold the runtime, Ollama, and vLLM
	// configuration respectively. LLMEngine is an llmkind.K value.
	// NOTE(review): LLMEngine presumably selects which LLM engine is used;
	// confirm against the llmkind package.
	Runtime   RuntimeConfig `yaml:"runtime"`
	Ollama    OllamaConfig  `yaml:"ollama"`
	VLLM      VLLMConfig    `yaml:"vllm"`
	LLMEngine llmkind.K     `yaml:"llmEngine"`
	// LLMPort is the port llm listens on.
	LLMPort int `yaml:"llmPort"`

	// HealthPort is an integer port read from the healthPort key.
	// NOTE(review): presumably the port serving health checks; confirm.
	HealthPort int `yaml:"healthPort"`

	// ObjectStore is the object store configuration.
	ObjectStore ObjectStoreConfig `yaml:"objectStore"`

	// PreloadedModelIDs is a list of model IDs to preload. These models are downloaded locally
	// at startup time.
	PreloadedModelIDs []string `yaml:"preloadedModelIds"`

	// ModelContextLengths is a map of model ID to context length. If not specified, the default
	// context length is used.
	ModelContextLengths map[string]int `yaml:"modelContextLengths"`

	// Debug is the debug configuration.
	Debug DebugConfig `yaml:"debug"`

	// NOTE(review): presumably the addresses of the worker services exposed by
	// inference-manager-server and model-manager-server; confirm the expected
	// address format against the callers.
	InferenceManagerServerWorkerServiceAddr string `yaml:"inferenceManagerServerWorkerServiceAddr"`
	ModelManagerServerWorkerServiceAddr     string `yaml:"modelManagerServerWorkerServiceAddr"`

	// Worker is the worker configuration.
	Worker WorkerConfig `yaml:"worker"`
}

Config is the configuration.

func Parse

func Parse(path string) (Config, error)

Parse parses the configuration file at the given path, returning a new Config struct.

func (*Config) Validate

func (c *Config) Validate() error

Validate validates the configuration.

type DebugConfig added in v0.2.0

// DebugConfig is the debug configuration.
type DebugConfig struct {
	// Standalone is true if the service is running in standalone mode (except for
	// the dependency on inference-manager-server).
	Standalone bool `yaml:"standalone"`
}

DebugConfig is the debug configuration.

type ObjectStoreConfig added in v0.2.0

// ObjectStoreConfig is the object store configuration.
type ObjectStoreConfig struct {
	// S3 is the S3 configuration.
	S3 S3Config `yaml:"s3"`
}

ObjectStoreConfig is the object store configuration.

func (*ObjectStoreConfig) Validate added in v0.2.0

func (c *ObjectStoreConfig) Validate() error

Validate validates the object store configuration.

type OllamaConfig added in v0.180.0

// OllamaConfig is the Ollama configuration.
type OllamaConfig struct {
	// KeepAlive is the keep-alive duration for Ollama.
	// This controls how long Ollama keeps models in GPU memory.
	KeepAlive time.Duration `yaml:"keepAlive"`

	// NumParallel is the maximum number of requests processed in parallel.
	NumParallel int `yaml:"numParallel"`

	// ForceSpreading is true if the models should be spread across all GPUs.
	ForceSpreading bool `yaml:"forceSpreading"`

	// Debug is a boolean read from the debug key.
	// NOTE(review): presumably enables Ollama debug output; confirm.
	Debug bool `yaml:"debug"`
}

OllamaConfig is the Ollama configuration.

type PersistentVolume added in v0.240.0

// PersistentVolume is the persistent volume configuration.
type PersistentVolume struct {
	// StorageClassName, Size, and AccessMode are plain strings read from the
	// storageClassName, size, and accessMode keys.
	// NOTE(review): presumably these mirror Kubernetes persistent volume claim
	// settings (e.g. a quantity string for Size); confirm accepted values.
	StorageClassName string `yaml:"storageClassName"`
	Size             string `yaml:"size"`
	AccessMode       string `yaml:"accessMode"`
}

PersistentVolume is the persistent volume configuration.

type Resources added in v0.240.0

// Resources is the resources configuration.
type Resources struct {
	// Requests and Limits are string-to-string maps; Volume is a pointer to a
	// persistent volume configuration and may be nil.
	// NOTE(review): Requests and Limits are presumably keyed by resource name
	// with quantity strings as values; confirm.
	Requests map[string]string `yaml:"requests"`
	Limits   map[string]string `yaml:"limits"`
	Volume   *PersistentVolume `yaml:"volume"`
}

Resources is the resources configuration.

type RuntimeConfig added in v0.240.0

// RuntimeConfig is the runtime configuration.
type RuntimeConfig struct {
	// Image names and image pull policies for the puller and the runtime,
	// all given as strings.
	PullerImage            string `yaml:"pullerImage"`
	RuntimeImage           string `yaml:"runtimeImage"`
	PullerImagePullPolicy  string `yaml:"pullerImagePullPolicy"`
	RuntimeImagePullPolicy string `yaml:"runtimeImagePullPolicy"`

	// Names of the config map and secrets, and the keys used for the AWS and
	// LLMO credentials, all given as strings.
	// NOTE(review): the *EnvKey fields presumably name keys inside the
	// corresponding secrets; confirm how they are consumed.
	ConfigMapName        string `yaml:"configMapName"`
	AWSSecretName        string `yaml:"awsSecretName"`
	AWSKeyIDEnvKey       string `yaml:"awsKeyIdEnvKey"`
	AWSAccessKeyEnvKey   string `yaml:"awsAccessKeyEnvKey"`
	LLMOWorkerSecretName string `yaml:"llmoWorkerSecretName"`
	LLMOKeyEnvKey        string `yaml:"llmoKeyEnvKey"`

	// ModelResources maps a string key to a Resources value; DefaultResources
	// is a single Resources value.
	// NOTE(review): the key is presumably a model ID and DefaultResources is
	// presumably the fallback for models without an entry; confirm.
	ModelResources   map[string]Resources `yaml:"modelResources"`
	DefaultResources Resources            `yaml:"defaultResources"`
}

RuntimeConfig is the runtime configuration.

type S3Config added in v0.2.0

// S3Config is the S3 configuration.
type S3Config struct {
	// EndpointURL, Region, and Bucket are strings read from the endpointUrl,
	// region, and bucket keys.
	EndpointURL string `yaml:"endpointUrl"`
	Region      string `yaml:"region"`
	Bucket      string `yaml:"bucket"`
}

S3Config is the S3 configuration.

type VLLMConfig added in v0.190.0

// VLLMConfig is the configuration for vLLM.
type VLLMConfig struct {
	// Model is a string read from the model key and NumGPUs is an integer read
	// from the numGpus key.
	// NOTE(review): presumably the model served by vLLM and the number of GPUs
	// it uses; confirm.
	Model   string `yaml:"model"`
	NumGPUs int    `yaml:"numGpus"`
}

VLLMConfig is the configuration for vLLM.

type WorkerConfig added in v0.103.0

// WorkerConfig is the worker configuration.
type WorkerConfig struct {
	// TLS is the worker TLS configuration.
	TLS WorkerTLSConfig `yaml:"tls"`
}

WorkerConfig is the worker configuration.

type WorkerTLSConfig added in v0.103.0

// WorkerTLSConfig is the worker TLS configuration.
type WorkerTLSConfig struct {
	// Enable is a boolean read from the enable key.
	// NOTE(review): presumably turns TLS on for the worker connection; confirm.
	Enable bool `yaml:"enable"`
}

WorkerTLSConfig is the worker TLS configuration.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL