Documentation
¶
Overview ¶
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Copyright 2024 The Aibrix Team.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
Index ¶
- Constants
- Variables
- type Cache
- type MetricCache
- type Model
- type ModelCache
- type Pod
- type PodCache
- type RequestTrace
- func (t *RequestTrace) AddRequest(requestID string, key string) (int64, bool)
- func (t *RequestTrace) AddRequestTrace(requestID string, inputTokens, outputTokens int64, key string) (string, bool)
- func (t *RequestTrace) DoneRequest(requestID string, term int64)
- func (t *RequestTrace) DoneRequestTrace(requestID string, inputTokens, outputTokens int64, key string, term int64) (string, bool)
- func (t *RequestTrace) Lock()
- func (t *RequestTrace) Recycle()
- func (t *RequestTrace) RecycleLocked()
- func (t *RequestTrace) ToMap(total_pending int32) map[string]int
- func (t *RequestTrace) ToMapLocked(total_pending int32) map[string]int
- func (t *RequestTrace) Unlock()
- type RequestTraceMetaKey
- type RequestTracker
- type Store
- func Init(config *rest.Config, stopCh <-chan struct{}) *Store
- func InitForGateway(config *rest.Config, stopCh <-chan struct{}, redisClient *redis.Client) *Store
- func InitForMetadata(config *rest.Config, stopCh <-chan struct{}, redisClient *redis.Client) *Store
- func InitForTest() *Store
- func New(redisClient *redis.Client, prometheusApi prometheusv1.API) *Store
- func NewTestCacheWithPods(pods []*v1.Pod, model string) *Store
- func NewTestCacheWithPodsMetrics(pods []*v1.Pod, model string, ...) *Store
- func (c *Store) AddRequestCount(ctx *types.RoutingContext, requestID string, modelName string) (traceTerm int64)
- func (c *Store) AddSubscriber(subscriber metrics.MetricSubscriber)
- func (c *Store) DoneRequestCount(ctx *types.RoutingContext, requestID string, modelName string, traceTerm int64)
- func (c *Store) DoneRequestTrace(ctx *types.RoutingContext, requestID string, modelName string, ...)
- func (c *Store) GetMetricValueByPod(podName, podNamespace, metricName string) (metrics.MetricValue, error)
- func (c *Store) GetMetricValueByPodModel(podName, podNamespace, modelName string, metricName string) (metrics.MetricValue, error)
- func (c *Store) GetPod(podName, podNamespace string) (*v1.Pod, error)
- func (c *Store) HasModel(modelName string) bool
- func (c *Store) ListModels() []string
- func (c *Store) ListModelsByPod(podName, podNamespace string) ([]string, error)
- func (c *Store) ListPods() []*v1.Pod
- func (c *Store) ListPodsByModel(modelName string) (types.PodList, error)
Constants ¶
const ( // The version of request trace, version history: // v1: No meta, default // v2: Added metadata, including version (meta_v), bucket precision (meta_precision), and interval (meta_interval_sec), to notify the client of the trace interval. // v3: Added the number of total requests (meta_total_reqs) and pending requests (meta_pending_reqs) for uncompleted requests. RequestTraceVersion = 3 // Trace write interval RequestTraceWriteInterval = 10 * time.Second // Max tolerable write delay to write ticks. // For example, with RequestTraceWriteInterval = 10s and MaxRequestTraceIntervalOffset = 500ms, the trace should be written before X:00.5s, X:10.5s, .., X:50.5s. MaxRequestTraceIntervalOffset = 500 * time.Millisecond // The precision of buckets in trace. 0.1 means requests will be split into buckets of .1 according to log2(tokens) RequestTracePrecision = 0.1 )
Variables ¶
var NewRequestTrace = newRequestTraceGen(nil)
Functions ¶
This section is empty.
Types ¶
type Cache ¶
type Cache interface { PodCache ModelCache MetricCache RequestTracker }
Cache is the root interface aggregating caching functionalities
type MetricCache ¶
type MetricCache interface { // GetMetricValueByPod gets metric value for a pod // Parameters: // podName: Name of the pod // podNamespace: Namespace of the pod // metricName: Name of the metric // Returns: // metrics.MetricValue: Retrieved metric value // error: Error information if operation fails GetMetricValueByPod(podName, podNamespace, metricName string) (metrics.MetricValue, error) // GetMetricValueByPodModel gets metric value for pod-model pair // Parameters: // podName: Name of the pod // podNamespace: Namespace of the pod // modelName: Name of the model // metricName: Name of the metric // Returns: // metrics.MetricValue: Retrieved metric value // error: Error information if operation fails GetMetricValueByPodModel(podName, podNamespace, modelName string, metricName string) (metrics.MetricValue, error) // AddSubscriber adds a metric subscriber // Parameters: // subscriber: Metric subscriber implementation AddSubscriber(subscriber metrics.MetricSubscriber) }
MetricCache defines operations for metric data caching
type Model ¶
type Model struct { // Pods is a CustomizedRegistry that stores *v1.Pod objects. // The internal map uses `namespace/name` as the key and `*v1.Pod` as the value. // This allows efficient lookups and caching of Pod objects by their unique identifier. Pods *utils.CustomizedRegistry[*v1.Pod, *utils.PodArray] // contains filtered or unexported fields }
type ModelCache ¶
type ModelCache interface { // HasModel checks existence of a model // Parameters: // modelName: Name of the model // Returns: // bool: True if model exists, false otherwise HasModel(modelName string) bool // ListModels gets all model names // Returns: // []string: List of model names ListModels() []string // ListModelsByPod gets models associated with a pod // Parameters: // podName: Name of the pod // podNamespace: Namespace of the pod // Returns: // []string: List of model names // error: Error information if operation fails ListModelsByPod(podName, podNamespace string) ([]string, error) }
ModelCache defines operations for model information caching
type Pod ¶
type Pod struct { *v1.Pod Models *utils.Registry[string] // Model/adapter names that the pod is running Metrics utils.SyncMap[string, metrics.MetricValue] // Pod metrics (metric_name -> value) ModelMetrics utils.SyncMap[string, metrics.MetricValue] // Pod-model metrics (model_name/metric_name -> value) // contains filtered or unexported fields }
type PodCache ¶
type PodCache interface { // GetPod retrieves a Pod object by name // Parameters: // podName: Name of the pod // podNamespace: Namespace of the pod // Returns: // *v1.Pod: Found pod object // error: Error information if operation fails GetPod(podName, podNamespace string) (*v1.Pod, error) // ListPodsByModel gets pods associated with a model // Parameters: // modelName: Name of the model // Returns: // types.PodList: Pod objects matching the criteria // error: Error information if operation fails ListPodsByModel(modelName string) (types.PodList, error) }
PodCache defines operations for pod information caching
type RequestTrace ¶
type RequestTrace struct {
// contains filtered or unexported fields
}
func (*RequestTrace) AddRequest ¶
func (t *RequestTrace) AddRequest(requestID string, key string) (int64, bool)
Increase request counting and return the trace term, key is ignored for now.
func (*RequestTrace) AddRequestTrace ¶
func (t *RequestTrace) AddRequestTrace(requestID string, inputTokens, outputTokens int64, key string) (string, bool)
Add request trace profile. key must be provided and will not be checked
func (*RequestTrace) DoneRequest ¶
func (t *RequestTrace) DoneRequest(requestID string, term int64)
Decrease request counting with term verification; retrying is futile.
func (*RequestTrace) DoneRequestTrace ¶
func (t *RequestTrace) DoneRequestTrace(requestID string, inputTokens, outputTokens int64, key string, term int64) (string, bool)
Decrease request counting and add request trace profile.
func (*RequestTrace) Lock ¶
func (t *RequestTrace) Lock()
func (*RequestTrace) Recycle ¶
func (t *RequestTrace) Recycle()
func (*RequestTrace) RecycleLocked ¶
func (t *RequestTrace) RecycleLocked()
func (*RequestTrace) ToMapLocked ¶
func (t *RequestTrace) ToMapLocked(total_pending int32) map[string]int
func (*RequestTrace) Unlock ¶
func (t *RequestTrace) Unlock()
type RequestTraceMetaKey ¶
type RequestTraceMetaKey int
const ( MetaKeyVersionKey RequestTraceMetaKey = iota MetaKeyIntervalInSeconds MetaKeyTracePrecision MetaKeyTotalRequests MetaKeyPendingRequests RequestTraceNumMetaKeys // Guardian for the number of RequestTraceMetaKey. This is not an actual meta key. )
func (RequestTraceMetaKey) ToString ¶
func (key RequestTraceMetaKey) ToString() string
type RequestTracker ¶
type RequestTracker interface { // AddRequestCount starts tracking request count // Parameters: // ctx: Routing context // requestID: Unique request identifier // modelName: Name of the model // Returns: // int64: Trace term identifier AddRequestCount(ctx *types.RoutingContext, requestID string, modelName string) (traceTerm int64) // DoneRequestCount completes request count tracking, only one DoneRequestXXX should be called for a request // Parameters: // ctx: Routing context // requestID: Unique request identifier // modelName: Name of the model // traceTerm: Trace term identifier DoneRequestCount(ctx *types.RoutingContext, requestID string, modelName string, traceTerm int64) // DoneRequestTrace completes request tracing, only one DoneRequestXXX should be called for a request // Parameters: // ctx: Routing context // requestID: Unique request identifier // modelName: Name of the model // inputTokens: Number of input tokens // outputTokens: Number of output tokens // traceTerm: Trace term identifier DoneRequestTrace(ctx *types.RoutingContext, requestID string, modelName string, inputTokens, outputTokens, traceTerm int64) }
RequestTracker defines operations for tracking workload statistics
type Store ¶
type Store struct {
// contains filtered or unexported fields
}
Store contains core data structures and components of the caching system
func Init ¶
Init initializes the cache store (singleton pattern) Parameters:
config: Kubernetes configuration stopCh: Stop signal channel
Returns:
*Store: Pointer to initialized store instance
func InitForGateway ¶
func InitForMetadata ¶
func InitForTest ¶
func InitForTest() *Store
InitForTest initializes the cache store for testing purposes
func New ¶
func New(redisClient *redis.Client, prometheusApi prometheusv1.API) *Store
New creates a new cache store instance Parameters:
redisClient: Redis client instance prometheusApi: Prometheus API client
Returns:
*Store: Pointer to initialized cache store instance
func (*Store) AddRequestCount ¶
func (c *Store) AddRequestCount(ctx *types.RoutingContext, requestID string, modelName string) (traceTerm int64)
AddRequestCount tracks new request initiation Parameters:
ctx: Routing context requestID: Unique request identifier modelName: Model handling the request
Returns:
int64: Trace term identifier
func (*Store) AddSubscriber ¶
func (c *Store) AddSubscriber(subscriber metrics.MetricSubscriber)
AddSubscriber registers new metric subscriber Parameters:
subscriber: Metric subscriber implementation
func (*Store) DoneRequestCount ¶
func (c *Store) DoneRequestCount(ctx *types.RoutingContext, requestID string, modelName string, traceTerm int64)
DoneRequestCount completes request tracking Parameters:
ctx: Routing context requestID: Unique request identifier modelName: Model handling the request traceTerm: Trace term identifier
func (*Store) DoneRequestTrace ¶
func (c *Store) DoneRequestTrace(ctx *types.RoutingContext, requestID string, modelName string, inputTokens, outputTokens, traceTerm int64)
DoneRequestTrace completes request tracing Parameters:
ctx: Routing context requestID: Unique request identifier modelName: Model handling the request inputTokens: Input tokens count outputTokens: Output tokens count traceTerm: Trace term identifier
func (*Store) GetMetricValueByPod ¶
func (c *Store) GetMetricValueByPod(podName, podNamespace, metricName string) (metrics.MetricValue, error)
GetMetricValueByPod retrieves metric value for a Pod Parameters:
podName: Name of the Pod podNamespace: Namespace of the Pod metricName: Name of the metric
Returns:
metrics.MetricValue: The metric value error: Error if Pod or metric doesn't exist
func (*Store) GetMetricValueByPodModel ¶
func (c *Store) GetMetricValueByPodModel(podName, podNamespace, modelName string, metricName string) (metrics.MetricValue, error)
GetMetricValueByPodModel retrieves metric value for Pod-Model combination Parameters:
podName: Name of the Pod podNamespace: Namespace of the Pod modelName: Name of the model metricName: Name of the metric
Returns:
metrics.MetricValue: The metric value error: Error if Pod, model or metric doesn't exist
func (*Store) GetPod ¶
GetPod retrieves a Pod object by name from the cache Parameters:
podName: Name of the pod to retrieve podNamespace: Namespace of the pod to retrieve
Returns:
*v1.Pod: The found Pod object error: Error if pod doesn't exist
func (*Store) HasModel ¶
HasModel checks if a model exists in the cache Parameters:
modelName: Name of the model to check
Returns:
bool: True if model exists
func (*Store) ListModels ¶
ListModels returns all cached model names Returns:
[]string: Slice of model names
func (*Store) ListModelsByPod ¶
ListModelsByPod gets models associated with a specific Pod Parameters:
podName: Name of the Pod to query podNamespace: Namespace of the Pod to query
Returns:
[]string: Slice of model names error: Error if Pod doesn't exist
func (*Store) ListPods ¶
ListPods returns all cached Pod objects. It is intended for debugging only and is less efficient than the other lookups, so avoid calling it directly. Returns:
[]*v1.Pod: Slice of Pod objects
func (*Store) ListPodsByModel ¶
ListPodsByModel gets Pods associated with a specific model Parameters:
modelName: Name of the model to query
Returns:
types.PodList: Pod list wrapper for a slice of Pod objects error: Error if model doesn't exist