gateway

package
Version: v0.4.1
Warning

This package is not in the latest version of its module.

Go to latest
Published: Aug 19, 2025 | License: Apache-2.0 | Imports: 40 | Imported by: 0

Documentation

Index

Constants

View Source
// Exported constants of the gateway package: string header names (Header*),
// untyped integer rate-limiting defaults (Default*), and the name of an
// environment variable (Env*).
const (
	// HeaderErrorInvalidRouting is the header name for an invalid routing
	// strategy error.
	HeaderErrorInvalidRouting = "x-error-invalid-routing-strategy"

	// General Error Headers
	HeaderErrorUser                  = "x-error-user"
	HeaderErrorRouting               = "x-error-routing"
	HeaderErrorRequestBodyProcessing = "x-error-request-body-processing"
	HeaderErrorResponseUnmarshal     = "x-error-response-unmarshal"
	HeaderErrorResponseUnknown       = "x-error-response-unknown"

	// Model & Deployment Headers
	HeaderErrorNoModelInRequest = "x-error-no-model-in-request"
	HeaderErrorNoModelBackends  = "x-error-no-model-backends"

	// Streaming Headers
	// HeaderErrorStream and HeaderErrorStreaming are two distinct header names.
	// NOTE(review): the difference in meaning between them is not visible
	// here; confirm against the call sites.
	HeaderErrorStream                    = "x-error-stream"
	HeaderErrorStreaming                 = "x-error-streaming"
	// The value carries a "no-" segment that the identifier omits.
	HeaderErrorStreamOptionsIncludeUsage = "x-error-no-stream-options-include-usage"

	// Request & Target Headers
	// Unlike the error headers above, the last four values have no "x-" prefix.
	HeaderWentIntoReqHeaders = "x-went-into-req-headers"
	HeaderTargetPod          = "target-pod"
	HeaderRoutingStrategy    = "routing-strategy"
	HeaderRequestID          = "request-id"
	HeaderModel              = "model"

	// RPM & TPM update headers and the related error headers
	HeaderUpdateTPM        = "x-update-tpm"
	HeaderUpdateRPM        = "x-update-rpm"
	HeaderErrorRPMExceeded = "x-error-rpm-exceeded"
	HeaderErrorTPMExceeded = "x-error-tpm-exceeded"
	HeaderErrorIncrRPM     = "x-error-incr-rpm"
	HeaderErrorIncrTPM     = "x-error-incr-tpm"

	// Rate Limiting defaults
	// NOTE(review): presumably the default TPM limit is derived as
	// RPM * DefaultTPMMultiplier; the usage is not shown here, so confirm.
	DefaultRPM           = 100
	DefaultTPMMultiplier = 1000

	// Envs
	// EnvRoutingAlgorithm is the name of the environment variable, not its value.
	EnvRoutingAlgorithm = "ROUTING_ALGORITHM"
)

Variables

View Source
var (
	// ErrorUnknownResponse is a package-level error value created with
	// errors.New; its message is "unknown response". Callers can match it
	// with errors.Is.
	ErrorUnknownResponse = errors.New("unknown response")
)

Functions

This section is empty.

Types

type MockCache added in v0.4.0

// MockCache is a test double for cache.Cache. It embeds both mock.Mock and
// cache.Cache.
// NOTE(review): mock.Mock is presumably testify's mock type — confirm against
// the package imports.
type MockCache struct {
	mock.Mock
	cache.Cache
}

MockCache implements cache.Cache interface for testing

func (*MockCache) AddRequestCount added in v0.4.0

func (m *MockCache) AddRequestCount(ctx *types.RoutingContext, requestID string, model string) int64

func (*MockCache) AddSubscriber added in v0.4.0

func (m *MockCache) AddSubscriber(subscriber metrics.MetricSubscriber)

func (*MockCache) DoneRequestCount added in v0.4.0

func (m *MockCache) DoneRequestCount(ctx *types.RoutingContext, requestID string, model string, term int64)

func (*MockCache) DoneRequestTrace added in v0.4.0

func (m *MockCache) DoneRequestTrace(ctx *types.RoutingContext, requestID string, model string, term int64, inputTokens int64, outputTokens int64)

func (*MockCache) GetMetricValueByPod added in v0.4.0

func (m *MockCache) GetMetricValueByPod(namespace string, podName string, metricName string) (metrics.MetricValue, error)

func (*MockCache) GetMetricValueByPodModel added in v0.4.0

func (m *MockCache) GetMetricValueByPodModel(namespace string, podName string, model string, metricName string) (metrics.MetricValue, error)

func (*MockCache) GetPod added in v0.4.0

func (m *MockCache) GetPod(namespace string, podName string) (*v1.Pod, error)

func (*MockCache) HasModel added in v0.4.0

func (m *MockCache) HasModel(model string) bool

func (*MockCache) ListModels added in v0.4.0

func (m *MockCache) ListModels() []string

func (*MockCache) ListModelsByPod added in v0.4.0

func (m *MockCache) ListModelsByPod(namespace string, podName string) ([]string, error)

func (*MockCache) ListPodsByModel added in v0.4.0

func (m *MockCache) ListPodsByModel(model string) (types.PodList, error)

type MockGatewayClassClient added in v0.4.0

// MockGatewayClassClient is a mock type whose only field is an embedded
// mock.Mock.
// NOTE(review): presumably stands in for a GatewayClass client in tests; no
// methods are listed for it here, so confirm its intended use.
type MockGatewayClassClient struct {
	mock.Mock
}

type MockGatewayClient added in v0.4.0

// MockGatewayClient is a mock of the gatewayapi clientset interface, built on
// an embedded mock.Mock. It provides Discovery, GatewayV1, GatewayV1alpha2
// and GatewayV1beta1 methods.
type MockGatewayClient struct {
	mock.Mock
}

MockGatewayClient implements gatewayapi.Clientset interface

func (*MockGatewayClient) Discovery added in v0.4.0

func (*MockGatewayClient) GatewayV1 added in v0.4.0

func (*MockGatewayClient) GatewayV1alpha2 added in v0.4.0

func (*MockGatewayClient) GatewayV1beta1 added in v0.4.0

type MockGatewayV1Client added in v0.4.0

// MockGatewayV1Client is a mock of the Gateway API v1 client, built on an
// embedded mock.Mock. It provides GatewayClasses, Gateways, HTTPRoutes and
// RESTClient methods.
type MockGatewayV1Client struct {
	mock.Mock
}

MockGatewayV1Client implements gatewayapi.Interface

func (*MockGatewayV1Client) GatewayClasses added in v0.4.0

func (*MockGatewayV1Client) Gateways added in v0.4.0

func (*MockGatewayV1Client) HTTPRoutes added in v0.4.0

func (m *MockGatewayV1Client) HTTPRoutes(namespace string) gatewayapiv1.HTTPRouteInterface

func (*MockGatewayV1Client) RESTClient added in v0.4.0

func (m *MockGatewayV1Client) RESTClient() rest.Interface

type MockHTTPRouteClient added in v0.4.0

// MockHTTPRouteClient is a mock of the HTTPRoute client interface, built on an
// embedded mock.Mock. It provides Create, Delete, DeleteCollection, Get, List,
// Patch, Update, UpdateStatus and Watch methods.
type MockHTTPRouteClient struct {
	mock.Mock
}

MockHTTPRouteClient implements gatewayapi.HTTPRouteInterface

func (*MockHTTPRouteClient) Create added in v0.4.0

func (*MockHTTPRouteClient) Delete added in v0.4.0

func (m *MockHTTPRouteClient) Delete(ctx context.Context, name string, opts metav1.DeleteOptions) error

func (*MockHTTPRouteClient) DeleteCollection added in v0.4.0

func (m *MockHTTPRouteClient) DeleteCollection(ctx context.Context, opts metav1.DeleteOptions, listOpts metav1.ListOptions) error

func (*MockHTTPRouteClient) Get added in v0.4.0

func (*MockHTTPRouteClient) List added in v0.4.0

func (*MockHTTPRouteClient) Patch added in v0.4.0

func (m *MockHTTPRouteClient) Patch(ctx context.Context, name string, pt k8stype.PatchType, data []byte, opts metav1.PatchOptions, subresources ...string) (result *gatewayv1.HTTPRoute, err error)

func (*MockHTTPRouteClient) Update added in v0.4.0

func (*MockHTTPRouteClient) UpdateStatus added in v0.4.0

func (*MockHTTPRouteClient) Watch added in v0.4.0

type Server

// Server is the gateway package's server type. Obtain one from NewServer; all
// of its fields are unexported. Its methods are HandleRequestHeaders,
// HandleRequestBody, HandleResponseHeaders, HandleResponseBody, Process,
// Shutdown and StartMetricsServer.
type Server struct {
	// contains filtered or unexported fields
}

func NewServer

func NewServer(redisClient *redis.Client, client kubernetes.Interface, gatewayClient gatewayapi.Interface) *Server

func (*Server) HandleRequestBody

func (*Server) HandleRequestHeaders

func (*Server) HandleResponseBody

func (s *Server) HandleResponseBody(ctx context.Context, requestID string, req *extProcPb.ProcessingRequest, user utils.User, rpm int64, model string, stream bool, traceTerm int64, hasCompleted bool) (*extProcPb.ProcessingResponse, bool)

func (*Server) HandleResponseHeaders

func (s *Server) HandleResponseHeaders(ctx context.Context, requestID string, model string, req *extProcPb.ProcessingRequest) (*extProcPb.ProcessingResponse, bool, int)

func (*Server) Process

func (*Server) Shutdown added in v0.4.0

func (s *Server) Shutdown()

func (*Server) StartMetricsServer added in v0.4.0

func (s *Server) StartMetricsServer(addr string) error

Directories

Path Synopsis
vtc
Package vtc implements the Virtual Token Counter routing algorithms focused on fairness and utilization.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL