disaggpb

package
v1.38.5 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 1, 2026 | License: Apache-2.0 | Imports: 4 | Imported by: 0

Documentation

Overview

Package disaggpb defines the gRPC service contracts for disaggregated prefill/decode serving. A PrefillWorker runs the prompt-encoding (prefill) phase and streams the resulting key-value cache blocks back, while a DecodeWorker consumes those blocks and streams generated tokens.

Stability: alpha.

Index

Constants

View Source
// DecodeWorker_Decode_FullMethodName is the full gRPC method path for the
// Decode RPC, in the form "/<service name>/<method name>".
const DecodeWorker_Decode_FullMethodName = "/disagg.DecodeWorker/Decode"
View Source
// PrefillWorker_Prefill_FullMethodName is the full gRPC method path for the
// Prefill RPC, in the form "/<service name>/<method name>".
const PrefillWorker_Prefill_FullMethodName = "/disagg.PrefillWorker/Prefill"

Variables

View Source
// DecodeWorker_ServiceDesc describes the "disagg.DecodeWorker" service. It
// declares no entries in Methods and exactly one stream, "Decode".
var DecodeWorker_ServiceDesc = grpc.ServiceDesc{
	ServiceName: "disagg.DecodeWorker",
	// Typed nil pointer to the server interface that implementations must satisfy.
	HandlerType: (*DecodeWorkerServer)(nil),
	// Intentionally empty: the only RPC of this service is listed under Streams.
	Methods:     []grpc.MethodDesc{},
	Streams: []grpc.StreamDesc{
		{
			StreamName:    "Decode",
			Handler:       _DecodeWorker_Decode_Handler,
			// Server-streaming only: the server streams responses, the client does not stream requests.
			ServerStreams: true,
			ClientStreams: false,
		},
	},
	// NOTE(review): presumably the path of the .proto file this service was generated from — confirm.
	Metadata: "serve/disaggregated/proto/disagg.proto",
}

DecodeWorker_ServiceDesc is the grpc.ServiceDesc for the DecodeWorker service.

View Source
// PrefillWorker_ServiceDesc describes the "disagg.PrefillWorker" service. It
// declares no entries in Methods and exactly one stream, "Prefill".
var PrefillWorker_ServiceDesc = grpc.ServiceDesc{
	ServiceName: "disagg.PrefillWorker",
	// Typed nil pointer to the server interface that implementations must satisfy.
	HandlerType: (*PrefillWorkerServer)(nil),
	// Intentionally empty: the only RPC of this service is listed under Streams.
	Methods:     []grpc.MethodDesc{},
	Streams: []grpc.StreamDesc{
		{
			StreamName:    "Prefill",
			Handler:       _PrefillWorker_Prefill_Handler,
			// Server-streaming only: the server streams responses, the client does not stream requests.
			ServerStreams: true,
			ClientStreams: false,
		},
	},
	// NOTE(review): presumably the path of the .proto file this service was generated from — confirm.
	Metadata: "serve/disaggregated/proto/disagg.proto",
}

PrefillWorker_ServiceDesc is the grpc.ServiceDesc for the PrefillWorker service.

Functions

func RegisterDecodeWorkerServer

func RegisterDecodeWorkerServer(s grpc.ServiceRegistrar, srv DecodeWorkerServer)

func RegisterPrefillWorkerServer

func RegisterPrefillWorkerServer(s grpc.ServiceRegistrar, srv PrefillWorkerServer)

Types

type DecodeRequest

// DecodeRequest is the request message of the DecodeWorker.Decode RPC.
// The struct tags record each field's protobuf wire type, field number and
// proto/JSON names.
type DecodeRequest struct {
	// RequestId is proto field 1 (request_id).
	RequestId    string     `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"`
	// KvBlocks is proto field 2 (kv_blocks), a repeated KVBlock message.
	KvBlocks     []*KVBlock `protobuf:"bytes,2,rep,name=kv_blocks,json=kvBlocks,proto3" json:"kv_blocks,omitempty"`
	// TokenIds is proto field 3 (token_ids), a packed repeated varint.
	TokenIds     []int32    `protobuf:"varint,3,rep,packed,name=token_ids,json=tokenIds,proto3" json:"token_ids,omitempty"`
	// MaxNewTokens is proto field 4 (max_new_tokens).
	// NOTE(review): presumably an upper bound on generated tokens — confirm against the .proto.
	MaxNewTokens int32      `protobuf:"varint,4,opt,name=max_new_tokens,json=maxNewTokens,proto3" json:"max_new_tokens,omitempty"`
	// Temperature is proto field 5 (temperature), encoded as fixed32.
	// NOTE(review): presumably the sampling temperature — confirm against the .proto.
	Temperature  float32    `protobuf:"fixed32,5,opt,name=temperature,proto3" json:"temperature,omitempty"`
}

DecodeRequest is sent by the router to a decode worker.

func (*DecodeRequest) GetKvBlocks

func (x *DecodeRequest) GetKvBlocks() []*KVBlock

func (*DecodeRequest) GetMaxNewTokens

func (x *DecodeRequest) GetMaxNewTokens() int32

func (*DecodeRequest) GetRequestId

func (x *DecodeRequest) GetRequestId() string

func (*DecodeRequest) GetTemperature

func (x *DecodeRequest) GetTemperature() float32

func (*DecodeRequest) GetTokenIds

func (x *DecodeRequest) GetTokenIds() []int32

type DecodeWorkerClient

// DecodeWorkerClient is the client-side interface of the DecodeWorker service.
type DecodeWorkerClient interface {
	// Decode sends a single DecodeRequest and returns a server-streaming
	// client from which TokenStream messages are read, or an error.
	Decode(ctx context.Context, in *DecodeRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[TokenStream], error)
}

DecodeWorkerClient is the client API for the DecodeWorker service.

func NewDecodeWorkerClient

func NewDecodeWorkerClient(cc grpc.ClientConnInterface) DecodeWorkerClient

NewDecodeWorkerClient returns a new DecodeWorkerClient.

type DecodeWorkerServer

// DecodeWorkerServer is the server-side interface of the DecodeWorker service.
type DecodeWorkerServer interface {
	// Decode receives a single DecodeRequest and a server stream on which
	// TokenStream messages are sent; it returns an error to report failure.
	Decode(*DecodeRequest, grpc.ServerStreamingServer[TokenStream]) error
	// contains filtered or unexported methods
}

DecodeWorkerServer is the server API for the DecodeWorker service.

type KVBlock

// KVBlock is the message that carries one cache block. It is embedded in
// KVBlockStream and repeated in DecodeRequest. The struct tags record each
// field's protobuf wire type, field number and proto/JSON names.
type KVBlock struct {
	// RequestId is proto field 1 (request_id).
	RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"`
	// LayerIdx is proto field 2 (layer_idx).
	LayerIdx  int32  `protobuf:"varint,2,opt,name=layer_idx,json=layerIdx,proto3" json:"layer_idx,omitempty"`
	// BlockIdx is proto field 3 (block_idx).
	BlockIdx  int32  `protobuf:"varint,3,opt,name=block_idx,json=blockIdx,proto3" json:"block_idx,omitempty"`
	// KData is proto field 4 (k_data), an opaque byte payload.
	// NOTE(review): presumably the serialized key tensor; layout and dtype are not visible here.
	KData     []byte `protobuf:"bytes,4,opt,name=k_data,json=kData,proto3" json:"k_data,omitempty"`
	// VData is proto field 5 (v_data), an opaque byte payload.
	// NOTE(review): presumably the serialized value tensor; layout and dtype are not visible here.
	VData     []byte `protobuf:"bytes,5,opt,name=v_data,json=vData,proto3" json:"v_data,omitempty"`
}

KVBlock carries a single key-value cache block for one layer.

func (*KVBlock) GetBlockIdx

func (x *KVBlock) GetBlockIdx() int32

func (*KVBlock) GetKData

func (x *KVBlock) GetKData() []byte

func (*KVBlock) GetLayerIdx

func (x *KVBlock) GetLayerIdx() int32

func (*KVBlock) GetRequestId

func (x *KVBlock) GetRequestId() string

func (*KVBlock) GetVData

func (x *KVBlock) GetVData() []byte

type KVBlockStream

// KVBlockStream is the message type streamed by the PrefillWorker.Prefill RPC.
type KVBlockStream struct {
	// Block is proto field 1 (block), a KVBlock message.
	Block *KVBlock `protobuf:"bytes,1,opt,name=block,proto3" json:"block,omitempty"`
	// Done is proto field 2 (done).
	// NOTE(review): presumably set on the final message of a stream — confirm against the producer.
	Done  bool     `protobuf:"varint,2,opt,name=done,proto3" json:"done,omitempty"`
}

KVBlockStream wraps a KVBlock with a done flag for streaming.

func (*KVBlockStream) GetBlock

func (x *KVBlockStream) GetBlock() *KVBlock

func (*KVBlockStream) GetDone

func (x *KVBlockStream) GetDone() bool

type PreFillRequest

// PreFillRequest is the request message of the PrefillWorker.Prefill RPC.
// The struct tags record each field's protobuf wire type, field number and
// proto/JSON names.
//
// NOTE(review): this type is spelled "PreFill" while the service, RPC method
// and client/server interfaces use "Prefill" — confirm the intended spelling
// against the .proto definition.
type PreFillRequest struct {
	// RequestId is proto field 1 (request_id).
	RequestId    string  `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"`
	// TokenIds is proto field 2 (token_ids), a packed repeated varint.
	TokenIds     []int32 `protobuf:"varint,2,rep,packed,name=token_ids,json=tokenIds,proto3" json:"token_ids,omitempty"`
	// MaxNewTokens is proto field 3 (max_new_tokens).
	// NOTE(review): presumably an upper bound on generated tokens — confirm against the .proto.
	MaxNewTokens int32   `protobuf:"varint,3,opt,name=max_new_tokens,json=maxNewTokens,proto3" json:"max_new_tokens,omitempty"`
	// Temperature is proto field 4 (temperature), encoded as fixed32.
	// NOTE(review): presumably the sampling temperature — confirm against the .proto.
	Temperature  float32 `protobuf:"fixed32,4,opt,name=temperature,proto3" json:"temperature,omitempty"`
}

PreFillRequest is sent by the router to a prefill worker.

func (*PreFillRequest) GetMaxNewTokens

func (x *PreFillRequest) GetMaxNewTokens() int32

func (*PreFillRequest) GetRequestId

func (x *PreFillRequest) GetRequestId() string

func (*PreFillRequest) GetTemperature

func (x *PreFillRequest) GetTemperature() float32

func (*PreFillRequest) GetTokenIds

func (x *PreFillRequest) GetTokenIds() []int32

type PrefillWorkerClient

// PrefillWorkerClient is the client-side interface of the PrefillWorker service.
type PrefillWorkerClient interface {
	// Prefill sends a single PreFillRequest and returns a server-streaming
	// client from which KVBlockStream messages are read, or an error.
	Prefill(ctx context.Context, in *PreFillRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[KVBlockStream], error)
}

PrefillWorkerClient is the client API for the PrefillWorker service.

func NewPrefillWorkerClient

func NewPrefillWorkerClient(cc grpc.ClientConnInterface) PrefillWorkerClient

NewPrefillWorkerClient returns a new PrefillWorkerClient.

type PrefillWorkerServer

// PrefillWorkerServer is the server-side interface of the PrefillWorker service.
type PrefillWorkerServer interface {
	// Prefill receives a single PreFillRequest and a server stream on which
	// KVBlockStream messages are sent; it returns an error to report failure.
	Prefill(*PreFillRequest, grpc.ServerStreamingServer[KVBlockStream]) error
	// contains filtered or unexported methods
}

PrefillWorkerServer is the server API for the PrefillWorker service.

type TokenStream

// TokenStream is the message type streamed by the DecodeWorker.Decode RPC.
// The struct tags record each field's protobuf wire type, field number and
// proto/JSON names.
type TokenStream struct {
	// RequestId is proto field 1 (request_id).
	RequestId    string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"`
	// TokenId is proto field 2 (token_id).
	TokenId      int32  `protobuf:"varint,2,opt,name=token_id,json=tokenId,proto3" json:"token_id,omitempty"`
	// Done is proto field 3 (done).
	// NOTE(review): presumably set on the final message of a stream — confirm against the producer.
	Done         bool   `protobuf:"varint,3,opt,name=done,proto3" json:"done,omitempty"`
	// FinishReason is proto field 4 (finish_reason).
	// NOTE(review): the set of valid values is not visible here — confirm against the producer.
	FinishReason string `protobuf:"bytes,4,opt,name=finish_reason,json=finishReason,proto3" json:"finish_reason,omitempty"`
}

TokenStream is streamed back from a decode worker.

func (*TokenStream) GetDone

func (x *TokenStream) GetDone() bool

func (*TokenStream) GetFinishReason

func (x *TokenStream) GetFinishReason() string

func (*TokenStream) GetRequestId

func (x *TokenStream) GetRequestId() string

func (*TokenStream) GetTokenId

func (x *TokenStream) GetTokenId() int32

type UnimplementedDecodeWorkerServer

// UnimplementedDecodeWorkerServer is an empty struct that provides a Decode
// method; it is meant to be embedded in DecodeWorkerServer implementations.
type UnimplementedDecodeWorkerServer struct{}

UnimplementedDecodeWorkerServer should be embedded to have forward compatible implementations.

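func (UnimplementedDecodeWorkerServer) Decode

func (UnimplementedDecodeWorkerServer) Decode(*DecodeRequest, grpc.ServerStreamingServer[TokenStream]) error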

type UnimplementedPrefillWorkerServer

// UnimplementedPrefillWorkerServer is an empty struct that provides a Prefill
// method; it is meant to be embedded in PrefillWorkerServer implementations.
type UnimplementedPrefillWorkerServer struct{}

UnimplementedPrefillWorkerServer should be embedded to have forward compatible implementations.

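func (UnimplementedPrefillWorkerServer) Prefill

func (UnimplementedPrefillWorkerServer) Prefill(*PreFillRequest, grpc.ServerStreamingServer[KVBlockStream]) error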

type UnsafeDecodeWorkerServer

// UnsafeDecodeWorkerServer exposes no exported methods; its method set is
// entirely unexported, so it can be satisfied only by embedding.
type UnsafeDecodeWorkerServer interface {
	// contains filtered or unexported methods
}

UnsafeDecodeWorkerServer may be embedded to opt out of forward compatibility.

type UnsafePrefillWorkerServer

// UnsafePrefillWorkerServer exposes no exported methods; its method set is
// entirely unexported, so it can be satisfied only by embedding.
type UnsafePrefillWorkerServer interface {
	// contains filtered or unexported methods
}

UnsafePrefillWorkerServer may be embedded to opt out of forward compatibility.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL