Documentation
¶
Overview ¶
Package disaggpb defines the gRPC service contracts for disaggregated prefill/decode serving. A PrefillWorker runs the prompt-encoding phase and streams the resulting key-value (KV) cache blocks back, while a DecodeWorker consumes those blocks and streams generated tokens. Stability: alpha.
Index ¶
- Constants
- Variables
- func RegisterDecodeWorkerServer(s grpc.ServiceRegistrar, srv DecodeWorkerServer)
- func RegisterPrefillWorkerServer(s grpc.ServiceRegistrar, srv PrefillWorkerServer)
- type DecodeRequest
- type DecodeWorkerClient
- type DecodeWorkerServer
- type KVBlock
- type KVBlockStream
- type PreFillRequest
- type PrefillWorkerClient
- type PrefillWorkerServer
- type TokenStream
- type UnimplementedDecodeWorkerServer
- type UnimplementedPrefillWorkerServer
- type UnsafeDecodeWorkerServer
- type UnsafePrefillWorkerServer
Constants ¶
const DecodeWorker_Decode_FullMethodName = "/disagg.DecodeWorker/Decode"
const PrefillWorker_Prefill_FullMethodName = "/disagg.PrefillWorker/Prefill"
Variables ¶
var DecodeWorker_ServiceDesc = grpc.ServiceDesc{ ServiceName: "disagg.DecodeWorker", HandlerType: (*DecodeWorkerServer)(nil), Methods: []grpc.MethodDesc{}, Streams: []grpc.StreamDesc{ { StreamName: "Decode", Handler: _DecodeWorker_Decode_Handler, ServerStreams: true, ClientStreams: false, }, }, Metadata: "serve/disaggregated/proto/disagg.proto", }
DecodeWorker_ServiceDesc is the grpc.ServiceDesc for the DecodeWorker service.
var PrefillWorker_ServiceDesc = grpc.ServiceDesc{ ServiceName: "disagg.PrefillWorker", HandlerType: (*PrefillWorkerServer)(nil), Methods: []grpc.MethodDesc{}, Streams: []grpc.StreamDesc{ { StreamName: "Prefill", Handler: _PrefillWorker_Prefill_Handler, ServerStreams: true, ClientStreams: false, }, }, Metadata: "serve/disaggregated/proto/disagg.proto", }
PrefillWorker_ServiceDesc is the grpc.ServiceDesc for the PrefillWorker service.
Functions ¶
func RegisterDecodeWorkerServer ¶
func RegisterDecodeWorkerServer(s grpc.ServiceRegistrar, srv DecodeWorkerServer)
func RegisterPrefillWorkerServer ¶
func RegisterPrefillWorkerServer(s grpc.ServiceRegistrar, srv PrefillWorkerServer)
Types ¶
type DecodeRequest ¶
type DecodeRequest struct {
RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"`
KvBlocks []*KVBlock `protobuf:"bytes,2,rep,name=kv_blocks,json=kvBlocks,proto3" json:"kv_blocks,omitempty"`
TokenIds []int32 `protobuf:"varint,3,rep,packed,name=token_ids,json=tokenIds,proto3" json:"token_ids,omitempty"`
MaxNewTokens int32 `protobuf:"varint,4,opt,name=max_new_tokens,json=maxNewTokens,proto3" json:"max_new_tokens,omitempty"`
Temperature float32 `protobuf:"fixed32,5,opt,name=temperature,proto3" json:"temperature,omitempty"`
}
DecodeRequest is sent by the router to a decode worker.
func (*DecodeRequest) GetKvBlocks ¶
func (x *DecodeRequest) GetKvBlocks() []*KVBlock
func (*DecodeRequest) GetMaxNewTokens ¶
func (x *DecodeRequest) GetMaxNewTokens() int32
func (*DecodeRequest) GetRequestId ¶
func (x *DecodeRequest) GetRequestId() string
func (*DecodeRequest) GetTemperature ¶
func (x *DecodeRequest) GetTemperature() float32
func (*DecodeRequest) GetTokenIds ¶
func (x *DecodeRequest) GetTokenIds() []int32
type DecodeWorkerClient ¶
type DecodeWorkerClient interface {
Decode(ctx context.Context, in *DecodeRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[TokenStream], error)
}
DecodeWorkerClient is the client API for the DecodeWorker service.
func NewDecodeWorkerClient ¶
func NewDecodeWorkerClient(cc grpc.ClientConnInterface) DecodeWorkerClient
NewDecodeWorkerClient returns a new DecodeWorkerClient.
type DecodeWorkerServer ¶
type DecodeWorkerServer interface {
Decode(*DecodeRequest, grpc.ServerStreamingServer[TokenStream]) error
// contains filtered or unexported methods
}
DecodeWorkerServer is the server API for the DecodeWorker service.
type KVBlock ¶
type KVBlock struct {
RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"`
LayerIdx int32 `protobuf:"varint,2,opt,name=layer_idx,json=layerIdx,proto3" json:"layer_idx,omitempty"`
BlockIdx int32 `protobuf:"varint,3,opt,name=block_idx,json=blockIdx,proto3" json:"block_idx,omitempty"`
KData []byte `protobuf:"bytes,4,opt,name=k_data,json=kData,proto3" json:"k_data,omitempty"`
VData []byte `protobuf:"bytes,5,opt,name=v_data,json=vData,proto3" json:"v_data,omitempty"`
}
KVBlock carries a single key-value cache block for one layer.
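func (*KVBlock) GetBlockIdx ¶
func (x *KVBlock) GetBlockIdx() int32
func (*KVBlock) GetLayerIdx ¶
func (x *KVBlock) GetLayerIdx() int32
func (*KVBlock) GetRequestId ¶
func (x *KVBlock) GetRequestId() string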
type KVBlockStream ¶
type KVBlockStream struct {
Block *KVBlock `protobuf:"bytes,1,opt,name=block,proto3" json:"block,omitempty"`
Done bool `protobuf:"varint,2,opt,name=done,proto3" json:"done,omitempty"`
}
KVBlockStream is the message type streamed back by PrefillWorker.Prefill; each message wraps a KVBlock together with a Done flag.
func (*KVBlockStream) GetBlock ¶
func (x *KVBlockStream) GetBlock() *KVBlock
func (*KVBlockStream) GetDone ¶
func (x *KVBlockStream) GetDone() bool
type PreFillRequest ¶
type PreFillRequest struct {
RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"`
TokenIds []int32 `protobuf:"varint,2,rep,packed,name=token_ids,json=tokenIds,proto3" json:"token_ids,omitempty"`
MaxNewTokens int32 `protobuf:"varint,3,opt,name=max_new_tokens,json=maxNewTokens,proto3" json:"max_new_tokens,omitempty"`
Temperature float32 `protobuf:"fixed32,4,opt,name=temperature,proto3" json:"temperature,omitempty"`
}
PreFillRequest is sent by the router to a prefill worker.
func (*PreFillRequest) GetMaxNewTokens ¶
func (x *PreFillRequest) GetMaxNewTokens() int32
func (*PreFillRequest) GetRequestId ¶
func (x *PreFillRequest) GetRequestId() string
func (*PreFillRequest) GetTemperature ¶
func (x *PreFillRequest) GetTemperature() float32
func (*PreFillRequest) GetTokenIds ¶
func (x *PreFillRequest) GetTokenIds() []int32
type PrefillWorkerClient ¶
type PrefillWorkerClient interface {
Prefill(ctx context.Context, in *PreFillRequest, opts ...grpc.CallOption) (grpc.ServerStreamingClient[KVBlockStream], error)
}
PrefillWorkerClient is the client API for the PrefillWorker service.
func NewPrefillWorkerClient ¶
func NewPrefillWorkerClient(cc grpc.ClientConnInterface) PrefillWorkerClient
NewPrefillWorkerClient returns a new PrefillWorkerClient.
type PrefillWorkerServer ¶
type PrefillWorkerServer interface {
Prefill(*PreFillRequest, grpc.ServerStreamingServer[KVBlockStream]) error
// contains filtered or unexported methods
}
PrefillWorkerServer is the server API for the PrefillWorker service.
type TokenStream ¶
type TokenStream struct {
RequestId string `protobuf:"bytes,1,opt,name=request_id,json=requestId,proto3" json:"request_id,omitempty"`
TokenId int32 `protobuf:"varint,2,opt,name=token_id,json=tokenId,proto3" json:"token_id,omitempty"`
Done bool `protobuf:"varint,3,opt,name=done,proto3" json:"done,omitempty"`
FinishReason string `protobuf:"bytes,4,opt,name=finish_reason,json=finishReason,proto3" json:"finish_reason,omitempty"`
}
TokenStream is streamed back from a decode worker.
func (*TokenStream) GetDone ¶
func (x *TokenStream) GetDone() bool
func (*TokenStream) GetFinishReason ¶
func (x *TokenStream) GetFinishReason() string
func (*TokenStream) GetRequestId ¶
func (x *TokenStream) GetRequestId() string
func (*TokenStream) GetTokenId ¶
func (x *TokenStream) GetTokenId() int32
type UnimplementedDecodeWorkerServer ¶
type UnimplementedDecodeWorkerServer struct{}
UnimplementedDecodeWorkerServer should be embedded in DecodeWorkerServer implementations to keep them forward-compatible.
func (UnimplementedDecodeWorkerServer) Decode ¶
func (UnimplementedDecodeWorkerServer) Decode(*DecodeRequest, grpc.ServerStreamingServer[TokenStream]) error
type UnimplementedPrefillWorkerServer ¶
type UnimplementedPrefillWorkerServer struct{}
UnimplementedPrefillWorkerServer should be embedded in PrefillWorkerServer implementations to keep them forward-compatible.
func (UnimplementedPrefillWorkerServer) Prefill ¶
func (UnimplementedPrefillWorkerServer) Prefill(*PreFillRequest, grpc.ServerStreamingServer[KVBlockStream]) error
type UnsafeDecodeWorkerServer ¶
type UnsafeDecodeWorkerServer interface {
// contains filtered or unexported methods
}
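UnsafeDecodeWorkerServer may be embedded in a DecodeWorkerServer implementation, instead of UnimplementedDecodeWorkerServer, to opt out of forward compatibility for this service.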
type UnsafePrefillWorkerServer ¶
type UnsafePrefillWorkerServer interface {
// contains filtered or unexported methods
}
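UnsafePrefillWorkerServer may be embedded in a PrefillWorkerServer implementation, instead of UnimplementedPrefillWorkerServer, to opt out of forward compatibility for this service.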