Documentation ¶
Index ¶
- Constants
- Variables
- func RegisterGkeInferenceQuickstartServer(s grpc.ServiceRegistrar, srv GkeInferenceQuickstartServer)
- type Amount
- type Cost
- func (*Cost) Descriptor() ([]byte, []int) (deprecated)
- func (x *Cost) GetCostPerMillionInputTokens() *Amount
- func (x *Cost) GetCostPerMillionOutputTokens() *Amount
- func (x *Cost) GetOutputInputCostRatio() float32
- func (x *Cost) GetPricingModel() string
- func (*Cost) ProtoMessage()
- func (x *Cost) ProtoReflect() protoreflect.Message
- func (x *Cost) Reset()
- func (x *Cost) String() string
- type FetchBenchmarkingDataRequest
- func (*FetchBenchmarkingDataRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchBenchmarkingDataRequest) GetInstanceType() string
- func (x *FetchBenchmarkingDataRequest) GetModelServerInfo() *ModelServerInfo
- func (x *FetchBenchmarkingDataRequest) GetPricingModel() string
- func (*FetchBenchmarkingDataRequest) ProtoMessage()
- func (x *FetchBenchmarkingDataRequest) ProtoReflect() protoreflect.Message
- func (x *FetchBenchmarkingDataRequest) Reset()
- func (x *FetchBenchmarkingDataRequest) String() string
- type FetchBenchmarkingDataResponse
- func (*FetchBenchmarkingDataResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchBenchmarkingDataResponse) GetProfile() []*Profile
- func (*FetchBenchmarkingDataResponse) ProtoMessage()
- func (x *FetchBenchmarkingDataResponse) ProtoReflect() protoreflect.Message
- func (x *FetchBenchmarkingDataResponse) Reset()
- func (x *FetchBenchmarkingDataResponse) String() string
- type FetchModelServerVersionsRequest
- func (*FetchModelServerVersionsRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchModelServerVersionsRequest) GetModel() string
- func (x *FetchModelServerVersionsRequest) GetModelServer() string
- func (x *FetchModelServerVersionsRequest) GetPageSize() int32
- func (x *FetchModelServerVersionsRequest) GetPageToken() string
- func (*FetchModelServerVersionsRequest) ProtoMessage()
- func (x *FetchModelServerVersionsRequest) ProtoReflect() protoreflect.Message
- func (x *FetchModelServerVersionsRequest) Reset()
- func (x *FetchModelServerVersionsRequest) String() string
- type FetchModelServerVersionsResponse
- func (*FetchModelServerVersionsResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchModelServerVersionsResponse) GetModelServerVersions() []string
- func (x *FetchModelServerVersionsResponse) GetNextPageToken() string
- func (*FetchModelServerVersionsResponse) ProtoMessage()
- func (x *FetchModelServerVersionsResponse) ProtoReflect() protoreflect.Message
- func (x *FetchModelServerVersionsResponse) Reset()
- func (x *FetchModelServerVersionsResponse) String() string
- type FetchModelServersRequest
- func (*FetchModelServersRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchModelServersRequest) GetModel() string
- func (x *FetchModelServersRequest) GetPageSize() int32
- func (x *FetchModelServersRequest) GetPageToken() string
- func (*FetchModelServersRequest) ProtoMessage()
- func (x *FetchModelServersRequest) ProtoReflect() protoreflect.Message
- func (x *FetchModelServersRequest) Reset()
- func (x *FetchModelServersRequest) String() string
- type FetchModelServersResponse
- func (*FetchModelServersResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchModelServersResponse) GetModelServers() []string
- func (x *FetchModelServersResponse) GetNextPageToken() string
- func (*FetchModelServersResponse) ProtoMessage()
- func (x *FetchModelServersResponse) ProtoReflect() protoreflect.Message
- func (x *FetchModelServersResponse) Reset()
- func (x *FetchModelServersResponse) String() string
- type FetchModelsRequest
- func (*FetchModelsRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchModelsRequest) GetPageSize() int32
- func (x *FetchModelsRequest) GetPageToken() string
- func (*FetchModelsRequest) ProtoMessage()
- func (x *FetchModelsRequest) ProtoReflect() protoreflect.Message
- func (x *FetchModelsRequest) Reset()
- func (x *FetchModelsRequest) String() string
- type FetchModelsResponse
- func (*FetchModelsResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchModelsResponse) GetModels() []string
- func (x *FetchModelsResponse) GetNextPageToken() string
- func (*FetchModelsResponse) ProtoMessage()
- func (x *FetchModelsResponse) ProtoReflect() protoreflect.Message
- func (x *FetchModelsResponse) Reset()
- func (x *FetchModelsResponse) String() string
- type FetchProfilesRequest
- func (*FetchProfilesRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchProfilesRequest) GetModel() string
- func (x *FetchProfilesRequest) GetModelServer() string
- func (x *FetchProfilesRequest) GetModelServerVersion() string
- func (x *FetchProfilesRequest) GetPageSize() int32
- func (x *FetchProfilesRequest) GetPageToken() string
- func (x *FetchProfilesRequest) GetPerformanceRequirements() *PerformanceRequirements
- func (*FetchProfilesRequest) ProtoMessage()
- func (x *FetchProfilesRequest) ProtoReflect() protoreflect.Message
- func (x *FetchProfilesRequest) Reset()
- func (x *FetchProfilesRequest) String() string
- type FetchProfilesResponse
- func (*FetchProfilesResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchProfilesResponse) GetComments() string
- func (x *FetchProfilesResponse) GetNextPageToken() string
- func (x *FetchProfilesResponse) GetPerformanceRange() *PerformanceRange
- func (x *FetchProfilesResponse) GetProfile() []*Profile
- func (*FetchProfilesResponse) ProtoMessage()
- func (x *FetchProfilesResponse) ProtoReflect() protoreflect.Message
- func (x *FetchProfilesResponse) Reset()
- func (x *FetchProfilesResponse) String() string
- type GenerateOptimizedManifestRequest
- func (*GenerateOptimizedManifestRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *GenerateOptimizedManifestRequest) GetAcceleratorType() string
- func (x *GenerateOptimizedManifestRequest) GetKubernetesNamespace() string
- func (x *GenerateOptimizedManifestRequest) GetModelServerInfo() *ModelServerInfo
- func (x *GenerateOptimizedManifestRequest) GetPerformanceRequirements() *PerformanceRequirements
- func (x *GenerateOptimizedManifestRequest) GetStorageConfig() *StorageConfig
- func (*GenerateOptimizedManifestRequest) ProtoMessage()
- func (x *GenerateOptimizedManifestRequest) ProtoReflect() protoreflect.Message
- func (x *GenerateOptimizedManifestRequest) Reset()
- func (x *GenerateOptimizedManifestRequest) String() string
- type GenerateOptimizedManifestResponse
- func (*GenerateOptimizedManifestResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *GenerateOptimizedManifestResponse) GetComments() []string
- func (x *GenerateOptimizedManifestResponse) GetKubernetesManifests() []*KubernetesManifest
- func (x *GenerateOptimizedManifestResponse) GetManifestVersion() string
- func (*GenerateOptimizedManifestResponse) ProtoMessage()
- func (x *GenerateOptimizedManifestResponse) ProtoReflect() protoreflect.Message
- func (x *GenerateOptimizedManifestResponse) Reset()
- func (x *GenerateOptimizedManifestResponse) String() string
- type GkeInferenceQuickstartClient
- type GkeInferenceQuickstartServer
- type KubernetesManifest
- func (*KubernetesManifest) Descriptor() ([]byte, []int) (deprecated)
- func (x *KubernetesManifest) GetApiVersion() string
- func (x *KubernetesManifest) GetContent() string
- func (x *KubernetesManifest) GetKind() string
- func (*KubernetesManifest) ProtoMessage()
- func (x *KubernetesManifest) ProtoReflect() protoreflect.Message
- func (x *KubernetesManifest) Reset()
- func (x *KubernetesManifest) String() string
- type MillisecondRange
- func (*MillisecondRange) Descriptor() ([]byte, []int) (deprecated)
- func (x *MillisecondRange) GetMax() int32
- func (x *MillisecondRange) GetMin() int32
- func (*MillisecondRange) ProtoMessage()
- func (x *MillisecondRange) ProtoReflect() protoreflect.Message
- func (x *MillisecondRange) Reset()
- func (x *MillisecondRange) String() string
- type ModelServerInfo
- func (*ModelServerInfo) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelServerInfo) GetModel() string
- func (x *ModelServerInfo) GetModelServer() string
- func (x *ModelServerInfo) GetModelServerVersion() string
- func (*ModelServerInfo) ProtoMessage()
- func (x *ModelServerInfo) ProtoReflect() protoreflect.Message
- func (x *ModelServerInfo) Reset()
- func (x *ModelServerInfo) String() string
- type PerformanceRange
- func (*PerformanceRange) Descriptor() ([]byte, []int) (deprecated)
- func (x *PerformanceRange) GetNtpotRange() *MillisecondRange
- func (x *PerformanceRange) GetThroughputOutputRange() *TokensPerSecondRange
- func (x *PerformanceRange) GetTtftRange() *MillisecondRange
- func (*PerformanceRange) ProtoMessage()
- func (x *PerformanceRange) ProtoReflect() protoreflect.Message
- func (x *PerformanceRange) Reset()
- func (x *PerformanceRange) String() string
- type PerformanceRequirements
- func (*PerformanceRequirements) Descriptor() ([]byte, []int) (deprecated)
- func (x *PerformanceRequirements) GetTargetCost() *Cost
- func (x *PerformanceRequirements) GetTargetNtpotMilliseconds() int32
- func (x *PerformanceRequirements) GetTargetTtftMilliseconds() int32
- func (*PerformanceRequirements) ProtoMessage()
- func (x *PerformanceRequirements) ProtoReflect() protoreflect.Message
- func (x *PerformanceRequirements) Reset()
- func (x *PerformanceRequirements) String() string
- type PerformanceStats
- func (*PerformanceStats) Descriptor() ([]byte, []int) (deprecated)
- func (x *PerformanceStats) GetCost() []*Cost
- func (x *PerformanceStats) GetNtpotMilliseconds() int32
- func (x *PerformanceStats) GetOutputTokensPerSecond() int32
- func (x *PerformanceStats) GetQueriesPerSecond() float32
- func (x *PerformanceStats) GetTtftMilliseconds() int32
- func (*PerformanceStats) ProtoMessage()
- func (x *PerformanceStats) ProtoReflect() protoreflect.Message
- func (x *PerformanceStats) Reset()
- func (x *PerformanceStats) String() string
- type Profile
- func (*Profile) Descriptor() ([]byte, []int) (deprecated)
- func (x *Profile) GetAcceleratorType() string
- func (x *Profile) GetInstanceType() string
- func (x *Profile) GetModelServerInfo() *ModelServerInfo
- func (x *Profile) GetPerformanceStats() []*PerformanceStats
- func (x *Profile) GetResourcesUsed() *ResourcesUsed
- func (x *Profile) GetTpuTopology() string
- func (*Profile) ProtoMessage()
- func (x *Profile) ProtoReflect() protoreflect.Message
- func (x *Profile) Reset()
- func (x *Profile) String() string
- type ResourcesUsed
- type StorageConfig
- func (*StorageConfig) Descriptor() ([]byte, []int) (deprecated)
- func (x *StorageConfig) GetModelBucketUri() string
- func (x *StorageConfig) GetXlaCacheBucketUri() string
- func (*StorageConfig) ProtoMessage()
- func (x *StorageConfig) ProtoReflect() protoreflect.Message
- func (x *StorageConfig) Reset()
- func (x *StorageConfig) String() string
- type TokensPerSecondRange
- func (*TokensPerSecondRange) Descriptor() ([]byte, []int) (deprecated)
- func (x *TokensPerSecondRange) GetMax() int32
- func (x *TokensPerSecondRange) GetMin() int32
- func (*TokensPerSecondRange) ProtoMessage()
- func (x *TokensPerSecondRange) ProtoReflect() protoreflect.Message
- func (x *TokensPerSecondRange) Reset()
- func (x *TokensPerSecondRange) String() string
- type UnimplementedGkeInferenceQuickstartServer
- func (UnimplementedGkeInferenceQuickstartServer) FetchBenchmarkingData(context.Context, *FetchBenchmarkingDataRequest) (*FetchBenchmarkingDataResponse, error)
- func (UnimplementedGkeInferenceQuickstartServer) FetchModelServerVersions(context.Context, *FetchModelServerVersionsRequest) (*FetchModelServerVersionsResponse, error)
- func (UnimplementedGkeInferenceQuickstartServer) FetchModelServers(context.Context, *FetchModelServersRequest) (*FetchModelServersResponse, error)
- func (UnimplementedGkeInferenceQuickstartServer) FetchModels(context.Context, *FetchModelsRequest) (*FetchModelsResponse, error)
- func (UnimplementedGkeInferenceQuickstartServer) FetchProfiles(context.Context, *FetchProfilesRequest) (*FetchProfilesResponse, error)
- func (UnimplementedGkeInferenceQuickstartServer) GenerateOptimizedManifest(context.Context, *GenerateOptimizedManifestRequest) (*GenerateOptimizedManifestResponse, error)
- type UnsafeGkeInferenceQuickstartServer
Constants ¶
const (
	GkeInferenceQuickstart_FetchModels_FullMethodName               = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchModels"
	GkeInferenceQuickstart_FetchModelServers_FullMethodName         = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchModelServers"
	GkeInferenceQuickstart_FetchModelServerVersions_FullMethodName  = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchModelServerVersions"
	GkeInferenceQuickstart_FetchProfiles_FullMethodName             = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchProfiles"
	GkeInferenceQuickstart_GenerateOptimizedManifest_FullMethodName = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/GenerateOptimizedManifest"
	GkeInferenceQuickstart_FetchBenchmarkingData_FullMethodName     = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchBenchmarkingData"
)
Variables ¶
var File_google_cloud_gkerecommender_v1_gkerecommender_proto protoreflect.FileDescriptor
var GkeInferenceQuickstart_ServiceDesc = grpc.ServiceDesc{
	ServiceName: "google.cloud.gkerecommender.v1.GkeInferenceQuickstart",
	HandlerType: (*GkeInferenceQuickstartServer)(nil),
	Methods: []grpc.MethodDesc{
		{
			MethodName: "FetchModels",
			Handler:    _GkeInferenceQuickstart_FetchModels_Handler,
		},
		{
			MethodName: "FetchModelServers",
			Handler:    _GkeInferenceQuickstart_FetchModelServers_Handler,
		},
		{
			MethodName: "FetchModelServerVersions",
			Handler:    _GkeInferenceQuickstart_FetchModelServerVersions_Handler,
		},
		{
			MethodName: "FetchProfiles",
			Handler:    _GkeInferenceQuickstart_FetchProfiles_Handler,
		},
		{
			MethodName: "GenerateOptimizedManifest",
			Handler:    _GkeInferenceQuickstart_GenerateOptimizedManifest_Handler,
		},
		{
			MethodName: "FetchBenchmarkingData",
			Handler:    _GkeInferenceQuickstart_FetchBenchmarkingData_Handler,
		},
	},
	Streams:  []grpc.StreamDesc{},
	Metadata: "google/cloud/gkerecommender/v1/gkerecommender.proto",
}
GkeInferenceQuickstart_ServiceDesc is the grpc.ServiceDesc for the GkeInferenceQuickstart service. It's only intended for direct use with grpc.RegisterService, and not to be introspected or modified (even as a copy).
Functions ¶
func RegisterGkeInferenceQuickstartServer ¶
func RegisterGkeInferenceQuickstartServer(s grpc.ServiceRegistrar, srv GkeInferenceQuickstartServer)
Types ¶
type Amount ¶
type Amount struct {
// Output only. The whole units of the amount.
// For example if `currencyCode` is `"USD"`, then 1 unit is one US dollar.
Units int64 `protobuf:"varint,1,opt,name=units,proto3" json:"units,omitempty"`
// Output only. Number of nano (10^-9) units of the amount.
// The value must be between -999,999,999 and +999,999,999 inclusive.
// If `units` is positive, `nanos` must be positive or zero.
// If `units` is zero, `nanos` can be positive, zero, or negative.
// If `units` is negative, `nanos` must be negative or zero.
// For example $-1.75 is represented as `units`=-1 and `nanos`=-750,000,000.
Nanos int32 `protobuf:"varint,2,opt,name=nanos,proto3" json:"nanos,omitempty"`
// contains filtered or unexported fields
}
Represents an amount of money in a specific currency.
func (*Amount) Descriptor (deprecated) ¶
func (*Amount) ProtoMessage ¶
func (*Amount) ProtoMessage()
func (*Amount) ProtoReflect ¶
func (x *Amount) ProtoReflect() protoreflect.Message
type Cost ¶
type Cost struct {
// Optional. The cost per million output tokens, calculated as:
// $/output token = GPU $/s / (1/output-to-input-cost-ratio * input tokens/s +
// output tokens/s)
CostPerMillionOutputTokens *Amount `` /* 145-byte string literal not displayed */
// Optional. The cost per million input tokens. $/input token = ($/output
// token) / output-to-input-cost-ratio.
CostPerMillionInputTokens *Amount `` /* 142-byte string literal not displayed */
// Optional. The pricing model used to calculate the cost. Can be one of:
// `3-years-cud`, `1-year-cud`, `on-demand`, `spot`. If not provided, `spot`
// will be used.
PricingModel string `protobuf:"bytes,3,opt,name=pricing_model,json=pricingModel,proto3" json:"pricing_model,omitempty"`
// Optional. The output-to-input cost ratio. This determines how the total GPU
// cost is split between input and output tokens. If not provided, `4.0` is
// used, assuming a 4:1 output:input cost ratio.
OutputInputCostRatio *float32 `` /* 133-byte string literal not displayed */
// contains filtered or unexported fields
}
Cost for running a model deployment on a given instance type. Currently, only USD currency code is supported.
func (*Cost) Descriptor (deprecated) ¶
func (*Cost) GetCostPerMillionInputTokens ¶
func (*Cost) GetCostPerMillionOutputTokens ¶
func (*Cost) GetOutputInputCostRatio ¶
func (*Cost) GetPricingModel ¶
func (*Cost) ProtoMessage ¶
func (*Cost) ProtoMessage()
func (*Cost) ProtoReflect ¶
func (x *Cost) ProtoReflect() protoreflect.Message
type FetchBenchmarkingDataRequest ¶
type FetchBenchmarkingDataRequest struct {
// Required. The model server configuration to get benchmarking data for. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find valid configurations.
ModelServerInfo *ModelServerInfo `protobuf:"bytes,1,opt,name=model_server_info,json=modelServerInfo,proto3" json:"model_server_info,omitempty"`
// Optional. The instance type to filter benchmarking data. Instance types are
// in the format `a2-highgpu-1g`. If not provided, all instance types for the
// given profile's `model_server_info` will be returned. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find available instance types.
InstanceType string `protobuf:"bytes,3,opt,name=instance_type,json=instanceType,proto3" json:"instance_type,omitempty"`
// Optional. The pricing model to use for the benchmarking data. Defaults to
// `spot`.
PricingModel string `protobuf:"bytes,4,opt,name=pricing_model,json=pricingModel,proto3" json:"pricing_model,omitempty"`
// contains filtered or unexported fields
}
Request message for [GkeInferenceQuickstart.FetchBenchmarkingData][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchBenchmarkingData].
func (*FetchBenchmarkingDataRequest) Descriptor (deprecated) ¶
func (*FetchBenchmarkingDataRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchBenchmarkingDataRequest.ProtoReflect.Descriptor instead.
func (*FetchBenchmarkingDataRequest) GetInstanceType ¶
func (x *FetchBenchmarkingDataRequest) GetInstanceType() string
func (*FetchBenchmarkingDataRequest) GetModelServerInfo ¶
func (x *FetchBenchmarkingDataRequest) GetModelServerInfo() *ModelServerInfo
func (*FetchBenchmarkingDataRequest) GetPricingModel ¶
func (x *FetchBenchmarkingDataRequest) GetPricingModel() string
func (*FetchBenchmarkingDataRequest) ProtoMessage ¶
func (*FetchBenchmarkingDataRequest) ProtoMessage()
func (*FetchBenchmarkingDataRequest) ProtoReflect ¶
func (x *FetchBenchmarkingDataRequest) ProtoReflect() protoreflect.Message
func (*FetchBenchmarkingDataRequest) Reset ¶
func (x *FetchBenchmarkingDataRequest) Reset()
func (*FetchBenchmarkingDataRequest) String ¶
func (x *FetchBenchmarkingDataRequest) String() string
type FetchBenchmarkingDataResponse ¶
type FetchBenchmarkingDataResponse struct {
// Output only. List of profiles containing their respective benchmarking
// data.
Profile []*Profile `protobuf:"bytes,1,rep,name=profile,proto3" json:"profile,omitempty"`
// contains filtered or unexported fields
}
Response message for [GkeInferenceQuickstart.FetchBenchmarkingData][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchBenchmarkingData].
func (*FetchBenchmarkingDataResponse) Descriptor (deprecated) ¶
func (*FetchBenchmarkingDataResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchBenchmarkingDataResponse.ProtoReflect.Descriptor instead.
func (*FetchBenchmarkingDataResponse) GetProfile ¶
func (x *FetchBenchmarkingDataResponse) GetProfile() []*Profile
func (*FetchBenchmarkingDataResponse) ProtoMessage ¶
func (*FetchBenchmarkingDataResponse) ProtoMessage()
func (*FetchBenchmarkingDataResponse) ProtoReflect ¶
func (x *FetchBenchmarkingDataResponse) ProtoReflect() protoreflect.Message
func (*FetchBenchmarkingDataResponse) Reset ¶
func (x *FetchBenchmarkingDataResponse) Reset()
func (*FetchBenchmarkingDataResponse) String ¶
func (x *FetchBenchmarkingDataResponse) String() string
type FetchModelServerVersionsRequest ¶
type FetchModelServerVersionsRequest struct {
// Required. The model for which to list model server versions. Open-source
// models follow the Huggingface Hub `owner/model_name` format. Use
// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
// to find available models.
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
// Required. The model server for which to list versions. Open-source model
// servers use simplified, lowercase names (e.g., `vllm`). Use
// [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers]
// to find available model servers.
ModelServer string `protobuf:"bytes,2,opt,name=model_server,json=modelServer,proto3" json:"model_server,omitempty"`
// Optional. The target number of results to return in a single response.
// If not specified, a default value will be chosen by the service.
// Note that the response may include a partial list and a caller should
// only rely on the response's
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServerVersionsResponse.next_page_token]
// to determine if there are more instances left to be queried.
PageSize *int32 `protobuf:"varint,3,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
// Optional. The value of
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServerVersionsResponse.next_page_token]
// received from a previous `FetchModelServerVersionsRequest` call.
// Provide this to retrieve the subsequent page in a multi-page list of
// results. When paginating, all other parameters provided to
// `FetchModelServerVersionsRequest` must match the call that provided the
// page token.
PageToken *string `protobuf:"bytes,4,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
// contains filtered or unexported fields
}
Request message for [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions].
func (*FetchModelServerVersionsRequest) Descriptor (deprecated) ¶
func (*FetchModelServerVersionsRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelServerVersionsRequest.ProtoReflect.Descriptor instead.
func (*FetchModelServerVersionsRequest) GetModel ¶
func (x *FetchModelServerVersionsRequest) GetModel() string
func (*FetchModelServerVersionsRequest) GetModelServer ¶
func (x *FetchModelServerVersionsRequest) GetModelServer() string
func (*FetchModelServerVersionsRequest) GetPageSize ¶
func (x *FetchModelServerVersionsRequest) GetPageSize() int32
func (*FetchModelServerVersionsRequest) GetPageToken ¶
func (x *FetchModelServerVersionsRequest) GetPageToken() string
func (*FetchModelServerVersionsRequest) ProtoMessage ¶
func (*FetchModelServerVersionsRequest) ProtoMessage()
func (*FetchModelServerVersionsRequest) ProtoReflect ¶
func (x *FetchModelServerVersionsRequest) ProtoReflect() protoreflect.Message
func (*FetchModelServerVersionsRequest) Reset ¶
func (x *FetchModelServerVersionsRequest) Reset()
func (*FetchModelServerVersionsRequest) String ¶
func (x *FetchModelServerVersionsRequest) String() string
type FetchModelServerVersionsResponse ¶
type FetchModelServerVersionsResponse struct {
// Output only. A list of available model server versions.
ModelServerVersions []string `protobuf:"bytes,1,rep,name=model_server_versions,json=modelServerVersions,proto3" json:"model_server_versions,omitempty"`
// Output only. A token which may be sent as
// [page_token][google.cloud.gkerecommender.v1.FetchModelServerVersionsRequest.page_token]
// in a subsequent `FetchModelServerVersions` call to retrieve the next page
// of results. If this field is omitted or empty, then there are no more
// results to return.
NextPageToken string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
// contains filtered or unexported fields
}
Response message for [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions].
func (*FetchModelServerVersionsResponse) Descriptor (deprecated) ¶
func (*FetchModelServerVersionsResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelServerVersionsResponse.ProtoReflect.Descriptor instead.
func (*FetchModelServerVersionsResponse) GetModelServerVersions ¶
func (x *FetchModelServerVersionsResponse) GetModelServerVersions() []string
func (*FetchModelServerVersionsResponse) GetNextPageToken ¶
func (x *FetchModelServerVersionsResponse) GetNextPageToken() string
func (*FetchModelServerVersionsResponse) ProtoMessage ¶
func (*FetchModelServerVersionsResponse) ProtoMessage()
func (*FetchModelServerVersionsResponse) ProtoReflect ¶
func (x *FetchModelServerVersionsResponse) ProtoReflect() protoreflect.Message
func (*FetchModelServerVersionsResponse) Reset ¶
func (x *FetchModelServerVersionsResponse) Reset()
func (*FetchModelServerVersionsResponse) String ¶
func (x *FetchModelServerVersionsResponse) String() string
type FetchModelServersRequest ¶
type FetchModelServersRequest struct {
// Required. The model for which to list model servers. Open-source models
// follow the Huggingface Hub `owner/model_name` format. Use
// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
// to find available models.
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
// Optional. The target number of results to return in a single response.
// If not specified, a default value will be chosen by the service.
// Note that the response may include a partial list and a caller should
// only rely on the response's
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServersResponse.next_page_token]
// to determine if there are more instances left to be queried.
PageSize *int32 `protobuf:"varint,2,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
// Optional. The value of
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServersResponse.next_page_token]
// received from a previous `FetchModelServersRequest` call.
// Provide this to retrieve the subsequent page in a multi-page list of
// results. When paginating, all other parameters provided to
// `FetchModelServersRequest` must match the call that provided the page
// token.
PageToken *string `protobuf:"bytes,3,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
// contains filtered or unexported fields
}
Request message for [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers].
func (*FetchModelServersRequest) Descriptor (deprecated) ¶
func (*FetchModelServersRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelServersRequest.ProtoReflect.Descriptor instead.
func (*FetchModelServersRequest) GetModel ¶
func (x *FetchModelServersRequest) GetModel() string
func (*FetchModelServersRequest) GetPageSize ¶
func (x *FetchModelServersRequest) GetPageSize() int32
func (*FetchModelServersRequest) GetPageToken ¶
func (x *FetchModelServersRequest) GetPageToken() string
func (*FetchModelServersRequest) ProtoMessage ¶
func (*FetchModelServersRequest) ProtoMessage()
func (*FetchModelServersRequest) ProtoReflect ¶
func (x *FetchModelServersRequest) ProtoReflect() protoreflect.Message
func (*FetchModelServersRequest) Reset ¶
func (x *FetchModelServersRequest) Reset()
func (*FetchModelServersRequest) String ¶
func (x *FetchModelServersRequest) String() string
type FetchModelServersResponse ¶
type FetchModelServersResponse struct {
// Output only. List of available model servers. Open-source model servers use
// simplified, lowercase names (e.g., `vllm`).
ModelServers []string `protobuf:"bytes,1,rep,name=model_servers,json=modelServers,proto3" json:"model_servers,omitempty"`
// Output only. A token which may be sent as
// [page_token][google.cloud.gkerecommender.v1.FetchModelServersRequest.page_token]
// in a subsequent `FetchModelServers` call to retrieve the next page of
// results. If this field is omitted or empty, then there are no more results
// to return.
NextPageToken string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
// contains filtered or unexported fields
}
Response message for [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers].
func (*FetchModelServersResponse) Descriptor (deprecated) ¶
func (*FetchModelServersResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelServersResponse.ProtoReflect.Descriptor instead.
func (*FetchModelServersResponse) GetModelServers ¶
func (x *FetchModelServersResponse) GetModelServers() []string
func (*FetchModelServersResponse) GetNextPageToken ¶
func (x *FetchModelServersResponse) GetNextPageToken() string
func (*FetchModelServersResponse) ProtoMessage ¶
func (*FetchModelServersResponse) ProtoMessage()
func (*FetchModelServersResponse) ProtoReflect ¶
func (x *FetchModelServersResponse) ProtoReflect() protoreflect.Message
func (*FetchModelServersResponse) Reset ¶
func (x *FetchModelServersResponse) Reset()
func (*FetchModelServersResponse) String ¶
func (x *FetchModelServersResponse) String() string
type FetchModelsRequest ¶
type FetchModelsRequest struct {
// Optional. The target number of results to return in a single response.
// If not specified, a default value will be chosen by the service.
// Note that the response may include a partial list and a caller should
// only rely on the response's
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelsResponse.next_page_token]
// to determine if there are more instances left to be queried.
PageSize *int32 `protobuf:"varint,1,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
// Optional. The value of
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelsResponse.next_page_token]
// received from a previous `FetchModelsRequest` call.
// Provide this to retrieve the subsequent page in a multi-page list of
// results. When paginating, all other parameters provided to
// `FetchModelsRequest` must match the call that provided the page token.
PageToken *string `protobuf:"bytes,2,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
// contains filtered or unexported fields
}
Request message for [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels].
func (*FetchModelsRequest) Descriptor (deprecated) ¶
func (*FetchModelsRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelsRequest.ProtoReflect.Descriptor instead.
func (*FetchModelsRequest) GetPageSize ¶
func (x *FetchModelsRequest) GetPageSize() int32
func (*FetchModelsRequest) GetPageToken ¶
func (x *FetchModelsRequest) GetPageToken() string
func (*FetchModelsRequest) ProtoMessage ¶
func (*FetchModelsRequest) ProtoMessage()
func (*FetchModelsRequest) ProtoReflect ¶
func (x *FetchModelsRequest) ProtoReflect() protoreflect.Message
func (*FetchModelsRequest) Reset ¶
func (x *FetchModelsRequest) Reset()
func (*FetchModelsRequest) String ¶
func (x *FetchModelsRequest) String() string
type FetchModelsResponse ¶
type FetchModelsResponse struct {
// Output only. List of available models. Open-source models follow the
// Huggingface Hub `owner/model_name` format.
Models []string `protobuf:"bytes,1,rep,name=models,proto3" json:"models,omitempty"`
// Output only. A token which may be sent as
// [page_token][FetchModelsRequest.page_token] in a subsequent
// `FetchModels` call to retrieve the next page of results.
// If this field is omitted or empty, then there are no more results to
// return.
NextPageToken string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
// contains filtered or unexported fields
}
Response message for [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels].
func (*FetchModelsResponse) Descriptor
deprecated
func (*FetchModelsResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelsResponse.ProtoReflect.Descriptor instead.
func (*FetchModelsResponse) GetModels ¶
func (x *FetchModelsResponse) GetModels() []string
func (*FetchModelsResponse) GetNextPageToken ¶
func (x *FetchModelsResponse) GetNextPageToken() string
func (*FetchModelsResponse) ProtoMessage ¶
func (*FetchModelsResponse) ProtoMessage()
func (*FetchModelsResponse) ProtoReflect ¶
func (x *FetchModelsResponse) ProtoReflect() protoreflect.Message
func (*FetchModelsResponse) Reset ¶
func (x *FetchModelsResponse) Reset()
func (*FetchModelsResponse) String ¶
func (x *FetchModelsResponse) String() string
type FetchProfilesRequest ¶
type FetchProfilesRequest struct {
// Optional. The model to filter profiles by. Open-source models follow the
// Huggingface Hub `owner/model_name` format. If not provided, all models are
// returned. Use
// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
// to find available models.
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
// Optional. The model server to filter profiles by. If not provided, all
// model servers are returned. Use
// [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers]
// to find available model servers for a given model.
ModelServer string `protobuf:"bytes,2,opt,name=model_server,json=modelServer,proto3" json:"model_server,omitempty"`
// Optional. The model server version to filter profiles by. If not provided,
// all model server versions are returned. Use
// [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions]
// to find available versions for a given model and server.
ModelServerVersion string `protobuf:"bytes,3,opt,name=model_server_version,json=modelServerVersion,proto3" json:"model_server_version,omitempty"`
// Optional. The performance requirements to filter profiles. Profiles that do
// not meet these requirements are filtered out. If not provided, all profiles
// are returned.
PerformanceRequirements *PerformanceRequirements `` /* 130-byte string literal not displayed */
// Optional. The target number of results to return in a single response. If
// not specified, a default value will be chosen by the service. Note that the
// response may include a partial list and a caller should only rely on the
// response's
// [next_page_token][google.cloud.gkerecommender.v1.FetchProfilesResponse.next_page_token]
// to determine if there are more instances left to be queried.
PageSize *int32 `protobuf:"varint,5,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
// Optional. The value of
// [next_page_token][google.cloud.gkerecommender.v1.FetchProfilesResponse.next_page_token]
// received from a previous `FetchProfilesRequest` call.
// Provide this to retrieve the subsequent page in a multi-page list of
// results. When paginating, all other parameters provided to
// `FetchProfilesRequest` must match the call that provided the page
// token.
PageToken *string `protobuf:"bytes,6,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
// contains filtered or unexported fields
}
Request message for [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles].
func (*FetchProfilesRequest) Descriptor
deprecated
func (*FetchProfilesRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchProfilesRequest.ProtoReflect.Descriptor instead.
func (*FetchProfilesRequest) GetModel ¶
func (x *FetchProfilesRequest) GetModel() string
func (*FetchProfilesRequest) GetModelServer ¶
func (x *FetchProfilesRequest) GetModelServer() string
func (*FetchProfilesRequest) GetModelServerVersion ¶
func (x *FetchProfilesRequest) GetModelServerVersion() string
func (*FetchProfilesRequest) GetPageSize ¶
func (x *FetchProfilesRequest) GetPageSize() int32
func (*FetchProfilesRequest) GetPageToken ¶
func (x *FetchProfilesRequest) GetPageToken() string
func (*FetchProfilesRequest) GetPerformanceRequirements ¶
func (x *FetchProfilesRequest) GetPerformanceRequirements() *PerformanceRequirements
func (*FetchProfilesRequest) ProtoMessage ¶
func (*FetchProfilesRequest) ProtoMessage()
func (*FetchProfilesRequest) ProtoReflect ¶
func (x *FetchProfilesRequest) ProtoReflect() protoreflect.Message
func (*FetchProfilesRequest) Reset ¶
func (x *FetchProfilesRequest) Reset()
func (*FetchProfilesRequest) String ¶
func (x *FetchProfilesRequest) String() string
type FetchProfilesResponse ¶
type FetchProfilesResponse struct {
// Output only. List of profiles that match the given model server info and
// performance requirements (if provided).
Profile []*Profile `protobuf:"bytes,1,rep,name=profile,proto3" json:"profile,omitempty"`
// Output only. The combined range of performance values observed across all
// profiles in this response.
PerformanceRange *PerformanceRange `protobuf:"bytes,2,opt,name=performance_range,json=performanceRange,proto3" json:"performance_range,omitempty"`
// Output only. Additional comments related to the response.
Comments string `protobuf:"bytes,3,opt,name=comments,proto3" json:"comments,omitempty"`
// Output only. A token which may be sent as
// [page_token][FetchProfilesRequest.page_token] in a subsequent
// `FetchProfiles` call to retrieve the next page of results. If this
// field is omitted or empty, then there are no more results to return.
NextPageToken string `protobuf:"bytes,4,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
// contains filtered or unexported fields
}
Response message for [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles].
func (*FetchProfilesResponse) Descriptor
deprecated
func (*FetchProfilesResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchProfilesResponse.ProtoReflect.Descriptor instead.
func (*FetchProfilesResponse) GetComments ¶
func (x *FetchProfilesResponse) GetComments() string
func (*FetchProfilesResponse) GetNextPageToken ¶
func (x *FetchProfilesResponse) GetNextPageToken() string
func (*FetchProfilesResponse) GetPerformanceRange ¶
func (x *FetchProfilesResponse) GetPerformanceRange() *PerformanceRange
func (*FetchProfilesResponse) GetProfile ¶
func (x *FetchProfilesResponse) GetProfile() []*Profile
func (*FetchProfilesResponse) ProtoMessage ¶
func (*FetchProfilesResponse) ProtoMessage()
func (*FetchProfilesResponse) ProtoReflect ¶
func (x *FetchProfilesResponse) ProtoReflect() protoreflect.Message
func (*FetchProfilesResponse) Reset ¶
func (x *FetchProfilesResponse) Reset()
func (*FetchProfilesResponse) String ¶
func (x *FetchProfilesResponse) String() string
type GenerateOptimizedManifestRequest ¶
type GenerateOptimizedManifestRequest struct {
// Required. The model server configuration to generate the manifest for. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find valid configurations.
ModelServerInfo *ModelServerInfo `protobuf:"bytes,1,opt,name=model_server_info,json=modelServerInfo,proto3" json:"model_server_info,omitempty"`
// Required. The accelerator type. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find valid accelerators for a given `model_server_info`.
AcceleratorType string `protobuf:"bytes,2,opt,name=accelerator_type,json=acceleratorType,proto3" json:"accelerator_type,omitempty"`
// Optional. The kubernetes namespace to deploy the manifests in.
KubernetesNamespace string `protobuf:"bytes,3,opt,name=kubernetes_namespace,json=kubernetesNamespace,proto3" json:"kubernetes_namespace,omitempty"`
// Optional. The performance requirements to use for generating Horizontal Pod
// Autoscaler (HPA) resources. If provided, the manifest includes HPA
// resources to adjust the model server replica count to maintain the
// specified targets (e.g., NTPOT, TTFT) at a P50 latency. Cost targets are
// not currently supported for HPA generation. If the specified targets are
// not achievable, the HPA manifest will not be generated.
PerformanceRequirements *PerformanceRequirements `` /* 130-byte string literal not displayed */
// Optional. The storage configuration for the model. If not provided, the
// model is loaded from Huggingface.
StorageConfig *StorageConfig `protobuf:"bytes,5,opt,name=storage_config,json=storageConfig,proto3" json:"storage_config,omitempty"`
// contains filtered or unexported fields
}
Request message for [GkeInferenceQuickstart.GenerateOptimizedManifest][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.GenerateOptimizedManifest].
func (*GenerateOptimizedManifestRequest) Descriptor
deprecated
func (*GenerateOptimizedManifestRequest) Descriptor() ([]byte, []int)
Deprecated: Use GenerateOptimizedManifestRequest.ProtoReflect.Descriptor instead.
func (*GenerateOptimizedManifestRequest) GetAcceleratorType ¶
func (x *GenerateOptimizedManifestRequest) GetAcceleratorType() string
func (*GenerateOptimizedManifestRequest) GetKubernetesNamespace ¶
func (x *GenerateOptimizedManifestRequest) GetKubernetesNamespace() string
func (*GenerateOptimizedManifestRequest) GetModelServerInfo ¶
func (x *GenerateOptimizedManifestRequest) GetModelServerInfo() *ModelServerInfo
func (*GenerateOptimizedManifestRequest) GetPerformanceRequirements ¶
func (x *GenerateOptimizedManifestRequest) GetPerformanceRequirements() *PerformanceRequirements
func (*GenerateOptimizedManifestRequest) GetStorageConfig ¶
func (x *GenerateOptimizedManifestRequest) GetStorageConfig() *StorageConfig
func (*GenerateOptimizedManifestRequest) ProtoMessage ¶
func (*GenerateOptimizedManifestRequest) ProtoMessage()
func (*GenerateOptimizedManifestRequest) ProtoReflect ¶
func (x *GenerateOptimizedManifestRequest) ProtoReflect() protoreflect.Message
func (*GenerateOptimizedManifestRequest) Reset ¶
func (x *GenerateOptimizedManifestRequest) Reset()
func (*GenerateOptimizedManifestRequest) String ¶
func (x *GenerateOptimizedManifestRequest) String() string
type GenerateOptimizedManifestResponse ¶
type GenerateOptimizedManifestResponse struct {
// Output only. A list of generated Kubernetes manifests.
KubernetesManifests []*KubernetesManifest `protobuf:"bytes,1,rep,name=kubernetes_manifests,json=kubernetesManifests,proto3" json:"kubernetes_manifests,omitempty"`
// Output only. Comments related to deploying the generated manifests.
Comments []string `protobuf:"bytes,2,rep,name=comments,proto3" json:"comments,omitempty"`
// Output only. Additional information about the versioned dependencies used
// to generate the manifests. See [Run best practice inference with GKE
// Inference Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart)
// for details.
ManifestVersion string `protobuf:"bytes,3,opt,name=manifest_version,json=manifestVersion,proto3" json:"manifest_version,omitempty"`
// contains filtered or unexported fields
}
Response message for [GkeInferenceQuickstart.GenerateOptimizedManifest][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.GenerateOptimizedManifest].
func (*GenerateOptimizedManifestResponse) Descriptor
deprecated
func (*GenerateOptimizedManifestResponse) Descriptor() ([]byte, []int)
Deprecated: Use GenerateOptimizedManifestResponse.ProtoReflect.Descriptor instead.
func (*GenerateOptimizedManifestResponse) GetComments ¶
func (x *GenerateOptimizedManifestResponse) GetComments() []string
func (*GenerateOptimizedManifestResponse) GetKubernetesManifests ¶
func (x *GenerateOptimizedManifestResponse) GetKubernetesManifests() []*KubernetesManifest
func (*GenerateOptimizedManifestResponse) GetManifestVersion ¶
func (x *GenerateOptimizedManifestResponse) GetManifestVersion() string
func (*GenerateOptimizedManifestResponse) ProtoMessage ¶
func (*GenerateOptimizedManifestResponse) ProtoMessage()
func (*GenerateOptimizedManifestResponse) ProtoReflect ¶
func (x *GenerateOptimizedManifestResponse) ProtoReflect() protoreflect.Message
func (*GenerateOptimizedManifestResponse) Reset ¶
func (x *GenerateOptimizedManifestResponse) Reset()
func (*GenerateOptimizedManifestResponse) String ¶
func (x *GenerateOptimizedManifestResponse) String() string
type GkeInferenceQuickstartClient ¶
type GkeInferenceQuickstartClient interface {
// Fetches available models. Open-source models follow the Huggingface Hub
// `owner/model_name` format.
FetchModels(ctx context.Context, in *FetchModelsRequest, opts ...grpc.CallOption) (*FetchModelsResponse, error)
// Fetches available model servers. Open-source model servers use simplified,
// lowercase names (e.g., `vllm`).
FetchModelServers(ctx context.Context, in *FetchModelServersRequest, opts ...grpc.CallOption) (*FetchModelServersResponse, error)
// Fetches available model server versions. Open-source servers use their own
// versioning schemas (e.g., `vllm` uses semver like `v1.0.0`).
//
// Some model servers have different versioning schemas depending on the
// accelerator. For example, `vllm` uses semver on GPUs, but returns nightly
// build tags on TPUs. All available versions will be returned when different
// schemas are present.
FetchModelServerVersions(ctx context.Context, in *FetchModelServerVersionsRequest, opts ...grpc.CallOption) (*FetchModelServerVersionsResponse, error)
// Fetches available profiles. A profile contains performance metrics and
// cost information for a specific model server setup. Profiles can be
// filtered by parameters. If no filters are provided, all profiles are
// returned.
//
// Profiles display a single value per performance metric based on the
// provided performance requirements. If no requirements are given, the
// metrics represent the inflection point. See [Run best practice inference
// with GKE Inference Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart#how)
// for details.
FetchProfiles(ctx context.Context, in *FetchProfilesRequest, opts ...grpc.CallOption) (*FetchProfilesResponse, error)
// Generates an optimized deployment manifest for a given model and model
// server, based on the specified accelerator, performance targets, and
// configurations. See [Run best practice inference with GKE Inference
// Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart)
// for deployment details.
GenerateOptimizedManifest(ctx context.Context, in *GenerateOptimizedManifestRequest, opts ...grpc.CallOption) (*GenerateOptimizedManifestResponse, error)
// Fetches all of the benchmarking data available for a profile. Benchmarking
// data returns all of the performance metrics available for a given model
// server setup on a given instance type.
FetchBenchmarkingData(ctx context.Context, in *FetchBenchmarkingDataRequest, opts ...grpc.CallOption) (*FetchBenchmarkingDataResponse, error)
}
GkeInferenceQuickstartClient is the client API for GkeInferenceQuickstart service.
For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
func NewGkeInferenceQuickstartClient ¶
func NewGkeInferenceQuickstartClient(cc grpc.ClientConnInterface) GkeInferenceQuickstartClient
type GkeInferenceQuickstartServer ¶
type GkeInferenceQuickstartServer interface {
// Fetches available models. Open-source models follow the Huggingface Hub
// `owner/model_name` format.
FetchModels(context.Context, *FetchModelsRequest) (*FetchModelsResponse, error)
// Fetches available model servers. Open-source model servers use simplified,
// lowercase names (e.g., `vllm`).
FetchModelServers(context.Context, *FetchModelServersRequest) (*FetchModelServersResponse, error)
// Fetches available model server versions. Open-source servers use their own
// versioning schemas (e.g., `vllm` uses semver like `v1.0.0`).
//
// Some model servers have different versioning schemas depending on the
// accelerator. For example, `vllm` uses semver on GPUs, but returns nightly
// build tags on TPUs. All available versions will be returned when different
// schemas are present.
FetchModelServerVersions(context.Context, *FetchModelServerVersionsRequest) (*FetchModelServerVersionsResponse, error)
// Fetches available profiles. A profile contains performance metrics and
// cost information for a specific model server setup. Profiles can be
// filtered by parameters. If no filters are provided, all profiles are
// returned.
//
// Profiles display a single value per performance metric based on the
// provided performance requirements. If no requirements are given, the
// metrics represent the inflection point. See [Run best practice inference
// with GKE Inference Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart#how)
// for details.
FetchProfiles(context.Context, *FetchProfilesRequest) (*FetchProfilesResponse, error)
// Generates an optimized deployment manifest for a given model and model
// server, based on the specified accelerator, performance targets, and
// configurations. See [Run best practice inference with GKE Inference
// Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart)
// for deployment details.
GenerateOptimizedManifest(context.Context, *GenerateOptimizedManifestRequest) (*GenerateOptimizedManifestResponse, error)
// Fetches all of the benchmarking data available for a profile. Benchmarking
// data returns all of the performance metrics available for a given model
// server setup on a given instance type.
FetchBenchmarkingData(context.Context, *FetchBenchmarkingDataRequest) (*FetchBenchmarkingDataResponse, error)
}
GkeInferenceQuickstartServer is the server API for GkeInferenceQuickstart service. All implementations should embed UnimplementedGkeInferenceQuickstartServer for forward compatibility.
type KubernetesManifest ¶
type KubernetesManifest struct {
// Output only. Kubernetes resource kind.
Kind string `protobuf:"bytes,1,opt,name=kind,proto3" json:"kind,omitempty"`
// Output only. Kubernetes API version.
ApiVersion string `protobuf:"bytes,2,opt,name=api_version,json=apiVersion,proto3" json:"api_version,omitempty"`
// Output only. YAML content.
Content string `protobuf:"bytes,3,opt,name=content,proto3" json:"content,omitempty"`
// contains filtered or unexported fields
}
A Kubernetes manifest.
func (*KubernetesManifest) Descriptor
deprecated
func (*KubernetesManifest) Descriptor() ([]byte, []int)
Deprecated: Use KubernetesManifest.ProtoReflect.Descriptor instead.
func (*KubernetesManifest) GetApiVersion ¶
func (x *KubernetesManifest) GetApiVersion() string
func (*KubernetesManifest) GetContent ¶
func (x *KubernetesManifest) GetContent() string
func (*KubernetesManifest) GetKind ¶
func (x *KubernetesManifest) GetKind() string
func (*KubernetesManifest) ProtoMessage ¶
func (*KubernetesManifest) ProtoMessage()
func (*KubernetesManifest) ProtoReflect ¶
func (x *KubernetesManifest) ProtoReflect() protoreflect.Message
func (*KubernetesManifest) Reset ¶
func (x *KubernetesManifest) Reset()
func (*KubernetesManifest) String ¶
func (x *KubernetesManifest) String() string
type MillisecondRange ¶
type MillisecondRange struct {
// Output only. The minimum value of the range.
Min int32 `protobuf:"varint,1,opt,name=min,proto3" json:"min,omitempty"`
// Output only. The maximum value of the range.
Max int32 `protobuf:"varint,2,opt,name=max,proto3" json:"max,omitempty"`
// contains filtered or unexported fields
}
Represents a range of latency values in milliseconds.
func (*MillisecondRange) Descriptor
deprecated
func (*MillisecondRange) Descriptor() ([]byte, []int)
Deprecated: Use MillisecondRange.ProtoReflect.Descriptor instead.
func (*MillisecondRange) GetMax ¶
func (x *MillisecondRange) GetMax() int32
func (*MillisecondRange) GetMin ¶
func (x *MillisecondRange) GetMin() int32
func (*MillisecondRange) ProtoMessage ¶
func (*MillisecondRange) ProtoMessage()
func (*MillisecondRange) ProtoReflect ¶
func (x *MillisecondRange) ProtoReflect() protoreflect.Message
func (*MillisecondRange) Reset ¶
func (x *MillisecondRange) Reset()
func (*MillisecondRange) String ¶
func (x *MillisecondRange) String() string
type ModelServerInfo ¶
type ModelServerInfo struct {
// Required. The model. Open-source models follow the Huggingface Hub
// `owner/model_name` format. Use
// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
// to find available models.
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
// Required. The model server. Open-source model servers use simplified,
// lowercase names (e.g., `vllm`). Use
// [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers]
// to find available servers.
ModelServer string `protobuf:"bytes,2,opt,name=model_server,json=modelServer,proto3" json:"model_server,omitempty"`
// Optional. The model server version. Use
// [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions]
// to find available versions. If not provided, the latest available version
// is used.
ModelServerVersion string `protobuf:"bytes,3,opt,name=model_server_version,json=modelServerVersion,proto3" json:"model_server_version,omitempty"`
// contains filtered or unexported fields
}
Model server information for a deployment. Valid model server info combinations can be found using [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles].
func (*ModelServerInfo) Descriptor
deprecated
func (*ModelServerInfo) Descriptor() ([]byte, []int)
Deprecated: Use ModelServerInfo.ProtoReflect.Descriptor instead.
func (*ModelServerInfo) GetModel ¶
func (x *ModelServerInfo) GetModel() string
func (*ModelServerInfo) GetModelServer ¶
func (x *ModelServerInfo) GetModelServer() string
func (*ModelServerInfo) GetModelServerVersion ¶
func (x *ModelServerInfo) GetModelServerVersion() string
func (*ModelServerInfo) ProtoMessage ¶
func (*ModelServerInfo) ProtoMessage()
func (*ModelServerInfo) ProtoReflect ¶
func (x *ModelServerInfo) ProtoReflect() protoreflect.Message
func (*ModelServerInfo) Reset ¶
func (x *ModelServerInfo) Reset()
func (*ModelServerInfo) String ¶
func (x *ModelServerInfo) String() string
type PerformanceRange ¶
type PerformanceRange struct {
// Output only. The range of throughput in output tokens per second. This is
// measured as total_output_tokens_generated_by_server /
// elapsed_time_in_seconds.
ThroughputOutputRange *TokensPerSecondRange `` /* 126-byte string literal not displayed */
// Output only. The range of TTFT (Time To First Token) in milliseconds. TTFT
// is the time it takes to generate the first token for a request.
TtftRange *MillisecondRange `protobuf:"bytes,2,opt,name=ttft_range,json=ttftRange,proto3" json:"ttft_range,omitempty"`
// Output only. The range of NTPOT (Normalized Time Per Output Token) in
// milliseconds. NTPOT is the request latency normalized by the number of
// output tokens, measured as request_latency / total_output_tokens.
NtpotRange *MillisecondRange `protobuf:"bytes,3,opt,name=ntpot_range,json=ntpotRange,proto3" json:"ntpot_range,omitempty"`
// contains filtered or unexported fields
}
Performance range for a model deployment.
func (*PerformanceRange) Descriptor
deprecated
func (*PerformanceRange) Descriptor() ([]byte, []int)
Deprecated: Use PerformanceRange.ProtoReflect.Descriptor instead.
func (*PerformanceRange) GetNtpotRange ¶
func (x *PerformanceRange) GetNtpotRange() *MillisecondRange
func (*PerformanceRange) GetThroughputOutputRange ¶
func (x *PerformanceRange) GetThroughputOutputRange() *TokensPerSecondRange
func (*PerformanceRange) GetTtftRange ¶
func (x *PerformanceRange) GetTtftRange() *MillisecondRange
func (*PerformanceRange) ProtoMessage ¶
func (*PerformanceRange) ProtoMessage()
func (*PerformanceRange) ProtoReflect ¶
func (x *PerformanceRange) ProtoReflect() protoreflect.Message
func (*PerformanceRange) Reset ¶
func (x *PerformanceRange) Reset()
func (*PerformanceRange) String ¶
func (x *PerformanceRange) String() string
type PerformanceRequirements ¶
type PerformanceRequirements struct {
// Optional. The target Normalized Time Per Output Token (NTPOT) in
// milliseconds. NTPOT is calculated as `request_latency /
// total_output_tokens`. If not provided, this target will not be enforced.
TargetNtpotMilliseconds *int32 `` /* 139-byte string literal not displayed */
// Optional. The target Time To First Token (TTFT) in milliseconds. TTFT is
// the time it takes to generate the first token for a request. If not
// provided, this target will not be enforced.
TargetTtftMilliseconds *int32 `` /* 136-byte string literal not displayed */
// Optional. The target cost for running a profile's model server. If not
// provided, this requirement will not be enforced.
TargetCost *Cost `protobuf:"bytes,3,opt,name=target_cost,json=targetCost,proto3" json:"target_cost,omitempty"`
// contains filtered or unexported fields
}
Performance requirements for a profile and/or model deployment.
func (*PerformanceRequirements) Descriptor
deprecated
func (*PerformanceRequirements) Descriptor() ([]byte, []int)
Deprecated: Use PerformanceRequirements.ProtoReflect.Descriptor instead.
func (*PerformanceRequirements) GetTargetCost ¶
func (x *PerformanceRequirements) GetTargetCost() *Cost
func (*PerformanceRequirements) GetTargetNtpotMilliseconds ¶
func (x *PerformanceRequirements) GetTargetNtpotMilliseconds() int32
func (*PerformanceRequirements) GetTargetTtftMilliseconds ¶
func (x *PerformanceRequirements) GetTargetTtftMilliseconds() int32
func (*PerformanceRequirements) ProtoMessage ¶
func (*PerformanceRequirements) ProtoMessage()
func (*PerformanceRequirements) ProtoReflect ¶
func (x *PerformanceRequirements) ProtoReflect() protoreflect.Message
func (*PerformanceRequirements) Reset ¶
func (x *PerformanceRequirements) Reset()
func (*PerformanceRequirements) String ¶
func (x *PerformanceRequirements) String() string
type PerformanceStats ¶
type PerformanceStats struct {
// Output only. The number of queries per second.
// Note: This metric can vary widely based on context length and may not be a
// reliable measure of LLM throughput.
QueriesPerSecond float32 `protobuf:"fixed32,1,opt,name=queries_per_second,json=queriesPerSecond,proto3" json:"queries_per_second,omitempty"`
// Output only. The number of output tokens per second. This is the throughput
// measured as total_output_tokens_generated_by_server /
// elapsed_time_in_seconds.
OutputTokensPerSecond int32 `` /* 129-byte string literal not displayed */
// Output only. The Normalized Time Per Output Token (NTPOT) in milliseconds.
// This is the request latency normalized by the number of output tokens,
// measured as request_latency / total_output_tokens.
NtpotMilliseconds int32 `protobuf:"varint,3,opt,name=ntpot_milliseconds,json=ntpotMilliseconds,proto3" json:"ntpot_milliseconds,omitempty"`
// Output only. The Time To First Token (TTFT) in milliseconds. This is the
// time it takes to generate the first token for a request.
TtftMilliseconds int32 `protobuf:"varint,4,opt,name=ttft_milliseconds,json=ttftMilliseconds,proto3" json:"ttft_milliseconds,omitempty"`
// Output only. The cost of running the model deployment.
Cost []*Cost `protobuf:"bytes,5,rep,name=cost,proto3" json:"cost,omitempty"`
// contains filtered or unexported fields
}
Performance statistics for a model deployment.
func (*PerformanceStats) Descriptor
deprecated
func (*PerformanceStats) Descriptor() ([]byte, []int)
Deprecated: Use PerformanceStats.ProtoReflect.Descriptor instead.
func (*PerformanceStats) GetCost ¶
func (x *PerformanceStats) GetCost() []*Cost
func (*PerformanceStats) GetNtpotMilliseconds ¶
func (x *PerformanceStats) GetNtpotMilliseconds() int32
func (*PerformanceStats) GetOutputTokensPerSecond ¶
func (x *PerformanceStats) GetOutputTokensPerSecond() int32
func (*PerformanceStats) GetQueriesPerSecond ¶
func (x *PerformanceStats) GetQueriesPerSecond() float32
func (*PerformanceStats) GetTtftMilliseconds ¶
func (x *PerformanceStats) GetTtftMilliseconds() int32
func (*PerformanceStats) ProtoMessage ¶
func (*PerformanceStats) ProtoMessage()
func (*PerformanceStats) ProtoReflect ¶
func (x *PerformanceStats) ProtoReflect() protoreflect.Message
func (*PerformanceStats) Reset ¶
func (x *PerformanceStats) Reset()
func (*PerformanceStats) String ¶
func (x *PerformanceStats) String() string
type Profile ¶
type Profile struct {
// Output only. The model server configuration. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find valid configurations.
ModelServerInfo *ModelServerInfo `protobuf:"bytes,1,opt,name=model_server_info,json=modelServerInfo,proto3" json:"model_server_info,omitempty"`
// Output only. The accelerator type. Expected format: `nvidia-h100-80gb`.
AcceleratorType string `protobuf:"bytes,2,opt,name=accelerator_type,json=acceleratorType,proto3" json:"accelerator_type,omitempty"`
// Output only. The TPU topology (if applicable).
TpuTopology string `protobuf:"bytes,3,opt,name=tpu_topology,json=tpuTopology,proto3" json:"tpu_topology,omitempty"`
// Output only. The instance type. Expected format: `a2-highgpu-1g`.
InstanceType string `protobuf:"bytes,4,opt,name=instance_type,json=instanceType,proto3" json:"instance_type,omitempty"`
// Output only. The resources used by the model deployment.
ResourcesUsed *ResourcesUsed `protobuf:"bytes,5,opt,name=resources_used,json=resourcesUsed,proto3" json:"resources_used,omitempty"`
// Output only. The performance statistics for this profile.
PerformanceStats []*PerformanceStats `protobuf:"bytes,6,rep,name=performance_stats,json=performanceStats,proto3" json:"performance_stats,omitempty"`
// contains filtered or unexported fields
}
A profile containing information about a model deployment.
func (*Profile) Descriptor
deprecated
func (*Profile) GetAcceleratorType ¶
func (*Profile) GetInstanceType ¶
func (*Profile) GetModelServerInfo ¶
func (x *Profile) GetModelServerInfo() *ModelServerInfo
func (*Profile) GetPerformanceStats ¶
func (x *Profile) GetPerformanceStats() []*PerformanceStats
func (*Profile) GetResourcesUsed ¶
func (x *Profile) GetResourcesUsed() *ResourcesUsed
func (*Profile) GetTpuTopology ¶
func (*Profile) ProtoMessage ¶
func (*Profile) ProtoMessage()
func (*Profile) ProtoReflect ¶
func (x *Profile) ProtoReflect() protoreflect.Message
type ResourcesUsed ¶
type ResourcesUsed struct {
// Output only. The number of accelerators (e.g., GPUs or TPUs) used by the
// model deployment on the Kubernetes node.
AcceleratorCount int32 `protobuf:"varint,1,opt,name=accelerator_count,json=acceleratorCount,proto3" json:"accelerator_count,omitempty"`
// contains filtered or unexported fields
}
Resources used by a model deployment.
func (*ResourcesUsed) Descriptor
deprecated
func (*ResourcesUsed) Descriptor() ([]byte, []int)
Deprecated: Use ResourcesUsed.ProtoReflect.Descriptor instead.
func (*ResourcesUsed) GetAcceleratorCount ¶
func (x *ResourcesUsed) GetAcceleratorCount() int32
func (*ResourcesUsed) ProtoMessage ¶
func (*ResourcesUsed) ProtoMessage()
func (*ResourcesUsed) ProtoReflect ¶
func (x *ResourcesUsed) ProtoReflect() protoreflect.Message
func (*ResourcesUsed) Reset ¶
func (x *ResourcesUsed) Reset()
func (*ResourcesUsed) String ¶
func (x *ResourcesUsed) String() string
type StorageConfig ¶
type StorageConfig struct {
// Optional. The Google Cloud Storage bucket URI to load the model from. This
// URI must point to the directory containing the model's config file
// (`config.json`) and model weights. A tuned GCSFuse setup can improve
// LLM Pod startup time by more than 7x. Expected format:
// `gs://<bucket-name>/<path-to-model>`.
ModelBucketUri string `protobuf:"bytes,1,opt,name=model_bucket_uri,json=modelBucketUri,proto3" json:"model_bucket_uri,omitempty"`
// Optional. The URI for the GCS bucket containing the XLA compilation cache.
// If using TPUs, the XLA cache will be written to the same path as
// `model_bucket_uri`. This can speed up vLLM model preparation for repeated
// deployments.
XlaCacheBucketUri string `protobuf:"bytes,2,opt,name=xla_cache_bucket_uri,json=xlaCacheBucketUri,proto3" json:"xla_cache_bucket_uri,omitempty"`
// contains filtered or unexported fields
}
Storage configuration for a model deployment.
func (*StorageConfig) Descriptor (deprecated) ¶
func (*StorageConfig) Descriptor() ([]byte, []int)
Deprecated: Use StorageConfig.ProtoReflect.Descriptor instead.
func (*StorageConfig) GetModelBucketUri ¶
func (x *StorageConfig) GetModelBucketUri() string
func (*StorageConfig) GetXlaCacheBucketUri ¶
func (x *StorageConfig) GetXlaCacheBucketUri() string
func (*StorageConfig) ProtoMessage ¶
func (*StorageConfig) ProtoMessage()
func (*StorageConfig) ProtoReflect ¶
func (x *StorageConfig) ProtoReflect() protoreflect.Message
func (*StorageConfig) Reset ¶
func (x *StorageConfig) Reset()
func (*StorageConfig) String ¶
func (x *StorageConfig) String() string
type TokensPerSecondRange ¶
type TokensPerSecondRange struct {
// Output only. The minimum value of the range.
Min int32 `protobuf:"varint,1,opt,name=min,proto3" json:"min,omitempty"`
// Output only. The maximum value of the range.
Max int32 `protobuf:"varint,2,opt,name=max,proto3" json:"max,omitempty"`
// contains filtered or unexported fields
}
Represents a range of throughput values in tokens per second.
func (*TokensPerSecondRange) Descriptor (deprecated) ¶
func (*TokensPerSecondRange) Descriptor() ([]byte, []int)
Deprecated: Use TokensPerSecondRange.ProtoReflect.Descriptor instead.
func (*TokensPerSecondRange) GetMax ¶
func (x *TokensPerSecondRange) GetMax() int32
func (*TokensPerSecondRange) GetMin ¶
func (x *TokensPerSecondRange) GetMin() int32
func (*TokensPerSecondRange) ProtoMessage ¶
func (*TokensPerSecondRange) ProtoMessage()
func (*TokensPerSecondRange) ProtoReflect ¶
func (x *TokensPerSecondRange) ProtoReflect() protoreflect.Message
func (*TokensPerSecondRange) Reset ¶
func (x *TokensPerSecondRange) Reset()
func (*TokensPerSecondRange) String ¶
func (x *TokensPerSecondRange) String() string
type UnimplementedGkeInferenceQuickstartServer ¶
type UnimplementedGkeInferenceQuickstartServer struct {
}
UnimplementedGkeInferenceQuickstartServer should be embedded to have forward-compatible implementations.
func (UnimplementedGkeInferenceQuickstartServer) FetchBenchmarkingData ¶
func (UnimplementedGkeInferenceQuickstartServer) FetchBenchmarkingData(context.Context, *FetchBenchmarkingDataRequest) (*FetchBenchmarkingDataResponse, error)
func (UnimplementedGkeInferenceQuickstartServer) FetchModelServerVersions ¶
func (UnimplementedGkeInferenceQuickstartServer) FetchModelServerVersions(context.Context, *FetchModelServerVersionsRequest) (*FetchModelServerVersionsResponse, error)
func (UnimplementedGkeInferenceQuickstartServer) FetchModelServers ¶
func (UnimplementedGkeInferenceQuickstartServer) FetchModelServers(context.Context, *FetchModelServersRequest) (*FetchModelServersResponse, error)
func (UnimplementedGkeInferenceQuickstartServer) FetchModels ¶
func (UnimplementedGkeInferenceQuickstartServer) FetchModels(context.Context, *FetchModelsRequest) (*FetchModelsResponse, error)
func (UnimplementedGkeInferenceQuickstartServer) FetchProfiles ¶
func (UnimplementedGkeInferenceQuickstartServer) FetchProfiles(context.Context, *FetchProfilesRequest) (*FetchProfilesResponse, error)
func (UnimplementedGkeInferenceQuickstartServer) GenerateOptimizedManifest ¶
func (UnimplementedGkeInferenceQuickstartServer) GenerateOptimizedManifest(context.Context, *GenerateOptimizedManifestRequest) (*GenerateOptimizedManifestResponse, error)
type UnsafeGkeInferenceQuickstartServer ¶
type UnsafeGkeInferenceQuickstartServer interface {
// contains filtered or unexported methods
}
UnsafeGkeInferenceQuickstartServer may be embedded to opt out of forward compatibility for this service. Use of this interface is not recommended, because methods later added to GkeInferenceQuickstartServer will result in compilation errors for any implementation that embeds it.