Documentation ¶
Index ¶
- Constants
- Variables
- func RegisterGkeInferenceQuickstartServer(s grpc.ServiceRegistrar, srv GkeInferenceQuickstartServer)
- type Amount
- type Cost
- func (*Cost) Descriptor() ([]byte, []int) (deprecated)
- func (x *Cost) GetCostPerMillionInputTokens() *Amount
- func (x *Cost) GetCostPerMillionOutputTokens() *Amount
- func (x *Cost) GetOutputInputCostRatio() float32
- func (x *Cost) GetPricingModel() string
- func (*Cost) ProtoMessage()
- func (x *Cost) ProtoReflect() protoreflect.Message
- func (x *Cost) Reset()
- func (x *Cost) String() string
- type FetchBenchmarkingDataRequest
- func (*FetchBenchmarkingDataRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchBenchmarkingDataRequest) GetInstanceType() string
- func (x *FetchBenchmarkingDataRequest) GetModelServerInfo() *ModelServerInfo
- func (x *FetchBenchmarkingDataRequest) GetPricingModel() string
- func (*FetchBenchmarkingDataRequest) ProtoMessage()
- func (x *FetchBenchmarkingDataRequest) ProtoReflect() protoreflect.Message
- func (x *FetchBenchmarkingDataRequest) Reset()
- func (x *FetchBenchmarkingDataRequest) String() string
- type FetchBenchmarkingDataResponse
- func (*FetchBenchmarkingDataResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchBenchmarkingDataResponse) GetProfile() []*Profile
- func (*FetchBenchmarkingDataResponse) ProtoMessage()
- func (x *FetchBenchmarkingDataResponse) ProtoReflect() protoreflect.Message
- func (x *FetchBenchmarkingDataResponse) Reset()
- func (x *FetchBenchmarkingDataResponse) String() string
- type FetchModelServerVersionsRequest
- func (*FetchModelServerVersionsRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchModelServerVersionsRequest) GetModel() string
- func (x *FetchModelServerVersionsRequest) GetModelServer() string
- func (x *FetchModelServerVersionsRequest) GetPageSize() int32
- func (x *FetchModelServerVersionsRequest) GetPageToken() string
- func (*FetchModelServerVersionsRequest) ProtoMessage()
- func (x *FetchModelServerVersionsRequest) ProtoReflect() protoreflect.Message
- func (x *FetchModelServerVersionsRequest) Reset()
- func (x *FetchModelServerVersionsRequest) String() string
- type FetchModelServerVersionsResponse
- func (*FetchModelServerVersionsResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchModelServerVersionsResponse) GetModelServerVersions() []string
- func (x *FetchModelServerVersionsResponse) GetNextPageToken() string
- func (*FetchModelServerVersionsResponse) ProtoMessage()
- func (x *FetchModelServerVersionsResponse) ProtoReflect() protoreflect.Message
- func (x *FetchModelServerVersionsResponse) Reset()
- func (x *FetchModelServerVersionsResponse) String() string
- type FetchModelServersRequest
- func (*FetchModelServersRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchModelServersRequest) GetModel() string
- func (x *FetchModelServersRequest) GetPageSize() int32
- func (x *FetchModelServersRequest) GetPageToken() string
- func (*FetchModelServersRequest) ProtoMessage()
- func (x *FetchModelServersRequest) ProtoReflect() protoreflect.Message
- func (x *FetchModelServersRequest) Reset()
- func (x *FetchModelServersRequest) String() string
- type FetchModelServersResponse
- func (*FetchModelServersResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchModelServersResponse) GetModelServers() []string
- func (x *FetchModelServersResponse) GetNextPageToken() string
- func (*FetchModelServersResponse) ProtoMessage()
- func (x *FetchModelServersResponse) ProtoReflect() protoreflect.Message
- func (x *FetchModelServersResponse) Reset()
- func (x *FetchModelServersResponse) String() string
- type FetchModelsRequest
- func (*FetchModelsRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchModelsRequest) GetPageSize() int32
- func (x *FetchModelsRequest) GetPageToken() string
- func (*FetchModelsRequest) ProtoMessage()
- func (x *FetchModelsRequest) ProtoReflect() protoreflect.Message
- func (x *FetchModelsRequest) Reset()
- func (x *FetchModelsRequest) String() string
- type FetchModelsResponse
- func (*FetchModelsResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchModelsResponse) GetModels() []string
- func (x *FetchModelsResponse) GetNextPageToken() string
- func (*FetchModelsResponse) ProtoMessage()
- func (x *FetchModelsResponse) ProtoReflect() protoreflect.Message
- func (x *FetchModelsResponse) Reset()
- func (x *FetchModelsResponse) String() string
- type FetchProfilesRequest
- func (*FetchProfilesRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchProfilesRequest) GetModel() string
- func (x *FetchProfilesRequest) GetModelServer() string
- func (x *FetchProfilesRequest) GetModelServerVersion() string
- func (x *FetchProfilesRequest) GetPageSize() int32
- func (x *FetchProfilesRequest) GetPageToken() string
- func (x *FetchProfilesRequest) GetPerformanceRequirements() *PerformanceRequirements
- func (*FetchProfilesRequest) ProtoMessage()
- func (x *FetchProfilesRequest) ProtoReflect() protoreflect.Message
- func (x *FetchProfilesRequest) Reset()
- func (x *FetchProfilesRequest) String() string
- type FetchProfilesResponse
- func (*FetchProfilesResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *FetchProfilesResponse) GetComments() string
- func (x *FetchProfilesResponse) GetNextPageToken() string
- func (x *FetchProfilesResponse) GetPerformanceRange() *PerformanceRange
- func (x *FetchProfilesResponse) GetProfile() []*Profile
- func (*FetchProfilesResponse) ProtoMessage()
- func (x *FetchProfilesResponse) ProtoReflect() protoreflect.Message
- func (x *FetchProfilesResponse) Reset()
- func (x *FetchProfilesResponse) String() string
- type GenerateOptimizedManifestRequest
- func (*GenerateOptimizedManifestRequest) Descriptor() ([]byte, []int) (deprecated)
- func (x *GenerateOptimizedManifestRequest) GetAcceleratorType() string
- func (x *GenerateOptimizedManifestRequest) GetKubernetesNamespace() string
- func (x *GenerateOptimizedManifestRequest) GetModelServerInfo() *ModelServerInfo
- func (x *GenerateOptimizedManifestRequest) GetPerformanceRequirements() *PerformanceRequirements
- func (x *GenerateOptimizedManifestRequest) GetStorageConfig() *StorageConfig
- func (*GenerateOptimizedManifestRequest) ProtoMessage()
- func (x *GenerateOptimizedManifestRequest) ProtoReflect() protoreflect.Message
- func (x *GenerateOptimizedManifestRequest) Reset()
- func (x *GenerateOptimizedManifestRequest) String() string
- type GenerateOptimizedManifestResponse
- func (*GenerateOptimizedManifestResponse) Descriptor() ([]byte, []int) (deprecated)
- func (x *GenerateOptimizedManifestResponse) GetComments() []string
- func (x *GenerateOptimizedManifestResponse) GetKubernetesManifests() []*KubernetesManifest
- func (x *GenerateOptimizedManifestResponse) GetManifestVersion() string
- func (*GenerateOptimizedManifestResponse) ProtoMessage()
- func (x *GenerateOptimizedManifestResponse) ProtoReflect() protoreflect.Message
- func (x *GenerateOptimizedManifestResponse) Reset()
- func (x *GenerateOptimizedManifestResponse) String() string
- type GkeInferenceQuickstartClient
- type GkeInferenceQuickstartServer
- type KubernetesManifest
- func (*KubernetesManifest) Descriptor() ([]byte, []int) (deprecated)
- func (x *KubernetesManifest) GetApiVersion() string
- func (x *KubernetesManifest) GetContent() string
- func (x *KubernetesManifest) GetKind() string
- func (*KubernetesManifest) ProtoMessage()
- func (x *KubernetesManifest) ProtoReflect() protoreflect.Message
- func (x *KubernetesManifest) Reset()
- func (x *KubernetesManifest) String() string
- type MillisecondRange
- func (*MillisecondRange) Descriptor() ([]byte, []int) (deprecated)
- func (x *MillisecondRange) GetMax() int32
- func (x *MillisecondRange) GetMin() int32
- func (*MillisecondRange) ProtoMessage()
- func (x *MillisecondRange) ProtoReflect() protoreflect.Message
- func (x *MillisecondRange) Reset()
- func (x *MillisecondRange) String() string
- type ModelServerInfo
- func (*ModelServerInfo) Descriptor() ([]byte, []int) (deprecated)
- func (x *ModelServerInfo) GetModel() string
- func (x *ModelServerInfo) GetModelServer() string
- func (x *ModelServerInfo) GetModelServerVersion() string
- func (*ModelServerInfo) ProtoMessage()
- func (x *ModelServerInfo) ProtoReflect() protoreflect.Message
- func (x *ModelServerInfo) Reset()
- func (x *ModelServerInfo) String() string
- type PerformanceRange
- func (*PerformanceRange) Descriptor() ([]byte, []int) (deprecated)
- func (x *PerformanceRange) GetNtpotRange() *MillisecondRange
- func (x *PerformanceRange) GetThroughputOutputRange() *TokensPerSecondRange
- func (x *PerformanceRange) GetTtftRange() *MillisecondRange
- func (*PerformanceRange) ProtoMessage()
- func (x *PerformanceRange) ProtoReflect() protoreflect.Message
- func (x *PerformanceRange) Reset()
- func (x *PerformanceRange) String() string
- type PerformanceRequirements
- func (*PerformanceRequirements) Descriptor() ([]byte, []int) (deprecated)
- func (x *PerformanceRequirements) GetTargetCost() *Cost
- func (x *PerformanceRequirements) GetTargetNtpotMilliseconds() int32
- func (x *PerformanceRequirements) GetTargetTtftMilliseconds() int32
- func (*PerformanceRequirements) ProtoMessage()
- func (x *PerformanceRequirements) ProtoReflect() protoreflect.Message
- func (x *PerformanceRequirements) Reset()
- func (x *PerformanceRequirements) String() string
- type PerformanceStats
- func (*PerformanceStats) Descriptor() ([]byte, []int) (deprecated)
- func (x *PerformanceStats) GetCost() []*Cost
- func (x *PerformanceStats) GetNtpotMilliseconds() int32
- func (x *PerformanceStats) GetOutputTokensPerSecond() int32
- func (x *PerformanceStats) GetQueriesPerSecond() float32
- func (x *PerformanceStats) GetTtftMilliseconds() int32
- func (*PerformanceStats) ProtoMessage()
- func (x *PerformanceStats) ProtoReflect() protoreflect.Message
- func (x *PerformanceStats) Reset()
- func (x *PerformanceStats) String() string
- type Profile
- func (*Profile) Descriptor() ([]byte, []int) (deprecated)
- func (x *Profile) GetAcceleratorType() string
- func (x *Profile) GetInstanceType() string
- func (x *Profile) GetModelServerInfo() *ModelServerInfo
- func (x *Profile) GetPerformanceStats() []*PerformanceStats
- func (x *Profile) GetResourcesUsed() *ResourcesUsed
- func (x *Profile) GetTpuTopology() string
- func (*Profile) ProtoMessage()
- func (x *Profile) ProtoReflect() protoreflect.Message
- func (x *Profile) Reset()
- func (x *Profile) String() string
- type ResourcesUsed
- type StorageConfig
- func (*StorageConfig) Descriptor() ([]byte, []int) (deprecated)
- func (x *StorageConfig) GetModelBucketUri() string
- func (x *StorageConfig) GetXlaCacheBucketUri() string
- func (*StorageConfig) ProtoMessage()
- func (x *StorageConfig) ProtoReflect() protoreflect.Message
- func (x *StorageConfig) Reset()
- func (x *StorageConfig) String() string
- type TokensPerSecondRange
- func (*TokensPerSecondRange) Descriptor() ([]byte, []int) (deprecated)
- func (x *TokensPerSecondRange) GetMax() int32
- func (x *TokensPerSecondRange) GetMin() int32
- func (*TokensPerSecondRange) ProtoMessage()
- func (x *TokensPerSecondRange) ProtoReflect() protoreflect.Message
- func (x *TokensPerSecondRange) Reset()
- func (x *TokensPerSecondRange) String() string
- type UnimplementedGkeInferenceQuickstartServer
- func (UnimplementedGkeInferenceQuickstartServer) FetchBenchmarkingData(context.Context, *FetchBenchmarkingDataRequest) (*FetchBenchmarkingDataResponse, error)
- func (UnimplementedGkeInferenceQuickstartServer) FetchModelServerVersions(context.Context, *FetchModelServerVersionsRequest) (*FetchModelServerVersionsResponse, error)
- func (UnimplementedGkeInferenceQuickstartServer) FetchModelServers(context.Context, *FetchModelServersRequest) (*FetchModelServersResponse, error)
- func (UnimplementedGkeInferenceQuickstartServer) FetchModels(context.Context, *FetchModelsRequest) (*FetchModelsResponse, error)
- func (UnimplementedGkeInferenceQuickstartServer) FetchProfiles(context.Context, *FetchProfilesRequest) (*FetchProfilesResponse, error)
- func (UnimplementedGkeInferenceQuickstartServer) GenerateOptimizedManifest(context.Context, *GenerateOptimizedManifestRequest) (*GenerateOptimizedManifestResponse, error)
- type UnsafeGkeInferenceQuickstartServer
Constants ¶
const (
	GkeInferenceQuickstart_FetchModels_FullMethodName               = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchModels"
	GkeInferenceQuickstart_FetchModelServers_FullMethodName         = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchModelServers"
	GkeInferenceQuickstart_FetchModelServerVersions_FullMethodName  = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchModelServerVersions"
	GkeInferenceQuickstart_FetchProfiles_FullMethodName             = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchProfiles"
	GkeInferenceQuickstart_GenerateOptimizedManifest_FullMethodName = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/GenerateOptimizedManifest"
	GkeInferenceQuickstart_FetchBenchmarkingData_FullMethodName     = "/google.cloud.gkerecommender.v1.GkeInferenceQuickstart/FetchBenchmarkingData"
)
Variables ¶
var File_google_cloud_gkerecommender_v1_gkerecommender_proto protoreflect.FileDescriptor
var GkeInferenceQuickstart_ServiceDesc = grpc.ServiceDesc{
	ServiceName: "google.cloud.gkerecommender.v1.GkeInferenceQuickstart",
	HandlerType: (*GkeInferenceQuickstartServer)(nil),
	Methods: []grpc.MethodDesc{
		{
			MethodName: "FetchModels",
			Handler:    _GkeInferenceQuickstart_FetchModels_Handler,
		},
		{
			MethodName: "FetchModelServers",
			Handler:    _GkeInferenceQuickstart_FetchModelServers_Handler,
		},
		{
			MethodName: "FetchModelServerVersions",
			Handler:    _GkeInferenceQuickstart_FetchModelServerVersions_Handler,
		},
		{
			MethodName: "FetchProfiles",
			Handler:    _GkeInferenceQuickstart_FetchProfiles_Handler,
		},
		{
			MethodName: "GenerateOptimizedManifest",
			Handler:    _GkeInferenceQuickstart_GenerateOptimizedManifest_Handler,
		},
		{
			MethodName: "FetchBenchmarkingData",
			Handler:    _GkeInferenceQuickstart_FetchBenchmarkingData_Handler,
		},
	},
	Streams:  []grpc.StreamDesc{},
	Metadata: "google/cloud/gkerecommender/v1/gkerecommender.proto",
}
GkeInferenceQuickstart_ServiceDesc is the grpc.ServiceDesc for the GkeInferenceQuickstart service. It's only intended for direct use with grpc.RegisterService, and not to be introspected or modified (even as a copy).
Functions ¶
func RegisterGkeInferenceQuickstartServer ¶
func RegisterGkeInferenceQuickstartServer(s grpc.ServiceRegistrar, srv GkeInferenceQuickstartServer)
Types ¶
type Amount ¶
type Amount struct {
// Output only. The whole units of the amount.
// For example if `currencyCode` is `"USD"`, then 1 unit is one US dollar.
Units int64 `protobuf:"varint,1,opt,name=units,proto3" json:"units,omitempty"`
// Output only. Number of nano (10^-9) units of the amount.
// The value must be between -999,999,999 and +999,999,999 inclusive.
// If `units` is positive, `nanos` must be positive or zero.
// If `units` is zero, `nanos` can be positive, zero, or negative.
// If `units` is negative, `nanos` must be negative or zero.
// For example $-1.75 is represented as `units`=-1 and `nanos`=-750,000,000.
Nanos int32 `protobuf:"varint,2,opt,name=nanos,proto3" json:"nanos,omitempty"`
// contains filtered or unexported fields
}
Represents an amount of money in a specific currency.
func (*Amount) Descriptor (deprecated) ¶
func (*Amount) ProtoMessage ¶
func (*Amount) ProtoMessage()
func (*Amount) ProtoReflect ¶
func (x *Amount) ProtoReflect() protoreflect.Message
type Cost ¶
type Cost struct {
// Optional. The cost per million output tokens, calculated as:
// $/output token = GPU $/s / (1/output-to-input-cost-ratio * input tokens/s +
// output tokens/s)
CostPerMillionOutputTokens *Amount `` /* 145-byte string literal not displayed */
// Optional. The cost per million input tokens. $/input token = ($/output
// token) / output-to-input-cost-ratio.
CostPerMillionInputTokens *Amount `` /* 142-byte string literal not displayed */
// Optional. The pricing model used to calculate the cost. Can be one of:
// `3-years-cud`, `1-year-cud`, `on-demand`, `spot`. If not provided, `spot`
// will be used.
PricingModel string `protobuf:"bytes,3,opt,name=pricing_model,json=pricingModel,proto3" json:"pricing_model,omitempty"`
// Optional. The output-to-input cost ratio. This determines how the total GPU
// cost is split between input and output tokens. If not provided, `4.0` is
// used, assuming a 4:1 output:input cost ratio.
OutputInputCostRatio *float32 `` /* 133-byte string literal not displayed */
// contains filtered or unexported fields
}
Cost for running a model deployment on a given instance type. Currently, only USD currency code is supported.
func (*Cost) Descriptor (deprecated) ¶
func (*Cost) GetCostPerMillionInputTokens ¶
func (*Cost) GetCostPerMillionOutputTokens ¶
func (*Cost) GetOutputInputCostRatio ¶
func (*Cost) GetPricingModel ¶
func (*Cost) ProtoMessage ¶
func (*Cost) ProtoMessage()
func (*Cost) ProtoReflect ¶
func (x *Cost) ProtoReflect() protoreflect.Message
type FetchBenchmarkingDataRequest ¶
type FetchBenchmarkingDataRequest struct {
// Required. The model server configuration to get benchmarking data for. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find valid configurations.
ModelServerInfo *ModelServerInfo `protobuf:"bytes,1,opt,name=model_server_info,json=modelServerInfo,proto3" json:"model_server_info,omitempty"`
// Optional. The instance type to filter benchmarking data. Instance types are
// in the format `a2-highgpu-1g`. If not provided, all instance types for the
// given profile's `model_server_info` will be returned. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find available instance types.
InstanceType string `protobuf:"bytes,3,opt,name=instance_type,json=instanceType,proto3" json:"instance_type,omitempty"`
// Optional. The pricing model to use for the benchmarking data. Defaults to
// `spot`.
PricingModel string `protobuf:"bytes,4,opt,name=pricing_model,json=pricingModel,proto3" json:"pricing_model,omitempty"`
// contains filtered or unexported fields
}
Request message for [GkeInferenceQuickstart.FetchBenchmarkingData][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchBenchmarkingData].
func (*FetchBenchmarkingDataRequest) Descriptor (deprecated) ¶
func (*FetchBenchmarkingDataRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchBenchmarkingDataRequest.ProtoReflect.Descriptor instead.
func (*FetchBenchmarkingDataRequest) GetInstanceType ¶
func (x *FetchBenchmarkingDataRequest) GetInstanceType() string
func (*FetchBenchmarkingDataRequest) GetModelServerInfo ¶
func (x *FetchBenchmarkingDataRequest) GetModelServerInfo() *ModelServerInfo
func (*FetchBenchmarkingDataRequest) GetPricingModel ¶
func (x *FetchBenchmarkingDataRequest) GetPricingModel() string
func (*FetchBenchmarkingDataRequest) ProtoMessage ¶
func (*FetchBenchmarkingDataRequest) ProtoMessage()
func (*FetchBenchmarkingDataRequest) ProtoReflect ¶
func (x *FetchBenchmarkingDataRequest) ProtoReflect() protoreflect.Message
func (*FetchBenchmarkingDataRequest) Reset ¶
func (x *FetchBenchmarkingDataRequest) Reset()
func (*FetchBenchmarkingDataRequest) String ¶
func (x *FetchBenchmarkingDataRequest) String() string
type FetchBenchmarkingDataResponse ¶
type FetchBenchmarkingDataResponse struct {
// Output only. List of profiles containing their respective benchmarking
// data.
Profile []*Profile `protobuf:"bytes,1,rep,name=profile,proto3" json:"profile,omitempty"`
// contains filtered or unexported fields
}
Response message for [GkeInferenceQuickstart.FetchBenchmarkingData][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchBenchmarkingData].
func (*FetchBenchmarkingDataResponse) Descriptor (deprecated) ¶
func (*FetchBenchmarkingDataResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchBenchmarkingDataResponse.ProtoReflect.Descriptor instead.
func (*FetchBenchmarkingDataResponse) GetProfile ¶
func (x *FetchBenchmarkingDataResponse) GetProfile() []*Profile
func (*FetchBenchmarkingDataResponse) ProtoMessage ¶
func (*FetchBenchmarkingDataResponse) ProtoMessage()
func (*FetchBenchmarkingDataResponse) ProtoReflect ¶
func (x *FetchBenchmarkingDataResponse) ProtoReflect() protoreflect.Message
func (*FetchBenchmarkingDataResponse) Reset ¶
func (x *FetchBenchmarkingDataResponse) Reset()
func (*FetchBenchmarkingDataResponse) String ¶
func (x *FetchBenchmarkingDataResponse) String() string
type FetchModelServerVersionsRequest ¶
type FetchModelServerVersionsRequest struct {
// Required. The model for which to list model server versions. Open-source
// models follow the Huggingface Hub `owner/model_name` format. Use
// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
// to find available models.
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
// Required. The model server for which to list versions. Open-source model
// servers use simplified, lowercase names (e.g., `vllm`). Use
// [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers]
// to find available model servers.
ModelServer string `protobuf:"bytes,2,opt,name=model_server,json=modelServer,proto3" json:"model_server,omitempty"`
// Optional. The target number of results to return in a single response.
// If not specified, a default value will be chosen by the service.
// Note that the response may include a partial list and a caller should
// only rely on the response's
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServerVersionsResponse.next_page_token]
// to determine if there are more instances left to be queried.
PageSize *int32 `protobuf:"varint,3,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
// Optional. The value of
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServerVersionsResponse.next_page_token]
// received from a previous `FetchModelServerVersionsRequest` call.
// Provide this to retrieve the subsequent page in a multi-page list of
// results. When paginating, all other parameters provided to
// `FetchModelServerVersionsRequest` must match the call that provided the
// page token.
PageToken *string `protobuf:"bytes,4,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
// contains filtered or unexported fields
}
Request message for [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions].
func (*FetchModelServerVersionsRequest) Descriptor (deprecated) ¶
func (*FetchModelServerVersionsRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelServerVersionsRequest.ProtoReflect.Descriptor instead.
func (*FetchModelServerVersionsRequest) GetModel ¶
func (x *FetchModelServerVersionsRequest) GetModel() string
func (*FetchModelServerVersionsRequest) GetModelServer ¶
func (x *FetchModelServerVersionsRequest) GetModelServer() string
func (*FetchModelServerVersionsRequest) GetPageSize ¶
func (x *FetchModelServerVersionsRequest) GetPageSize() int32
func (*FetchModelServerVersionsRequest) GetPageToken ¶
func (x *FetchModelServerVersionsRequest) GetPageToken() string
func (*FetchModelServerVersionsRequest) ProtoMessage ¶
func (*FetchModelServerVersionsRequest) ProtoMessage()
func (*FetchModelServerVersionsRequest) ProtoReflect ¶
func (x *FetchModelServerVersionsRequest) ProtoReflect() protoreflect.Message
func (*FetchModelServerVersionsRequest) Reset ¶
func (x *FetchModelServerVersionsRequest) Reset()
func (*FetchModelServerVersionsRequest) String ¶
func (x *FetchModelServerVersionsRequest) String() string
type FetchModelServerVersionsResponse ¶
type FetchModelServerVersionsResponse struct {
// Output only. A list of available model server versions.
ModelServerVersions []string `protobuf:"bytes,1,rep,name=model_server_versions,json=modelServerVersions,proto3" json:"model_server_versions,omitempty"`
// Output only. A token which may be sent as
// [page_token][google.cloud.gkerecommender.v1.FetchModelServerVersionsRequest.page_token]
// in a subsequent `FetchModelServerVersions` call to retrieve the next page
// of results. If this field is omitted or empty, then there are no more
// results to return.
NextPageToken string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
// contains filtered or unexported fields
}
Response message for [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions].
func (*FetchModelServerVersionsResponse) Descriptor (deprecated) ¶
func (*FetchModelServerVersionsResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelServerVersionsResponse.ProtoReflect.Descriptor instead.
func (*FetchModelServerVersionsResponse) GetModelServerVersions ¶
func (x *FetchModelServerVersionsResponse) GetModelServerVersions() []string
func (*FetchModelServerVersionsResponse) GetNextPageToken ¶
func (x *FetchModelServerVersionsResponse) GetNextPageToken() string
func (*FetchModelServerVersionsResponse) ProtoMessage ¶
func (*FetchModelServerVersionsResponse) ProtoMessage()
func (*FetchModelServerVersionsResponse) ProtoReflect ¶
func (x *FetchModelServerVersionsResponse) ProtoReflect() protoreflect.Message
func (*FetchModelServerVersionsResponse) Reset ¶
func (x *FetchModelServerVersionsResponse) Reset()
func (*FetchModelServerVersionsResponse) String ¶
func (x *FetchModelServerVersionsResponse) String() string
type FetchModelServersRequest ¶
type FetchModelServersRequest struct {
// Required. The model for which to list model servers. Open-source models
// follow the Huggingface Hub `owner/model_name` format. Use
// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
// to find available models.
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
// Optional. The target number of results to return in a single response.
// If not specified, a default value will be chosen by the service.
// Note that the response may include a partial list and a caller should
// only rely on the response's
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServersResponse.next_page_token]
// to determine if there are more instances left to be queried.
PageSize *int32 `protobuf:"varint,2,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
// Optional. The value of
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelServersResponse.next_page_token]
// received from a previous `FetchModelServersRequest` call.
// Provide this to retrieve the subsequent page in a multi-page list of
// results. When paginating, all other parameters provided to
// `FetchModelServersRequest` must match the call that provided the page
// token.
PageToken *string `protobuf:"bytes,3,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
// contains filtered or unexported fields
}
Request message for [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers].
func (*FetchModelServersRequest) Descriptor (deprecated) ¶
func (*FetchModelServersRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelServersRequest.ProtoReflect.Descriptor instead.
func (*FetchModelServersRequest) GetModel ¶
func (x *FetchModelServersRequest) GetModel() string
func (*FetchModelServersRequest) GetPageSize ¶
func (x *FetchModelServersRequest) GetPageSize() int32
func (*FetchModelServersRequest) GetPageToken ¶
func (x *FetchModelServersRequest) GetPageToken() string
func (*FetchModelServersRequest) ProtoMessage ¶
func (*FetchModelServersRequest) ProtoMessage()
func (*FetchModelServersRequest) ProtoReflect ¶
func (x *FetchModelServersRequest) ProtoReflect() protoreflect.Message
func (*FetchModelServersRequest) Reset ¶
func (x *FetchModelServersRequest) Reset()
func (*FetchModelServersRequest) String ¶
func (x *FetchModelServersRequest) String() string
type FetchModelServersResponse ¶
type FetchModelServersResponse struct {
// Output only. List of available model servers. Open-source model servers use
// simplified, lowercase names (e.g., `vllm`).
ModelServers []string `protobuf:"bytes,1,rep,name=model_servers,json=modelServers,proto3" json:"model_servers,omitempty"`
// Output only. A token which may be sent as
// [page_token][google.cloud.gkerecommender.v1.FetchModelServersRequest.page_token]
// in a subsequent `FetchModelServers` call to retrieve the next page of
// results. If this field is omitted or empty, then there are no more results
// to return.
NextPageToken string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
// contains filtered or unexported fields
}
Response message for [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers].
func (*FetchModelServersResponse) Descriptor (deprecated) ¶
func (*FetchModelServersResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelServersResponse.ProtoReflect.Descriptor instead.
func (*FetchModelServersResponse) GetModelServers ¶
func (x *FetchModelServersResponse) GetModelServers() []string
func (*FetchModelServersResponse) GetNextPageToken ¶
func (x *FetchModelServersResponse) GetNextPageToken() string
func (*FetchModelServersResponse) ProtoMessage ¶
func (*FetchModelServersResponse) ProtoMessage()
func (*FetchModelServersResponse) ProtoReflect ¶
func (x *FetchModelServersResponse) ProtoReflect() protoreflect.Message
func (*FetchModelServersResponse) Reset ¶
func (x *FetchModelServersResponse) Reset()
func (*FetchModelServersResponse) String ¶
func (x *FetchModelServersResponse) String() string
type FetchModelsRequest ¶
type FetchModelsRequest struct {
// Optional. The target number of results to return in a single response.
// If not specified, a default value will be chosen by the service.
// Note that the response may include a partial list and a caller should
// only rely on the response's
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelsResponse.next_page_token]
// to determine if there are more instances left to be queried.
PageSize *int32 `protobuf:"varint,1,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
// Optional. The value of
// [next_page_token][google.cloud.gkerecommender.v1.FetchModelsResponse.next_page_token]
// received from a previous `FetchModelsRequest` call.
// Provide this to retrieve the subsequent page in a multi-page list of
// results. When paginating, all other parameters provided to
// `FetchModelsRequest` must match the call that provided the page token.
PageToken *string `protobuf:"bytes,2,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
// contains filtered or unexported fields
}
Request message for [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels].
func (*FetchModelsRequest) Descriptor (deprecated) ¶
func (*FetchModelsRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelsRequest.ProtoReflect.Descriptor instead.
func (*FetchModelsRequest) GetPageSize ¶
func (x *FetchModelsRequest) GetPageSize() int32
func (*FetchModelsRequest) GetPageToken ¶
func (x *FetchModelsRequest) GetPageToken() string
func (*FetchModelsRequest) ProtoMessage ¶
func (*FetchModelsRequest) ProtoMessage()
func (*FetchModelsRequest) ProtoReflect ¶
func (x *FetchModelsRequest) ProtoReflect() protoreflect.Message
func (*FetchModelsRequest) Reset ¶
func (x *FetchModelsRequest) Reset()
func (*FetchModelsRequest) String ¶
func (x *FetchModelsRequest) String() string
type FetchModelsResponse ¶
type FetchModelsResponse struct {
// Output only. List of available models. Open-source models follow the
// Huggingface Hub `owner/model_name` format.
Models []string `protobuf:"bytes,1,rep,name=models,proto3" json:"models,omitempty"`
// Output only. A token which may be sent as
// [page_token][FetchModelsRequest.page_token] in a subsequent
// `FetchModels` call to retrieve the next page of results.
// If this field is omitted or empty, then there are no more results to
// return.
NextPageToken string `protobuf:"bytes,2,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
// contains filtered or unexported fields
}
Response message for [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels].
func (*FetchModelsResponse) Descriptor
deprecated
func (*FetchModelsResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchModelsResponse.ProtoReflect.Descriptor instead.
func (*FetchModelsResponse) GetModels ¶
func (x *FetchModelsResponse) GetModels() []string
func (*FetchModelsResponse) GetNextPageToken ¶
func (x *FetchModelsResponse) GetNextPageToken() string
func (*FetchModelsResponse) ProtoMessage ¶
func (*FetchModelsResponse) ProtoMessage()
func (*FetchModelsResponse) ProtoReflect ¶
func (x *FetchModelsResponse) ProtoReflect() protoreflect.Message
func (*FetchModelsResponse) Reset ¶
func (x *FetchModelsResponse) Reset()
func (*FetchModelsResponse) String ¶
func (x *FetchModelsResponse) String() string
type FetchProfilesRequest ¶
type FetchProfilesRequest struct {
// Optional. The model to filter profiles by. Open-source models follow the
// Huggingface Hub `owner/model_name` format. If not provided, all models are
// returned. Use
// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
// to find available models.
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
// Optional. The model server to filter profiles by. If not provided, all
// model servers are returned. Use
// [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers]
// to find available model servers for a given model.
ModelServer string `protobuf:"bytes,2,opt,name=model_server,json=modelServer,proto3" json:"model_server,omitempty"`
// Optional. The model server version to filter profiles by. If not provided,
// all model server versions are returned. Use
// [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions]
// to find available versions for a given model and server.
ModelServerVersion string `protobuf:"bytes,3,opt,name=model_server_version,json=modelServerVersion,proto3" json:"model_server_version,omitempty"`
// Optional. The performance requirements to filter profiles. Profiles that do
// not meet these requirements are filtered out. If not provided, all profiles
// are returned.
PerformanceRequirements *PerformanceRequirements `` /* 130-byte string literal not displayed */
// Optional. The target number of results to return in a single response. If
// not specified, a default value will be chosen by the service. Note that the
// response may include a partial list and a caller should only rely on the
// response's
// [next_page_token][google.cloud.gkerecommender.v1.FetchProfilesResponse.next_page_token]
// to determine if there are more instances left to be queried.
PageSize *int32 `protobuf:"varint,5,opt,name=page_size,json=pageSize,proto3,oneof" json:"page_size,omitempty"`
// Optional. The value of
// [next_page_token][google.cloud.gkerecommender.v1.FetchProfilesResponse.next_page_token]
// received from a previous `FetchProfilesRequest` call.
// Provide this to retrieve the subsequent page in a multi-page list of
// results. When paginating, all other parameters provided to
// `FetchProfilesRequest` must match the call that provided the page
// token.
PageToken *string `protobuf:"bytes,6,opt,name=page_token,json=pageToken,proto3,oneof" json:"page_token,omitempty"`
// contains filtered or unexported fields
}
Request message for [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles].
func (*FetchProfilesRequest) Descriptor
deprecated
func (*FetchProfilesRequest) Descriptor() ([]byte, []int)
Deprecated: Use FetchProfilesRequest.ProtoReflect.Descriptor instead.
func (*FetchProfilesRequest) GetModel ¶
func (x *FetchProfilesRequest) GetModel() string
func (*FetchProfilesRequest) GetModelServer ¶
func (x *FetchProfilesRequest) GetModelServer() string
func (*FetchProfilesRequest) GetModelServerVersion ¶
func (x *FetchProfilesRequest) GetModelServerVersion() string
func (*FetchProfilesRequest) GetPageSize ¶
func (x *FetchProfilesRequest) GetPageSize() int32
func (*FetchProfilesRequest) GetPageToken ¶
func (x *FetchProfilesRequest) GetPageToken() string
func (*FetchProfilesRequest) GetPerformanceRequirements ¶
func (x *FetchProfilesRequest) GetPerformanceRequirements() *PerformanceRequirements
func (*FetchProfilesRequest) ProtoMessage ¶
func (*FetchProfilesRequest) ProtoMessage()
func (*FetchProfilesRequest) ProtoReflect ¶
func (x *FetchProfilesRequest) ProtoReflect() protoreflect.Message
func (*FetchProfilesRequest) Reset ¶
func (x *FetchProfilesRequest) Reset()
func (*FetchProfilesRequest) String ¶
func (x *FetchProfilesRequest) String() string
type FetchProfilesResponse ¶
type FetchProfilesResponse struct {
// Output only. List of profiles that match the given model server info and
// performance requirements (if provided).
Profile []*Profile `protobuf:"bytes,1,rep,name=profile,proto3" json:"profile,omitempty"`
// Output only. The combined range of performance values observed across all
// profiles in this response.
PerformanceRange *PerformanceRange `protobuf:"bytes,2,opt,name=performance_range,json=performanceRange,proto3" json:"performance_range,omitempty"`
// Output only. Additional comments related to the response.
Comments string `protobuf:"bytes,3,opt,name=comments,proto3" json:"comments,omitempty"`
// Output only. A token which may be sent as
// [page_token][FetchProfilesRequest.page_token] in a subsequent
// `FetchProfiles` call to retrieve the next page of results. If this
// field is omitted or empty, then there are no more results to return.
NextPageToken string `protobuf:"bytes,4,opt,name=next_page_token,json=nextPageToken,proto3" json:"next_page_token,omitempty"`
// contains filtered or unexported fields
}
Response message for [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles].
func (*FetchProfilesResponse) Descriptor
deprecated
func (*FetchProfilesResponse) Descriptor() ([]byte, []int)
Deprecated: Use FetchProfilesResponse.ProtoReflect.Descriptor instead.
func (*FetchProfilesResponse) GetComments ¶
func (x *FetchProfilesResponse) GetComments() string
func (*FetchProfilesResponse) GetNextPageToken ¶
func (x *FetchProfilesResponse) GetNextPageToken() string
func (*FetchProfilesResponse) GetPerformanceRange ¶
func (x *FetchProfilesResponse) GetPerformanceRange() *PerformanceRange
func (*FetchProfilesResponse) GetProfile ¶
func (x *FetchProfilesResponse) GetProfile() []*Profile
func (*FetchProfilesResponse) ProtoMessage ¶
func (*FetchProfilesResponse) ProtoMessage()
func (*FetchProfilesResponse) ProtoReflect ¶
func (x *FetchProfilesResponse) ProtoReflect() protoreflect.Message
func (*FetchProfilesResponse) Reset ¶
func (x *FetchProfilesResponse) Reset()
func (*FetchProfilesResponse) String ¶
func (x *FetchProfilesResponse) String() string
type GenerateOptimizedManifestRequest ¶
type GenerateOptimizedManifestRequest struct {
// Required. The model server configuration to generate the manifest for. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find valid configurations.
ModelServerInfo *ModelServerInfo `protobuf:"bytes,1,opt,name=model_server_info,json=modelServerInfo,proto3" json:"model_server_info,omitempty"`
// Required. The accelerator type. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find valid accelerators for a given `model_server_info`.
AcceleratorType string `protobuf:"bytes,2,opt,name=accelerator_type,json=acceleratorType,proto3" json:"accelerator_type,omitempty"`
// Optional. The kubernetes namespace to deploy the manifests in.
KubernetesNamespace string `protobuf:"bytes,3,opt,name=kubernetes_namespace,json=kubernetesNamespace,proto3" json:"kubernetes_namespace,omitempty"`
// Optional. The performance requirements to use for generating Horizontal Pod
// Autoscaler (HPA) resources. If provided, the manifest includes HPA
// resources to adjust the model server replica count to maintain the
// specified targets (e.g., NTPOT, TTFT) at a P50 latency. Cost targets are
// not currently supported for HPA generation. If the specified targets are
// not achievable, the HPA manifest will not be generated.
PerformanceRequirements *PerformanceRequirements `` /* 130-byte string literal not displayed */
// Optional. The storage configuration for the model. If not provided, the
// model is loaded from Huggingface.
StorageConfig *StorageConfig `protobuf:"bytes,5,opt,name=storage_config,json=storageConfig,proto3" json:"storage_config,omitempty"`
// contains filtered or unexported fields
}
Request message for [GkeInferenceQuickstart.GenerateOptimizedManifest][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.GenerateOptimizedManifest].
func (*GenerateOptimizedManifestRequest) Descriptor
deprecated
func (*GenerateOptimizedManifestRequest) Descriptor() ([]byte, []int)
Deprecated: Use GenerateOptimizedManifestRequest.ProtoReflect.Descriptor instead.
func (*GenerateOptimizedManifestRequest) GetAcceleratorType ¶
func (x *GenerateOptimizedManifestRequest) GetAcceleratorType() string
func (*GenerateOptimizedManifestRequest) GetKubernetesNamespace ¶
func (x *GenerateOptimizedManifestRequest) GetKubernetesNamespace() string
func (*GenerateOptimizedManifestRequest) GetModelServerInfo ¶
func (x *GenerateOptimizedManifestRequest) GetModelServerInfo() *ModelServerInfo
func (*GenerateOptimizedManifestRequest) GetPerformanceRequirements ¶
func (x *GenerateOptimizedManifestRequest) GetPerformanceRequirements() *PerformanceRequirements
func (*GenerateOptimizedManifestRequest) GetStorageConfig ¶
func (x *GenerateOptimizedManifestRequest) GetStorageConfig() *StorageConfig
func (*GenerateOptimizedManifestRequest) ProtoMessage ¶
func (*GenerateOptimizedManifestRequest) ProtoMessage()
func (*GenerateOptimizedManifestRequest) ProtoReflect ¶
func (x *GenerateOptimizedManifestRequest) ProtoReflect() protoreflect.Message
func (*GenerateOptimizedManifestRequest) Reset ¶
func (x *GenerateOptimizedManifestRequest) Reset()
func (*GenerateOptimizedManifestRequest) String ¶
func (x *GenerateOptimizedManifestRequest) String() string
type GenerateOptimizedManifestResponse ¶
type GenerateOptimizedManifestResponse struct {
// Output only. A list of generated Kubernetes manifests.
KubernetesManifests []*KubernetesManifest `protobuf:"bytes,1,rep,name=kubernetes_manifests,json=kubernetesManifests,proto3" json:"kubernetes_manifests,omitempty"`
// Output only. Comments related to deploying the generated manifests.
Comments []string `protobuf:"bytes,2,rep,name=comments,proto3" json:"comments,omitempty"`
// Output only. Additional information about the versioned dependencies used
// to generate the manifests. See [Run best practice inference with GKE
// Inference Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart)
// for details.
ManifestVersion string `protobuf:"bytes,3,opt,name=manifest_version,json=manifestVersion,proto3" json:"manifest_version,omitempty"`
// contains filtered or unexported fields
}
Response message for [GkeInferenceQuickstart.GenerateOptimizedManifest][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.GenerateOptimizedManifest].
func (*GenerateOptimizedManifestResponse) Descriptor
deprecated
func (*GenerateOptimizedManifestResponse) Descriptor() ([]byte, []int)
Deprecated: Use GenerateOptimizedManifestResponse.ProtoReflect.Descriptor instead.
func (*GenerateOptimizedManifestResponse) GetComments ¶
func (x *GenerateOptimizedManifestResponse) GetComments() []string
func (*GenerateOptimizedManifestResponse) GetKubernetesManifests ¶
func (x *GenerateOptimizedManifestResponse) GetKubernetesManifests() []*KubernetesManifest
func (*GenerateOptimizedManifestResponse) GetManifestVersion ¶
func (x *GenerateOptimizedManifestResponse) GetManifestVersion() string
func (*GenerateOptimizedManifestResponse) ProtoMessage ¶
func (*GenerateOptimizedManifestResponse) ProtoMessage()
func (*GenerateOptimizedManifestResponse) ProtoReflect ¶
func (x *GenerateOptimizedManifestResponse) ProtoReflect() protoreflect.Message
func (*GenerateOptimizedManifestResponse) Reset ¶
func (x *GenerateOptimizedManifestResponse) Reset()
func (*GenerateOptimizedManifestResponse) String ¶
func (x *GenerateOptimizedManifestResponse) String() string
type GkeInferenceQuickstartClient ¶
type GkeInferenceQuickstartClient interface {
// Fetches available models. Open-source models follow the Huggingface Hub
// `owner/model_name` format.
FetchModels(ctx context.Context, in *FetchModelsRequest, opts ...grpc.CallOption) (*FetchModelsResponse, error)
// Fetches available model servers. Open-source model servers use simplified,
// lowercase names (e.g., `vllm`).
FetchModelServers(ctx context.Context, in *FetchModelServersRequest, opts ...grpc.CallOption) (*FetchModelServersResponse, error)
// Fetches available model server versions. Open-source servers use their own
// versioning schemas (e.g., `vllm` uses semver like `v1.0.0`).
//
// Some model servers have different versioning schemas depending on the
// accelerator. For example, `vllm` uses semver on GPUs, but returns nightly
// build tags on TPUs. All available versions will be returned when different
// schemas are present.
FetchModelServerVersions(ctx context.Context, in *FetchModelServerVersionsRequest, opts ...grpc.CallOption) (*FetchModelServerVersionsResponse, error)
// Fetches available profiles. A profile contains performance metrics and
// cost information for a specific model server setup. Profiles can be
// filtered by parameters. If no filters are provided, all profiles are
// returned.
//
// Profiles display a single value per performance metric based on the
// provided performance requirements. If no requirements are given, the
// metrics represent the inflection point. See [Run best practice inference
// with GKE Inference Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart#how)
// for details.
FetchProfiles(ctx context.Context, in *FetchProfilesRequest, opts ...grpc.CallOption) (*FetchProfilesResponse, error)
// Generates an optimized deployment manifest for a given model and model
// server, based on the specified accelerator, performance targets, and
// configurations. See [Run best practice inference with GKE Inference
// Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart)
// for deployment details.
GenerateOptimizedManifest(ctx context.Context, in *GenerateOptimizedManifestRequest, opts ...grpc.CallOption) (*GenerateOptimizedManifestResponse, error)
// Fetches all of the benchmarking data available for a profile. Benchmarking
// data returns all of the performance metrics available for a given model
// server setup on a given instance type.
FetchBenchmarkingData(ctx context.Context, in *FetchBenchmarkingDataRequest, opts ...grpc.CallOption) (*FetchBenchmarkingDataResponse, error)
}
GkeInferenceQuickstartClient is the client API for GkeInferenceQuickstart service.
For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
func NewGkeInferenceQuickstartClient ¶
func NewGkeInferenceQuickstartClient(cc grpc.ClientConnInterface) GkeInferenceQuickstartClient
type GkeInferenceQuickstartServer ¶
type GkeInferenceQuickstartServer interface {
// Fetches available models. Open-source models follow the Huggingface Hub
// `owner/model_name` format.
FetchModels(context.Context, *FetchModelsRequest) (*FetchModelsResponse, error)
// Fetches available model servers. Open-source model servers use simplified,
// lowercase names (e.g., `vllm`).
FetchModelServers(context.Context, *FetchModelServersRequest) (*FetchModelServersResponse, error)
// Fetches available model server versions. Open-source servers use their own
// versioning schemas (e.g., `vllm` uses semver like `v1.0.0`).
//
// Some model servers have different versioning schemas depending on the
// accelerator. For example, `vllm` uses semver on GPUs, but returns nightly
// build tags on TPUs. All available versions will be returned when different
// schemas are present.
FetchModelServerVersions(context.Context, *FetchModelServerVersionsRequest) (*FetchModelServerVersionsResponse, error)
// Fetches available profiles. A profile contains performance metrics and
// cost information for a specific model server setup. Profiles can be
// filtered by parameters. If no filters are provided, all profiles are
// returned.
//
// Profiles display a single value per performance metric based on the
// provided performance requirements. If no requirements are given, the
// metrics represent the inflection point. See [Run best practice inference
// with GKE Inference Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart#how)
// for details.
FetchProfiles(context.Context, *FetchProfilesRequest) (*FetchProfilesResponse, error)
// Generates an optimized deployment manifest for a given model and model
// server, based on the specified accelerator, performance targets, and
// configurations. See [Run best practice inference with GKE Inference
// Quickstart
// recipes](https://cloud.google.com/kubernetes-engine/docs/how-to/machine-learning/inference/inference-quickstart)
// for deployment details.
GenerateOptimizedManifest(context.Context, *GenerateOptimizedManifestRequest) (*GenerateOptimizedManifestResponse, error)
// Fetches all of the benchmarking data available for a profile. Benchmarking
// data returns all of the performance metrics available for a given model
// server setup on a given instance type.
FetchBenchmarkingData(context.Context, *FetchBenchmarkingDataRequest) (*FetchBenchmarkingDataResponse, error)
}
GkeInferenceQuickstartServer is the server API for GkeInferenceQuickstart service. All implementations should embed UnimplementedGkeInferenceQuickstartServer for forward compatibility.
type KubernetesManifest ¶
type KubernetesManifest struct {
// Output only. Kubernetes resource kind.
Kind string `protobuf:"bytes,1,opt,name=kind,proto3" json:"kind,omitempty"`
// Output only. Kubernetes API version.
ApiVersion string `protobuf:"bytes,2,opt,name=api_version,json=apiVersion,proto3" json:"api_version,omitempty"`
// Output only. YAML content.
Content string `protobuf:"bytes,3,opt,name=content,proto3" json:"content,omitempty"`
// contains filtered or unexported fields
}
A Kubernetes manifest.
func (*KubernetesManifest) Descriptor
deprecated
func (*KubernetesManifest) Descriptor() ([]byte, []int)
Deprecated: Use KubernetesManifest.ProtoReflect.Descriptor instead.
func (*KubernetesManifest) GetApiVersion ¶
func (x *KubernetesManifest) GetApiVersion() string
func (*KubernetesManifest) GetContent ¶
func (x *KubernetesManifest) GetContent() string
func (*KubernetesManifest) GetKind ¶
func (x *KubernetesManifest) GetKind() string
func (*KubernetesManifest) ProtoMessage ¶
func (*KubernetesManifest) ProtoMessage()
func (*KubernetesManifest) ProtoReflect ¶
func (x *KubernetesManifest) ProtoReflect() protoreflect.Message
func (*KubernetesManifest) Reset ¶
func (x *KubernetesManifest) Reset()
func (*KubernetesManifest) String ¶
func (x *KubernetesManifest) String() string
type MillisecondRange ¶
type MillisecondRange struct {
// Output only. The minimum value of the range.
Min int32 `protobuf:"varint,1,opt,name=min,proto3" json:"min,omitempty"`
// Output only. The maximum value of the range.
Max int32 `protobuf:"varint,2,opt,name=max,proto3" json:"max,omitempty"`
// contains filtered or unexported fields
}
Represents a range of latency values in milliseconds.
func (*MillisecondRange) Descriptor
deprecated
func (*MillisecondRange) Descriptor() ([]byte, []int)
Deprecated: Use MillisecondRange.ProtoReflect.Descriptor instead.
func (*MillisecondRange) GetMax ¶
func (x *MillisecondRange) GetMax() int32
func (*MillisecondRange) GetMin ¶
func (x *MillisecondRange) GetMin() int32
func (*MillisecondRange) ProtoMessage ¶
func (*MillisecondRange) ProtoMessage()
func (*MillisecondRange) ProtoReflect ¶
func (x *MillisecondRange) ProtoReflect() protoreflect.Message
func (*MillisecondRange) Reset ¶
func (x *MillisecondRange) Reset()
func (*MillisecondRange) String ¶
func (x *MillisecondRange) String() string
type ModelServerInfo ¶
type ModelServerInfo struct {
// Required. The model. Open-source models follow the Huggingface Hub
// `owner/model_name` format. Use
// [GkeInferenceQuickstart.FetchModels][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModels]
// to find available models.
Model string `protobuf:"bytes,1,opt,name=model,proto3" json:"model,omitempty"`
// Required. The model server. Open-source model servers use simplified,
// lowercase names (e.g., `vllm`). Use
// [GkeInferenceQuickstart.FetchModelServers][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServers]
// to find available servers.
ModelServer string `protobuf:"bytes,2,opt,name=model_server,json=modelServer,proto3" json:"model_server,omitempty"`
// Optional. The model server version. Use
// [GkeInferenceQuickstart.FetchModelServerVersions][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchModelServerVersions]
// to find available versions. If not provided, the latest available version
// is used.
ModelServerVersion string `protobuf:"bytes,3,opt,name=model_server_version,json=modelServerVersion,proto3" json:"model_server_version,omitempty"`
// contains filtered or unexported fields
}
Model server information for a deployment. Valid model server info combinations can be found using [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles].
func (*ModelServerInfo) Descriptor
deprecated
func (*ModelServerInfo) Descriptor() ([]byte, []int)
Deprecated: Use ModelServerInfo.ProtoReflect.Descriptor instead.
func (*ModelServerInfo) GetModel ¶
func (x *ModelServerInfo) GetModel() string
func (*ModelServerInfo) GetModelServer ¶
func (x *ModelServerInfo) GetModelServer() string
func (*ModelServerInfo) GetModelServerVersion ¶
func (x *ModelServerInfo) GetModelServerVersion() string
func (*ModelServerInfo) ProtoMessage ¶
func (*ModelServerInfo) ProtoMessage()
func (*ModelServerInfo) ProtoReflect ¶
func (x *ModelServerInfo) ProtoReflect() protoreflect.Message
func (*ModelServerInfo) Reset ¶
func (x *ModelServerInfo) Reset()
func (*ModelServerInfo) String ¶
func (x *ModelServerInfo) String() string
type PerformanceRange ¶
type PerformanceRange struct {
// Output only. The range of throughput in output tokens per second. This is
// measured as total_output_tokens_generated_by_server /
// elapsed_time_in_seconds.
ThroughputOutputRange *TokensPerSecondRange `` /* 126-byte string literal not displayed */
// Output only. The range of TTFT (Time To First Token) in milliseconds. TTFT
// is the time it takes to generate the first token for a request.
TtftRange *MillisecondRange `protobuf:"bytes,2,opt,name=ttft_range,json=ttftRange,proto3" json:"ttft_range,omitempty"`
// Output only. The range of NTPOT (Normalized Time Per Output Token) in
// milliseconds. NTPOT is the request latency normalized by the number of
// output tokens, measured as request_latency / total_output_tokens.
NtpotRange *MillisecondRange `protobuf:"bytes,3,opt,name=ntpot_range,json=ntpotRange,proto3" json:"ntpot_range,omitempty"`
// contains filtered or unexported fields
}
Performance range for a model deployment.
func (*PerformanceRange) Descriptor
deprecated
func (*PerformanceRange) Descriptor() ([]byte, []int)
Deprecated: Use PerformanceRange.ProtoReflect.Descriptor instead.
func (*PerformanceRange) GetNtpotRange ¶
func (x *PerformanceRange) GetNtpotRange() *MillisecondRange
func (*PerformanceRange) GetThroughputOutputRange ¶
func (x *PerformanceRange) GetThroughputOutputRange() *TokensPerSecondRange
func (*PerformanceRange) GetTtftRange ¶
func (x *PerformanceRange) GetTtftRange() *MillisecondRange
func (*PerformanceRange) ProtoMessage ¶
func (*PerformanceRange) ProtoMessage()
func (*PerformanceRange) ProtoReflect ¶
func (x *PerformanceRange) ProtoReflect() protoreflect.Message
func (*PerformanceRange) Reset ¶
func (x *PerformanceRange) Reset()
func (*PerformanceRange) String ¶
func (x *PerformanceRange) String() string
type PerformanceRequirements ¶
type PerformanceRequirements struct {
// Optional. The target Normalized Time Per Output Token (NTPOT) in
// milliseconds. NTPOT is calculated as `request_latency /
// total_output_tokens`. If not provided, this target will not be enforced.
TargetNtpotMilliseconds *int32 `` /* 139-byte string literal not displayed */
// Optional. The target Time To First Token (TTFT) in milliseconds. TTFT is
// the time it takes to generate the first token for a request. If not
// provided, this target will not be enforced.
TargetTtftMilliseconds *int32 `` /* 136-byte string literal not displayed */
// Optional. The target cost for running a profile's model server. If not
// provided, this requirement will not be enforced.
TargetCost *Cost `protobuf:"bytes,3,opt,name=target_cost,json=targetCost,proto3" json:"target_cost,omitempty"`
// contains filtered or unexported fields
}
Performance requirements for a profile and/or model deployment.
func (*PerformanceRequirements) Descriptor
deprecated
func (*PerformanceRequirements) Descriptor() ([]byte, []int)
Deprecated: Use PerformanceRequirements.ProtoReflect.Descriptor instead.
func (*PerformanceRequirements) GetTargetCost ¶
func (x *PerformanceRequirements) GetTargetCost() *Cost
func (*PerformanceRequirements) GetTargetNtpotMilliseconds ¶
func (x *PerformanceRequirements) GetTargetNtpotMilliseconds() int32
func (*PerformanceRequirements) GetTargetTtftMilliseconds ¶
func (x *PerformanceRequirements) GetTargetTtftMilliseconds() int32
func (*PerformanceRequirements) ProtoMessage ¶
func (*PerformanceRequirements) ProtoMessage()
func (*PerformanceRequirements) ProtoReflect ¶
func (x *PerformanceRequirements) ProtoReflect() protoreflect.Message
func (*PerformanceRequirements) Reset ¶
func (x *PerformanceRequirements) Reset()
func (*PerformanceRequirements) String ¶
func (x *PerformanceRequirements) String() string
type PerformanceStats ¶
type PerformanceStats struct {
// Output only. The number of queries per second.
// Note: This metric can vary widely based on context length and may not be a
// reliable measure of LLM throughput.
QueriesPerSecond float32 `protobuf:"fixed32,1,opt,name=queries_per_second,json=queriesPerSecond,proto3" json:"queries_per_second,omitempty"`
// Output only. The number of output tokens per second. This is the throughput
// measured as total_output_tokens_generated_by_server /
// elapsed_time_in_seconds.
OutputTokensPerSecond int32 `` /* 129-byte string literal not displayed */
// Output only. The Normalized Time Per Output Token (NTPOT) in milliseconds.
// This is the request latency normalized by the number of output tokens,
// measured as request_latency / total_output_tokens.
NtpotMilliseconds int32 `protobuf:"varint,3,opt,name=ntpot_milliseconds,json=ntpotMilliseconds,proto3" json:"ntpot_milliseconds,omitempty"`
// Output only. The Time To First Token (TTFT) in milliseconds. This is the
// time it takes to generate the first token for a request.
TtftMilliseconds int32 `protobuf:"varint,4,opt,name=ttft_milliseconds,json=ttftMilliseconds,proto3" json:"ttft_milliseconds,omitempty"`
// Output only. The cost of running the model deployment.
Cost []*Cost `protobuf:"bytes,5,rep,name=cost,proto3" json:"cost,omitempty"`
// contains filtered or unexported fields
}
Performance statistics for a model deployment.
func (*PerformanceStats) Descriptor
deprecated
func (*PerformanceStats) Descriptor() ([]byte, []int)
Deprecated: Use PerformanceStats.ProtoReflect.Descriptor instead.
func (*PerformanceStats) GetCost ¶
func (x *PerformanceStats) GetCost() []*Cost
func (*PerformanceStats) GetNtpotMilliseconds ¶
func (x *PerformanceStats) GetNtpotMilliseconds() int32
func (*PerformanceStats) GetOutputTokensPerSecond ¶
func (x *PerformanceStats) GetOutputTokensPerSecond() int32
func (*PerformanceStats) GetQueriesPerSecond ¶
func (x *PerformanceStats) GetQueriesPerSecond() float32
func (*PerformanceStats) GetTtftMilliseconds ¶
func (x *PerformanceStats) GetTtftMilliseconds() int32
func (*PerformanceStats) ProtoMessage ¶
func (*PerformanceStats) ProtoMessage()
func (*PerformanceStats) ProtoReflect ¶
func (x *PerformanceStats) ProtoReflect() protoreflect.Message
func (*PerformanceStats) Reset ¶
func (x *PerformanceStats) Reset()
func (*PerformanceStats) String ¶
func (x *PerformanceStats) String() string
type Profile ¶
type Profile struct {
// Output only. The model server configuration. Use
// [GkeInferenceQuickstart.FetchProfiles][google.cloud.gkerecommender.v1.GkeInferenceQuickstart.FetchProfiles]
// to find valid configurations.
ModelServerInfo *ModelServerInfo `protobuf:"bytes,1,opt,name=model_server_info,json=modelServerInfo,proto3" json:"model_server_info,omitempty"`
// Output only. The accelerator type. Expected format: `nvidia-h100-80gb`.
AcceleratorType string `protobuf:"bytes,2,opt,name=accelerator_type,json=acceleratorType,proto3" json:"accelerator_type,omitempty"`
// Output only. The TPU topology (if applicable).
TpuTopology string `protobuf:"bytes,3,opt,name=tpu_topology,json=tpuTopology,proto3" json:"tpu_topology,omitempty"`
// Output only. The instance type. Expected format: `a2-highgpu-1g`.
InstanceType string `protobuf:"bytes,4,opt,name=instance_type,json=instanceType,proto3" json:"instance_type,omitempty"`
// Output only. The resources used by the model deployment.
ResourcesUsed *ResourcesUsed `protobuf:"bytes,5,opt,name=resources_used,json=resourcesUsed,proto3" json:"resources_used,omitempty"`
// Output only. The performance statistics for this profile.
PerformanceStats []*PerformanceStats `protobuf:"bytes,6,rep,name=performance_stats,json=performanceStats,proto3" json:"performance_stats,omitempty"`
// contains filtered or unexported fields
}
A profile containing information about a model deployment.
func (*Profile) Descriptor
deprecated
func (*Profile) GetAcceleratorType ¶
func (*Profile) GetInstanceType ¶
func (*Profile) GetModelServerInfo ¶
func (x *Profile) GetModelServerInfo() *ModelServerInfo
func (*Profile) GetPerformanceStats ¶
func (x *Profile) GetPerformanceStats() []*PerformanceStats
func (*Profile) GetResourcesUsed ¶
func (x *Profile) GetResourcesUsed() *ResourcesUsed
func (*Profile) GetTpuTopology ¶
func (*Profile) ProtoMessage ¶
func (*Profile) ProtoMessage()
func (*Profile) ProtoReflect ¶
func (x *Profile) ProtoReflect() protoreflect.Message
type ResourcesUsed ¶
type ResourcesUsed struct {
// Output only. The number of accelerators (e.g., GPUs or TPUs) used by the
// model deployment on the Kubernetes node.
AcceleratorCount int32 `protobuf:"varint,1,opt,name=accelerator_count,json=acceleratorCount,proto3" json:"accelerator_count,omitempty"`
// contains filtered or unexported fields
}
Resources used by a model deployment.
func (*ResourcesUsed) Descriptor
deprecated
func (*ResourcesUsed) Descriptor() ([]byte, []int)
Deprecated: Use ResourcesUsed.ProtoReflect.Descriptor instead.
func (*ResourcesUsed) GetAcceleratorCount ¶
func (x *ResourcesUsed) GetAcceleratorCount() int32
func (*ResourcesUsed) ProtoMessage ¶
func (*ResourcesUsed) ProtoMessage()
func (*ResourcesUsed) ProtoReflect ¶
func (x *ResourcesUsed) ProtoReflect() protoreflect.Message
func (*ResourcesUsed) Reset ¶
func (x *ResourcesUsed) Reset()
func (*ResourcesUsed) String ¶
func (x *ResourcesUsed) String() string
type StorageConfig ¶
type StorageConfig struct {
// Optional. The Google Cloud Storage bucket URI to load the model from. This
// URI must point to the directory containing the model's config file
// (`config.json`) and model weights. A tuned GCSFuse setup can improve
// LLM Pod startup time by more than 7x. Expected format:
// `gs://<bucket-name>/<path-to-model>`.
ModelBucketUri string `protobuf:"bytes,1,opt,name=model_bucket_uri,json=modelBucketUri,proto3" json:"model_bucket_uri,omitempty"`
// Optional. The URI for the GCS bucket containing the XLA compilation cache.
// If using TPUs, the XLA cache will be written to the same path as
// `model_bucket_uri`. This can speed up vLLM model preparation for repeated
// deployments.
XlaCacheBucketUri string `protobuf:"bytes,2,opt,name=xla_cache_bucket_uri,json=xlaCacheBucketUri,proto3" json:"xla_cache_bucket_uri,omitempty"`
// contains filtered or unexported fields
}
Storage configuration for a model deployment.
func (*StorageConfig) Descriptor (deprecated) ¶
func (*StorageConfig) Descriptor() ([]byte, []int)
Deprecated: Use StorageConfig.ProtoReflect.Descriptor instead.
func (*StorageConfig) GetModelBucketUri ¶
func (x *StorageConfig) GetModelBucketUri() string
func (*StorageConfig) GetXlaCacheBucketUri ¶
func (x *StorageConfig) GetXlaCacheBucketUri() string
func (*StorageConfig) ProtoMessage ¶
func (*StorageConfig) ProtoMessage()
func (*StorageConfig) ProtoReflect ¶
func (x *StorageConfig) ProtoReflect() protoreflect.Message
func (*StorageConfig) Reset ¶
func (x *StorageConfig) Reset()
func (*StorageConfig) String ¶
func (x *StorageConfig) String() string
type TokensPerSecondRange ¶
type TokensPerSecondRange struct {
// Output only. The minimum value of the range.
Min int32 `protobuf:"varint,1,opt,name=min,proto3" json:"min,omitempty"`
// Output only. The maximum value of the range.
Max int32 `protobuf:"varint,2,opt,name=max,proto3" json:"max,omitempty"`
// contains filtered or unexported fields
}
Represents a range of throughput values in tokens per second.
func (*TokensPerSecondRange) Descriptor (deprecated) ¶
func (*TokensPerSecondRange) Descriptor() ([]byte, []int)
Deprecated: Use TokensPerSecondRange.ProtoReflect.Descriptor instead.
func (*TokensPerSecondRange) GetMax ¶
func (x *TokensPerSecondRange) GetMax() int32
func (*TokensPerSecondRange) GetMin ¶
func (x *TokensPerSecondRange) GetMin() int32
func (*TokensPerSecondRange) ProtoMessage ¶
func (*TokensPerSecondRange) ProtoMessage()
func (*TokensPerSecondRange) ProtoReflect ¶
func (x *TokensPerSecondRange) ProtoReflect() protoreflect.Message
func (*TokensPerSecondRange) Reset ¶
func (x *TokensPerSecondRange) Reset()
func (*TokensPerSecondRange) String ¶
func (x *TokensPerSecondRange) String() string
type UnimplementedGkeInferenceQuickstartServer ¶
type UnimplementedGkeInferenceQuickstartServer struct {
}
UnimplementedGkeInferenceQuickstartServer should be embedded to have forward-compatible implementations.
func (UnimplementedGkeInferenceQuickstartServer) FetchBenchmarkingData ¶
func (UnimplementedGkeInferenceQuickstartServer) FetchBenchmarkingData(context.Context, *FetchBenchmarkingDataRequest) (*FetchBenchmarkingDataResponse, error)
func (UnimplementedGkeInferenceQuickstartServer) FetchModelServerVersions ¶
func (UnimplementedGkeInferenceQuickstartServer) FetchModelServerVersions(context.Context, *FetchModelServerVersionsRequest) (*FetchModelServerVersionsResponse, error)
func (UnimplementedGkeInferenceQuickstartServer) FetchModelServers ¶
func (UnimplementedGkeInferenceQuickstartServer) FetchModelServers(context.Context, *FetchModelServersRequest) (*FetchModelServersResponse, error)
func (UnimplementedGkeInferenceQuickstartServer) FetchModels ¶
func (UnimplementedGkeInferenceQuickstartServer) FetchModels(context.Context, *FetchModelsRequest) (*FetchModelsResponse, error)
func (UnimplementedGkeInferenceQuickstartServer) FetchProfiles ¶
func (UnimplementedGkeInferenceQuickstartServer) FetchProfiles(context.Context, *FetchProfilesRequest) (*FetchProfilesResponse, error)
func (UnimplementedGkeInferenceQuickstartServer) GenerateOptimizedManifest ¶
func (UnimplementedGkeInferenceQuickstartServer) GenerateOptimizedManifest(context.Context, *GenerateOptimizedManifestRequest) (*GenerateOptimizedManifestResponse, error)
type UnsafeGkeInferenceQuickstartServer ¶
type UnsafeGkeInferenceQuickstartServer interface {
// contains filtered or unexported methods
}
UnsafeGkeInferenceQuickstartServer may be embedded to opt out of forward compatibility for this service. Use of this interface is not recommended, because methods later added to GkeInferenceQuickstartServer will result in compilation errors for any implementation that embeds it.