xla

package
v0.5.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 19, 2024 License: Apache-2.0 Imports: 7 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Enum value maps for DebugOptions_ShapeChecks.
var (
	// Maps each DebugOptions_ShapeChecks enum number to its proto name.
	DebugOptions_ShapeChecks_name = map[int32]string{
		2: "COMPILE_TIME",
		1: "RUNTIME",
		0: "IGNORE",
	}
	// Inverse of the _name map: proto name back to enum number.
	DebugOptions_ShapeChecks_value = map[string]int32{
		"COMPILE_TIME": 2,
		"RUNTIME":      1,
		"IGNORE":       0,
	}
)

Enum value maps for DebugOptions_ShapeChecks.

View Source
// Enum value maps for DebugOptions_StepMarkerLocation.
var (
	// Enum number -> proto name.
	DebugOptions_StepMarkerLocation_name = map[int32]string{
		2: "STEP_MARK_NONE",
		3: "STEP_MARK_AT_SECOND_LEVEL_WHILE_LOOP",
		1: "STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP",
		0: "STEP_MARK_AT_ENTRY",
	}
	// Proto name -> enum number.
	DebugOptions_StepMarkerLocation_value = map[string]int32{
		"STEP_MARK_NONE":                       2,
		"STEP_MARK_AT_SECOND_LEVEL_WHILE_LOOP": 3,
		"STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP":    1,
		"STEP_MARK_AT_ENTRY":                   0,
	}
)

Enum value maps for DebugOptions_StepMarkerLocation.

View Source
// Enum value maps for DebugOptions_CollectiveOpType.
var (
	// Maps enum numbers to their proto identifiers.
	DebugOptions_CollectiveOpType_name = map[int32]string{
		6: "COLLECTIVEPERMUTE",
		5: "ALLTOALL",
		4: "COLLECTIVEBROADCAST",
		3: "REDUCESCATTER",
		2: "ALLGATHER",
		1: "ALLREDUCE",
		0: "NOOP",
	}
	// Inverse of the _name map.
	DebugOptions_CollectiveOpType_value = map[string]int32{
		"COLLECTIVEPERMUTE":   6,
		"ALLTOALL":            5,
		"COLLECTIVEBROADCAST": 4,
		"REDUCESCATTER":       3,
		"ALLGATHER":           2,
		"ALLREDUCE":           1,
		"NOOP":                0,
	}
)

Enum value maps for DebugOptions_CollectiveOpType.

View Source
// Enum value maps for DebugOptions_CommandBufferCmdType.
var (
	// Enum number -> proto name.
	DebugOptions_CommandBufferCmdType_name = map[int32]string{
		8: "DYNAMIC_SLICE",
		7: "CUBLASLT",
		6: "CUSTOM_CALL",
		5: "CONDITIONALS",
		4: "COLLECTIVES",
		3: "CUDNN",
		2: "CUBLAS",
		1: "FUSION",
		0: "INVALID",
	}
	// Proto name -> enum number.
	DebugOptions_CommandBufferCmdType_value = map[string]int32{
		"DYNAMIC_SLICE": 8,
		"CUBLASLT":      7,
		"CUSTOM_CALL":   6,
		"CONDITIONALS":  5,
		"COLLECTIVES":   4,
		"CUDNN":         3,
		"CUBLAS":        2,
		"FUSION":        1,
		"INVALID":       0,
	}
)

Enum value maps for DebugOptions_CommandBufferCmdType.

View Source
// Enum value maps for DebugOptions_PartitioningAlgorithm.
var (
	// Maps each algorithm enum number to its proto name.
	DebugOptions_PartitioningAlgorithm_name = map[int32]string{
		3: "PARTITIONING_ALGORITHM_EXP2",
		2: "PARTITIONING_ALGORITHM_EXP1",
		1: "PARTITIONING_ALGORITHM_EXP0",
		0: "PARTITIONING_ALGORITHM_NOOP",
	}
	// Inverse mapping: proto name to enum number.
	DebugOptions_PartitioningAlgorithm_value = map[string]int32{
		"PARTITIONING_ALGORITHM_EXP2": 3,
		"PARTITIONING_ALGORITHM_EXP1": 2,
		"PARTITIONING_ALGORITHM_EXP0": 1,
		"PARTITIONING_ALGORITHM_NOOP": 0,
	}
)

Enum value maps for DebugOptions_PartitioningAlgorithm.

View Source
// Enum value maps for DebugOptions_WhileLoopUnrolling.
var (
	// Enum number -> proto name.
	DebugOptions_WhileLoopUnrolling_name = map[int32]string{
		3: "WHILE_LOOP_UNROLLING_AUTO_UNROLL",
		2: "WHILE_LOOP_UNROLLING_FULL_UNROLL",
		1: "WHILE_LOOP_UNROLLING_DOUBLE_BUFFER",
		0: "WHILE_LOOP_UNROLLING_NO_UNROLL",
	}
	// Proto name -> enum number.
	DebugOptions_WhileLoopUnrolling_value = map[string]int32{
		"WHILE_LOOP_UNROLLING_AUTO_UNROLL":   3,
		"WHILE_LOOP_UNROLLING_FULL_UNROLL":   2,
		"WHILE_LOOP_UNROLLING_DOUBLE_BUFFER": 1,
		"WHILE_LOOP_UNROLLING_NO_UNROLL":     0,
	}
)

Enum value maps for DebugOptions_WhileLoopUnrolling.

View Source
// Enum value maps for DebugOptions_LibNvJitLinkMode.
var (
	// Maps each libNvJitLink mode enum number to its proto name.
	DebugOptions_LibNvJitLinkMode_name = map[int32]string{
		2: "LIB_NV_JIT_LINK_MODE_ENABLED",
		1: "LIB_NV_JIT_LINK_MODE_DISABLED",
		0: "LIB_NV_JIT_LINK_MODE_AUTO",
	}
	// Inverse of the _name map.
	DebugOptions_LibNvJitLinkMode_value = map[string]int32{
		"LIB_NV_JIT_LINK_MODE_ENABLED":  2,
		"LIB_NV_JIT_LINK_MODE_DISABLED": 1,
		"LIB_NV_JIT_LINK_MODE_AUTO":     0,
	}
)

Enum value maps for DebugOptions_LibNvJitLinkMode.

View Source
// Enum value maps for DebugOptions_AutotuneCacheMode.
var (
	// Enum number -> proto name.
	DebugOptions_AutotuneCacheMode_name = map[int32]string{
		2: "AUTOTUNE_CACHE_MODE_READ",
		1: "AUTOTUNE_CACHE_MODE_UPDATE",
		0: "AUTOTUNE_CACHE_MODE_UNSPECIFIED",
	}
	// Proto name -> enum number.
	DebugOptions_AutotuneCacheMode_value = map[string]int32{
		"AUTOTUNE_CACHE_MODE_READ":        2,
		"AUTOTUNE_CACHE_MODE_UPDATE":      1,
		"AUTOTUNE_CACHE_MODE_UNSPECIFIED": 0,
	}
)

Enum value maps for DebugOptions_AutotuneCacheMode.

View Source
// Enum value maps for DebugOptions_PGLEStrictnessLevel.
var (
	// Maps each PGLE strictness enum number to its proto name.
	DebugOptions_PGLEStrictnessLevel_name = map[int32]string{
		2: "PGLE_STRICTNESS_LEVEL_ERROR",
		1: "PGLE_STRICTNESS_LEVEL_WARN",
		0: "PGLE_STRICTNESS_LEVEL_OFF",
	}
	// Inverse of the _name map.
	DebugOptions_PGLEStrictnessLevel_value = map[string]int32{
		"PGLE_STRICTNESS_LEVEL_ERROR": 2,
		"PGLE_STRICTNESS_LEVEL_WARN":  1,
		"PGLE_STRICTNESS_LEVEL_OFF":   0,
	}
)

Enum value maps for DebugOptions_PGLEStrictnessLevel.

View Source
// Enum value maps for HloModuleConfigProto_FusionConfigCollection.
var (
	// Enum number -> proto name.
	HloModuleConfigProto_FusionConfigCollection_name = map[int32]string{
		2: "PER_NODE",
		1: "PER_EDGE",
		0: "OFF",
	}
	// Proto name -> enum number.
	HloModuleConfigProto_FusionConfigCollection_value = map[string]int32{
		"PER_NODE": 2,
		"PER_EDGE": 1,
		"OFF":      0,
	}
)

Enum value maps for HloModuleConfigProto_FusionConfigCollection.

View Source
// File_xla_xla_proto is the protoreflect file descriptor for the generated
// xla/xla.proto package (populated by the protobuf runtime).
var File_xla_xla_proto protoreflect.FileDescriptor

Functions

This section is empty.

Types

type CompilationEnvironmentsProto

// CompilationEnvironmentsProto is the proto version of
// `xla::CompilationEnvironments`.
type CompilationEnvironmentsProto struct {
	// Environments holds the environment messages, each packed as a
	// google.protobuf.Any (repeated field 1).
	Environments []*anypb.Any `protobuf:"bytes,1,rep,name=environments,proto3" json:"environments,omitempty"`
	// contains filtered or unexported fields
}

Proto version of `xla::CompilationEnvironments`.

func (*CompilationEnvironmentsProto) Descriptor deprecated

func (*CompilationEnvironmentsProto) Descriptor() ([]byte, []int)

Deprecated: Use CompilationEnvironmentsProto.ProtoReflect.Descriptor instead.

func (*CompilationEnvironmentsProto) GetEnvironments

func (x *CompilationEnvironmentsProto) GetEnvironments() []*anypb.Any

func (*CompilationEnvironmentsProto) ProtoMessage

func (*CompilationEnvironmentsProto) ProtoMessage()

func (*CompilationEnvironmentsProto) ProtoReflect

func (*CompilationEnvironmentsProto) Reset

func (x *CompilationEnvironmentsProto) Reset()

func (*CompilationEnvironmentsProto) String

type DebugOptions

type DebugOptions struct {

	// The execution time optimization effort to expend during compilation.
	// See `exec_time_optimization_effort` for accepted ranges. This flag will
	// override any changes set in `ExecutionOptions`. Most likely this is just a
	// temporary measure before https://github.com/jax-ml/jax/issues/24715 is in.
	//
	// TODO(b/377871215): Check whether we still need this.
	XlaExperimentalExecTimeOptimizationEffort float32 `` /* 196-byte string literal not displayed */
	// Use region analysis in copy insertion pass.
	XlaCpuCopyInsertionUseRegionAnalysis bool `` /* 182-byte string literal not displayed */
	// When true, XLA:CPU uses HLO module scheduler that is optimized for
	// extracting concurrency at the cost of extra memory: we extend the live
	// ranges of temporaries to allow XLA runtime to schedule independent
	// operations in parallel on separate threads.
	XlaCpuEnableConcurrencyOptimizedScheduler bool `` /* 195-byte string literal not displayed */
	// When true, "unsafe" mathematical optimizations are enabled. These
	// transformations include but are not limited to:
	//
	//   - Reducing the precision of operations (e.g. using an approximate sin
	//     function, or transforming x/y into x * (1/y)).
	//   - Assuming that operations never produce or consume NaN or +/- Inf (this
	//     behavior can be adjusted using xla_cpu_fast_math_allow_{nans|infs}).
	//   - Assuming that +0 and -0 are indistinguishable.
	XlaCpuEnableFastMath bool `` /* 129-byte string literal not displayed */
	// When false we lower the Minimum and Maximum hlos in the CPU backend such
	// that Min(NotNaN, NaN) = Min(NaN, NotNaN) = NaN.  In other words, if this
	// flag is false we always propagate NaNs through Min and Max.
	//
	// Note, this does not correspond to the exact same behavior as the gpu flag
	// below!
	XlaCpuEnableFastMinMax bool `` /* 138-byte string literal not displayed */
	// When xla_cpu_enable_fast_math is true then this controls whether we forbid
	// to use the reciprocal of an argument instead of division. Ignored when
	// xla_cpu_enable_fast_math is false.
	XlaCpuFastMathHonorDivision bool `` /* 153-byte string literal not displayed */
	// When xla_cpu_enable_fast_math is true then this controls whether we forbid
	// to approximate calculations for functions. Ignored when
	// xla_cpu_enable_fast_math is false.
	XlaCpuFastMathHonorFunctions bool `` /* 156-byte string literal not displayed */
	// When xla_cpu_enable_fast_math is true then this controls whether we allow
	// operations to produce infinites. Ignored when xla_cpu_enable_fast_math is
	// false.
	XlaCpuFastMathHonorInfs bool `` /* 141-byte string literal not displayed */
	// When xla_cpu_enable_fast_math is true then this controls whether we allow
	// operations to produce NaNs.  Ignored when xla_cpu_enable_fast_math is
	// false.
	XlaCpuFastMathHonorNans bool `` /* 141-byte string literal not displayed */
	// When true, XLA:CPU uses the thunk runtime to execute compiled program.
	XlaCpuUseThunkRuntime bool `` /* 133-byte string literal not displayed */
	// Enabling this will enable optimizations that ignore the possibility of NaN.
	XlaEnableFastMath bool `protobuf:"varint,335,opt,name=xla_enable_fast_math,json=xlaEnableFastMath,proto3" json:"xla_enable_fast_math,omitempty"`
	// The number of parts to split the LLVM module into before codegen. This
	// allows XLA to compile all parts in parallel, and resolve kernel symbols
	// from different dynamic libraries.
	XlaCpuParallelCodegenSplitCount int32 `` /* 165-byte string literal not displayed */
	// A `prefer-vector-width` value that is passed to the LLVM backend. Default
	// value is `256` (AVX2 on x86 platforms).
	XlaCpuPreferVectorWidth int32 `` /* 139-byte string literal not displayed */
	// When set, XLA:CPU will only generate code up to the specified ISA.
	// (It will not use newer ISAs.) Using the string format allows us to extend
	// the flag for more flexible control if necessary.
	XlaCpuMaxIsa string `protobuf:"bytes,333,opt,name=xla_cpu_max_isa,json=xlaCpuMaxIsa,proto3" json:"xla_cpu_max_isa,omitempty"`
	// Specifies the behavior of per kernel autotuning cache.
	XlaGpuExperimentalAutotuneCacheMode DebugOptions_AutotuneCacheMode `` /* 217-byte string literal not displayed */
	// Experimentally disables binary libraries in GPU compiler passes.
	XlaGpuExperimentalDisableBinaryLibraries bool `` /* 192-byte string literal not displayed */
	// Dump FDO profiles in a binary format to a separate file.
	XlaGpuExperimentalDumpFdoProfiles bool `` /* 171-byte string literal not displayed */
	// Enabling this flag will attempt to redirect every already-constructed
	// fusion possible to the Triton emitter.
	//
	// For example, a fusion with kind kLoop will be transformed to a fusion with
	// kind kCustom (and underlying kTritonFusionKind) if it can be tiled
	// correctly, and if all the instructions it contains are supported by XLA's
	// Triton emitter. Tile sizes are assigned automatically.
	//
	// Pre-existing block-level fusions are left unmodified.
	XlaGpuExperimentalEnableFusionBlockLevelRewriter bool `` /* 220-byte string literal not displayed */
	// When enabled, the PriorityFusion pass will try to make Triton fusions first
	// and foremost where it is possible.
	//
	// A kCustom fusion with underlying kTritonFusionKind will be created if it
	// can be tiled correctly, and if all the instructions it contains are
	// supported by XLA's Triton emitter. Tile sizes are assigned automatically.
	XlaGpuExperimentalEnableTritonHerolessPriorityFusion bool `` /* 232-byte string literal not displayed */
	// Gates the experimental feature coupling the Triton Softmax pattern matcher
	// with priority fusion.
	XlaGpuExperimentalEnableTritonSoftmaxPriorityFusion bool `` /* 229-byte string literal not displayed */
	// Internal debug/testing flag to switch Triton GEMM fusions on or off.
	XlaGpuUnsupportedEnableTritonGemm bool `` /* 171-byte string literal not displayed */
	// Show addresses of HLO ops in graph dump.
	XlaHloGraphAddresses bool `` /* 126-byte string literal not displayed */
	// Instrument the computation to collect per-HLO cycle counts.
	XlaHloProfile bool `protobuf:"varint,9,opt,name=xla_hlo_profile,json=xlaHloProfile,proto3" json:"xla_hlo_profile,omitempty"`
	// List of HLO passes to disable/enable. These names must exactly match the
	// pass names as specified by the HloPassInterface::name() method.
	//
	// At least one of xla_disable_hlo_passes and xla_enable_hlo_passes_only must
	// be empty.
	XlaDisableHloPasses    []string `protobuf:"bytes,30,rep,name=xla_disable_hlo_passes,json=xlaDisableHloPasses,proto3" json:"xla_disable_hlo_passes,omitempty"`
	XlaEnableHloPassesOnly []string `` /* 135-byte string literal not displayed */
	// Disables all HLO passes.  Notes that some passes are necessary for
	// correctness and the invariants that must be satisfied by "fully optimized"
	// HLO are different for different devices and may change over time.  The only
	// "guarantee", such as it is, is that if you compile XLA and dump the
	// optimized HLO for some graph, you should be able to run it again on the
	// same device with the same build of XLA.
	XlaDisableAllHloPasses bool `` /* 136-byte string literal not displayed */
	// Numerical optimization level for the XLA compiler backend; the specific
	// interpretation of this value is left to the backends.
	XlaBackendOptimizationLevel int32 `` /* 148-byte string literal not displayed */
	// Embed the compiler IR as a string in the executable.
	XlaEmbedIrInExecutable bool `` /* 135-byte string literal not displayed */
	// Eliminate implicit broadcasts when lowering user computations to HLO
	// instructions; use explicit broadcast instead.
	XlaEliminateHloImplicitBroadcast bool `` /* 165-byte string literal not displayed */
	// When generating calls to Eigen in the CPU backend, use multi-threaded Eigen
	// mode.
	XlaCpuMultiThreadEigen bool `` /* 135-byte string literal not displayed */
	// Path to directory with cuda/ptx tools and libraries.
	XlaGpuCudaDataDir string `protobuf:"bytes,61,opt,name=xla_gpu_cuda_data_dir,json=xlaGpuCudaDataDir,proto3" json:"xla_gpu_cuda_data_dir,omitempty"`
	// Enable flush-to-zero semantics in the GPU backend.
	XlaGpuFtz bool `protobuf:"varint,62,opt,name=xla_gpu_ftz,json=xlaGpuFtz,proto3" json:"xla_gpu_ftz,omitempty"`
	// If true, in LLVM-based backends, emit !alias.scope metadata in
	// generated IR.
	XlaLlvmEnableAliasScopeMetadata bool `` /* 164-byte string literal not displayed */
	// If true, in LLVM-based backends, emit !noalias metadata in the
	// generated IR.
	XlaLlvmEnableNoaliasMetadata bool `` /* 153-byte string literal not displayed */
	// If true, in LLVM-based backends, emit !invariant.load metadata in
	// the generated IR.
	XlaLlvmEnableInvariantLoadMetadata bool `` /* 173-byte string literal not displayed */
	// If true, a set of expensive LLVM optimization passes will not be run.
	XlaLlvmDisableExpensivePasses bool `` /* 156-byte string literal not displayed */
	// This is used by ClientLibraryTestBase::ComputeAndCompare*. If true, the
	// computation will run n! times with all permutations of layouts for the
	// output shape in rank n. For example, with a 3D shape, all permutations of
	// the set {0, 1, 2} are tried.
	XlaTestAllOutputLayouts bool `` /* 138-byte string literal not displayed */
	// This is used by ClientLibraryTestBase::ComputeAndCompare*. If true, the
	// computation will run for all permutations of layouts of all input
	// arguments. For example, with 2 input arguments in 2D and 4D shapes, the
	// computation will run 2! * 4! times.
	XlaTestAllInputLayouts bool `` /* 135-byte string literal not displayed */
	// Assign colors based on sharding information when generating the Graphviz
	// HLO graph.
	XlaHloGraphShardingColor bool `` /* 141-byte string literal not displayed */
	// Generate calls to MKL-DNN in the CPU backend.
	XlaCpuUseMklDnn bool `protobuf:"varint,97,opt,name=xla_cpu_use_mkl_dnn,json=xlaCpuUseMklDnn,proto3" json:"xla_cpu_use_mkl_dnn,omitempty"`
	// When true we lower the Minimum and Maximum hlos in the GPU backend such
	// that Min(NotNaN, NaN) = Min(NaN, NotNaN) = NotNaN.  In other words, if this
	// flag is true we don't propagate NaNs through Min and Max.
	//
	// Note, this does not correspond to the exact same behavior as the cpu flag
	// above!
	XlaGpuEnableFastMinMax bool `` /* 138-byte string literal not displayed */
	// Allows xla to increase the output precision of floating point operations
	// and all floating-point conversions to be simplified, including those
	// that affect the numerics. The `FloatNormalization` pass inserts many
	// `f32 -> bf16 -> f32` conversion pairs. These are not removed by the
	// `AlgebraicSimplifier`, as that will only simplify conversions that are
	// no-ops, e.g. `bf16 -> f32 -> bf16`. Removing these improves accuracy.
	XlaAllowExcessPrecision bool `` /* 137-byte string literal not displayed */
	// Crashes the program when any kind of verification fails, instead of just
	// logging the failures. One example is cross checking of convolution results
	// among different algorithms.
	XlaGpuCrashOnVerificationFailures bool `` /* 171-byte string literal not displayed */
	// 0:   Disable gemm and convolution autotuning.
	// 1:   Enable autotuning, but disable correctness checking.
	// 2:   Also set output buffers to random numbers during autotuning.
	// 3:   Also reset output buffers to random numbers after autotuning each
	//
	//	algorithm.
	//
	// 4+:  Also check for correct outputs and for out-of-bounds reads/writes.
	//
	// Default: 4.
	XlaGpuAutotuneLevel int32 `protobuf:"varint,123,opt,name=xla_gpu_autotune_level,json=xlaGpuAutotuneLevel,proto3" json:"xla_gpu_autotune_level,omitempty"`
	// Force the host platform to pretend that there are these many host
	// "devices".  All these devices are backed by the same threadpool.  Defaults
	// to 1.
	//
	// Setting this to anything other than 1 can increase overhead from context
	// switching but we let the user override this behavior to help run tests on
	// the host that run models in parallel across multiple devices.
	XlaForceHostPlatformDeviceCount int32 `` /* 165-byte string literal not displayed */
	// If set to true XLA:GPU invokes `ptxas` with -O0 (default is -O3).
	XlaGpuDisableGpuasmOptimizations bool                     `` /* 166-byte string literal not displayed */
	XlaGpuShapeChecks                DebugOptions_ShapeChecks `` /* 153-byte string literal not displayed */
	// Enable fast math with eigen in the HLO evaluator.
	XlaHloEvaluatorUseFastPath bool `` /* 150-byte string literal not displayed */
	// Temporary option to allow support for both the R1 and the scalar index
	// versions of DynamicSlice and DynamicUpdateSlice. Only used for testing.
	XlaAllowScalarIndexDynamicOps bool `` /* 159-byte string literal not displayed */
	// Option to emit a target-specific marker to indicate the start of a training
	// step. The location of the marker (if any) is determined by the option
	// value.
	XlaStepMarkerLocation DebugOptions_StepMarkerLocation `` /* 172-byte string literal not displayed */
	// Directory to dump into.
	XlaDumpTo string `protobuf:"bytes,109,opt,name=xla_dump_to,json=xlaDumpTo,proto3" json:"xla_dump_to,omitempty"`
	// If specified, will only dump modules which match this regexp.
	XlaDumpHloModuleRe string `protobuf:"bytes,110,opt,name=xla_dump_hlo_module_re,json=xlaDumpHloModuleRe,proto3" json:"xla_dump_hlo_module_re,omitempty"`
	// If this flag is specified, will also dump HLO before and after passes that
	// match this regular expression.  Set to .* to dump before/after all passes.
	XlaDumpHloPassRe string `protobuf:"bytes,111,opt,name=xla_dump_hlo_pass_re,json=xlaDumpHloPassRe,proto3" json:"xla_dump_hlo_pass_re,omitempty"`
	// Specifies the format that HLO is dumped in.  Multiple of these may be
	// specified.
	XlaDumpHloAsText  bool `protobuf:"varint,112,opt,name=xla_dump_hlo_as_text,json=xlaDumpHloAsText,proto3" json:"xla_dump_hlo_as_text,omitempty"`
	XlaDumpHloAsProto bool `protobuf:"varint,113,opt,name=xla_dump_hlo_as_proto,json=xlaDumpHloAsProto,proto3" json:"xla_dump_hlo_as_proto,omitempty"`
	XlaDumpHloAsDot   bool `protobuf:"varint,114,opt,name=xla_dump_hlo_as_dot,json=xlaDumpHloAsDot,proto3" json:"xla_dump_hlo_as_dot,omitempty"`
	XlaDumpHloAsUrl   bool `protobuf:"varint,115,opt,name=xla_dump_hlo_as_url,json=xlaDumpHloAsUrl,proto3" json:"xla_dump_hlo_as_url,omitempty"`
	// Dump HLO graphs as an HTML (DOT -> SVG inlined in HTML)
	XlaDumpHloAsHtml bool `protobuf:"varint,116,opt,name=xla_dump_hlo_as_html,json=xlaDumpHloAsHtml,proto3" json:"xla_dump_hlo_as_html,omitempty"`
	// Dump the visualization of the fusion progress.
	XlaDumpFusionVisualization bool `` /* 146-byte string literal not displayed */
	// If true, every time an HLO module is run, we will dump an HloSnapshot
	// (essentially, a serialized module plus its inputs) to the --xla_dump_to
	// directory.
	XlaDumpHloSnapshots bool `protobuf:"varint,118,opt,name=xla_dump_hlo_snapshots,json=xlaDumpHloSnapshots,proto3" json:"xla_dump_hlo_snapshots,omitempty"`
	// Include a timestamp in the dumped filenames.
	XlaDumpIncludeTimestamp bool `` /* 137-byte string literal not displayed */
	// Max number of hlo module dumps in a directory. Set to < 0 for unbounded.
	XlaDumpMaxHloModules int32 `` /* 130-byte string literal not displayed */
	// Dump HloModuleMetadata as a text proto for each HLO module.
	XlaDumpModuleMetadata bool `` /* 131-byte string literal not displayed */
	// GZip-compress protos dumped via --xla_dump_hlo_as_proto.
	XlaDumpCompressProtos bool `` /* 131-byte string literal not displayed */
	// Dump HLO in long text format. Ignored unless xla_dump_hlo_as_text is true.
	XlaDumpHloAsLongText bool `` /* 132-byte string literal not displayed */
	// Overrides for XLA GPU's convolution layout heuristic.
	XlaGpuForceConvNchw bool `` /* 127-byte string literal not displayed */
	XlaGpuForceConvNhwc bool `` /* 127-byte string literal not displayed */
	// Paths to files with ptx code.
	XlaGpuPtxFile []string `protobuf:"bytes,127,rep,name=xla_gpu_ptx_file,json=xlaGpuPtxFile,proto3" json:"xla_gpu_ptx_file,omitempty"`
	// Whether to dump llvm ir when compiling to ptx.
	XlaGpuDumpLlvmir bool `protobuf:"varint,155,opt,name=xla_gpu_dump_llvmir,json=xlaGpuDumpLlvmir,proto3" json:"xla_gpu_dump_llvmir,omitempty"`
	// Whether to dump mlir using pretty print form.
	XlaDumpEnableMlirPrettyForm bool `` /* 153-byte string literal not displayed */
	// Denylist for cuDNN convolutions.
	XlaGpuAlgorithmDenylistPath string `` /* 150-byte string literal not displayed */
	// Debug options that trigger execution errors when NaN or Inf are detected.
	XlaTpuDetectNan bool `protobuf:"varint,135,opt,name=xla_tpu_detect_nan,json=xlaTpuDetectNan,proto3" json:"xla_tpu_detect_nan,omitempty"`
	XlaTpuDetectInf bool `protobuf:"varint,136,opt,name=xla_tpu_detect_inf,json=xlaTpuDetectInf,proto3" json:"xla_tpu_detect_inf,omitempty"`
	// True if TraceMe annotations are enabled for XLA:CPU.
	XlaCpuEnableXprofTraceme bool `` /* 142-byte string literal not displayed */
	// It is usually preferable to not fallback to the driver; it can consume more
	// memory, or have bugs.
	XlaGpuUnsafeFallbackToDriverOnPtxasNotFound bool `` /* 209-byte string literal not displayed */
	// Extra parameters to pass the GPU assembler.
	XlaGpuAsmExtraFlags string `` /* 126-byte string literal not displayed */
	// Per-heap size constraint. New heaps will be created if per-heap max size is
	// reached.
	XlaMultiheapSizeConstraintPerHeap int32 `` /* 171-byte string literal not displayed */
	// Enable detailed logging into vlog. If this is disabled, no
	// compilation summary will be printed in the end of computation.
	XlaDetailedLogging bool `protobuf:"varint,252,opt,name=xla_detailed_logging,json=xlaDetailedLogging,proto3" json:"xla_detailed_logging,omitempty"`
	// Enable HLO dumping. If this is disabled, no HLO modules will be dumped.
	XlaEnableDumping bool `protobuf:"varint,253,opt,name=xla_enable_dumping,json=xlaEnableDumping,proto3" json:"xla_enable_dumping,omitempty"`
	// Overrides normal multi-threaded compilation setting to use this many
	// threads. Setting to 0 (the default value) means no enforcement.
	XlaGpuForceCompilationParallelism            int32 `` /* 169-byte string literal not displayed */
	XlaGpuEnableLlvmModuleCompilationParallelism bool  `` /* 206-byte string literal not displayed */
	// Guarantees run-to-run determinism.
	// This flag implies --xla_gpu_exclude_nondeterministic_ops and in addition
	// disables autotuning.
	XlaGpuDeterministicOps bool `` /* 134-byte string literal not displayed */
	// Paths to files with LLVM code.
	XlaGpuLlvmIrFile              []string                        `protobuf:"bytes,150,rep,name=xla_gpu_llvm_ir_file,json=xlaGpuLlvmIrFile,proto3" json:"xla_gpu_llvm_ir_file,omitempty"`
	XlaGpuDisableAsyncCollectives []DebugOptions_CollectiveOpType `` /* 203-byte string literal not displayed */
	// Size threshold (in bytes) for the GPU collective combiners.
	XlaGpuAllReduceCombineThresholdBytes     int64 `` /* 182-byte string literal not displayed */
	XlaGpuAllGatherCombineThresholdBytes     int64 `` /* 182-byte string literal not displayed */
	XlaGpuReduceScatterCombineThresholdBytes int64 `` /* 194-byte string literal not displayed */
	// Combine all-gather/scatter-reduce ops with the same dimension or
	// irrespective of their dimension.
	XlaGpuEnableAllGatherCombineByDim     bool `` /* 175-byte string literal not displayed */
	XlaGpuEnableReduceScatterCombineByDim bool `` /* 187-byte string literal not displayed */
	// Enable allreduce reassociation on allreduces that are converted to a wider
	// type. The resulting allreduce will be promoted to a wider-typed allreduce.
	XlaGpuEnableReassociationForConvertedAr bool `` /* 191-byte string literal not displayed */
	// Number of devices per host for first stage of BlueConnect decomposition
	// pass. The pass will attempt to decompose all-reduces ops into a
	// ReduceScatter-AllReduce-AllGather sequence, with the initial ReduceScatter
	// being performed over all of the devices in the same host. Set to < 1 to
	// disable all-reduce decomposition.
	XlaGpuAllReduceBlueconnectNumDevicesPerHost int32 `` /* 207-byte string literal not displayed */
	// Enable hoisting of reduce-scatter out of while loops.
	XlaGpuEnableWhileLoopReduceScatterCodeMotion bool `` /* 210-byte string literal not displayed */
	// Inflate collective cost by running each collective multiple times.
	XlaGpuCollectiveInflationFactor int32 `` /* 163-byte string literal not displayed */
	// Whether to force inline before llvm module split to get a more balanced
	// splits for parallel compilation.
	XlaLlvmForceInlineBeforeSplit bool `` /* 159-byte string literal not displayed */
	// Whether to use the cuDNN frontend API for convolutions when possible.
	XlaGpuEnableCudnnFrontend       bool `` /* 145-byte string literal not displayed */
	XlaGpuEnableCudnnFmha           bool `` /* 133-byte string literal not displayed */
	XlaGpuFusedAttentionUseCudnnRng bool `` /* 167-byte string literal not displayed */
	// Rewrite layer norm patterns into cuDNN library calls.
	XlaGpuEnableCudnnLayerNorm bool `` /* 150-byte string literal not displayed */
	// Disable dumping metadata in HLO dumps.
	XlaDumpDisableMetadata bool `` /* 134-byte string literal not displayed */
	// If this flag is specified, will only dump HLO before and after passes in
	// the pass pipeline that matches this regular expression. Default empty value
	// enables dumping in all pipelines.
	XlaDumpHloPipelineRe string `` /* 129-byte string literal not displayed */
	// If true, abort immediately when conv algorithm picker fails, rather than
	// logging a warning and proceeding with fallback.
	XlaGpuStrictConvAlgorithmPicker bool `` /* 165-byte string literal not displayed */
	// If true, XLA will try to pattern match subgraphs of HLO operations into
	// custom fusions registered in the current process (pre-compiled hand written
	// kernels, e.g. various GEMM fusions written in CUTLASS).
	XlaGpuEnableCustomFusions bool `` /* 145-byte string literal not displayed */
	// A regular expression enabling only a subset of custom fusions. Enabled only
	// if `xla_gpu_enable_custom_fusion` set to true.
	XlaGpuEnableCustomFusionsRe string `` /* 152-byte string literal not displayed */
	// Enables address computation fusion to optimize dynamic-slice and
	// dynamic-update-slice operations around library calls.
	XlaGpuEnableDynamicSliceFusion bool `` /* 162-byte string literal not displayed */
	// Timeout in seconds before terminating jobs that are stuck in a NCCL
	// Rendezvous. Negative value disables the timeout and will not terminate.
	XlaGpuNcclTerminationTimeoutSeconds int64 `` /* 177-byte string literal not displayed */
	// Enables shared constants for XLA/GPU. This allows large constants to be
	// shared among multiple GPU executables.
	XlaGpuEnableSharedConstants bool `` /* 151-byte string literal not displayed */
	// Whether to use cuBLASLt for GEMMs on GPUs.
	XlaGpuEnableCublaslt bool `` /* 128-byte string literal not displayed */
	// Determine the types of commands that are recorded into command buffers.
	XlaGpuEnableCommandBuffer []DebugOptions_CommandBufferCmdType `` /* 195-byte string literal not displayed */
	// This number determines how many moved instructions like fusion kernels are
	// required for a region to be captured as a function to be launched as a GPU
	// graph.
	XlaGpuGraphMinGraphSize int32 `` /* 141-byte string literal not displayed */
	// Identify concurrent regions in GPU graphs and execute them concurrently.
	XlaGpuGraphEnableConcurrentRegion bool `` /* 171-byte string literal not displayed */
	// Size threshold (in megabytes) for the GPU redzone scratch allocator.
	XlaGpuRedzoneScratchMaxMegabytes int64 `` /* 168-byte string literal not displayed */
	// Amount of padding the redzone allocator will put on one side of each buffer
	// it allocates.  (So the buffer's total size will be increased by 2x this
	// value.)
	//
	// Higher values make it more likely that we'll catch an out-of-bounds read or
	// write.  Smaller values consume less memory during autotuning.  Note that a
	// fused cudnn conv has up to 6 total buffers (4 inputs, 1 output, and 1
	// scratch), so this can be multiplied by quite a lot.
	XlaGpuRedzonePaddingBytes int64 `` /* 145-byte string literal not displayed */
	// Generate calls to Arm Compute Library in the CPU backend.
	XlaCpuUseAcl bool `protobuf:"varint,174,opt,name=xla_cpu_use_acl,json=xlaCpuUseAcl,proto3" json:"xla_cpu_use_acl,omitempty"`
	// By default, XLA:CPU will run fp16 dot/conv as fp32, as this is generally
	// (much) faster on our hardware.  Set this flag to disable this behavior.
	XlaCpuStrictDotConvMath bool `` /* 141-byte string literal not displayed */
	// An option to enable using cuDNN runtime compiled fusion kernels which is
	// available and recommended for Ampere+ GPUs.
	XlaGpuUseRuntimeFusion       bool `` /* 136-byte string literal not displayed */
	XlaDumpLatencyHidingSchedule bool `` /* 154-byte string literal not displayed */
	// By default, MLIR lowering will use Linalg elementwise fusion. If this flag
	// is enabled, the pipeline will use tiling, fusion, peeling, vectorization
	// instead.
	XlaCpuEnableMlirTilingAndFusion bool `` /* 167-byte string literal not displayed */
	// XLA:CPU-Next tiling parameters for matmul.
	XlaCpuEnableCustomMatmulTiling  bool  `` /* 162-byte string literal not displayed */
	XlaCpuMatmulTilingMDim          int64 `` /* 138-byte string literal not displayed */
	XlaCpuMatmulTilingNDim          int64 `` /* 138-byte string literal not displayed */
	XlaCpuMatmulTilingKDim          int64 `` /* 138-byte string literal not displayed */
	XlaCpuEnableMlirFusionOutlining bool  `` /* 165-byte string literal not displayed */
	// If set, use the experimental deallocation pass from mlir-hlo.
	XlaCpuEnableExperimentalDeallocation   bool   `` /* 178-byte string literal not displayed */
	XlaGpuEnableLatencyHidingScheduler     bool   `` /* 174-byte string literal not displayed */
	XlaGpuEnableHighestPriorityAsyncStream bool   `` /* 188-byte string literal not displayed */
	XlaGpuEnableAnalyticalLatencyEstimator bool   `` /* 186-byte string literal not displayed */
	XlaGpuLhsEnableGpuAsyncTracker         bool   `` /* 164-byte string literal not displayed */
	XlaGpuPgleProfileFileOrDirectoryPath   string `` /* 183-byte string literal not displayed */
	XlaGpuMemoryLimitSlopFactor            int32  `` /* 153-byte string literal not displayed */
	XlaGpuEnablePipelinedCollectives       bool   `` /* 166-byte string literal not displayed */
	XlaGpuEnablePipelinedAllReduce         bool   `` /* 162-byte string literal not displayed */
	XlaGpuEnablePipelinedAllGather         bool   `` /* 162-byte string literal not displayed */
	XlaGpuEnablePipelinedReduceScatter     bool   `` /* 174-byte string literal not displayed */
	XlaGpuEnablePipelinedP2P               bool   `` /* 142-byte string literal not displayed */
	// The minimum data size in bytes to trigger collective-permute-decomposer
	// transformation.
	XlaGpuCollectivePermuteDecomposerThreshold int64 `` /* 198-byte string literal not displayed */
	// The partitioning algorithm to be used in the PartitionAssignment pass.
	XlaPartitioningAlgorithm                      DebugOptions_PartitioningAlgorithm `` /* 182-byte string literal not displayed */
	XlaGpuEnableTritonGemm                        bool                               `` /* 136-byte string literal not displayed */
	XlaGpuEnableCudnnInt8X32ConvolutionReordering bool                               `` /* 209-byte string literal not displayed */
	// Creates triton fusion for all supported gemms.
	// To make sure only triton gemm is chosen by the autotuner run with
	// `xla_gpu_cublas_fallback` set to false.
	XlaGpuTritonGemmAny          bool `` /* 127-byte string literal not displayed */
	XlaGpuExhaustiveTilingSearch bool `` /* 154-byte string literal not displayed */
	// File to write autotune results to. It will be a binary file unless the name
	// ends with .txt or .textproto. Warning: The results are written at every
	// compilation, possibly multiple times per process. This only works on CUDA.
	XlaGpuDumpAutotuneResultsTo string `` /* 152-byte string literal not displayed */
	// File to load autotune results from. It will be considered a binary file
	// unless the name ends with .txt or .textproto. At most one loading will
	// happen during the lifetime of one process, even if the first one is
	// unsuccessful or different file paths are passed here. This only works on
	// CUDA.
	XlaGpuLoadAutotuneResultsFrom string `` /* 158-byte string literal not displayed */
	// Description of the target platform in GpuTargetConfigProto format; if
	// provided, deviceless compilation is assumed, and the current device is
	// ignored.
	XlaGpuTargetConfigFilename string `` /* 147-byte string literal not displayed */
	// Memory budget in GB per device for AutoSharding.
	XlaGpuAutoSpmdPartitioningMemoryBudgetGb int32 `` /* 196-byte string literal not displayed */
	// See the definition of the
	// xla_gpu_auto_spmd_partitioning_memory_budget_ratio flag for the meaning of
	// this field.
	XlaGpuAutoSpmdPartitioningMemoryBudgetRatio      float32 `` /* 206-byte string literal not displayed */
	XlaGpuTritonGemmDisableReducedPrecisionReduction bool    `` /* 220-byte string literal not displayed */
	XlaGpuTritonFusionLevel                          int32   `` /* 139-byte string literal not displayed */
	XlaGpuDumpAutotunedGemmFusions                   bool    `` /* 162-byte string literal not displayed */
	XlaGpuOverrideGemmAutotuner                      string  `` /* 150-byte string literal not displayed */
	XlaGpuCopyInsertionUseRegionAnalysis             bool    `` /* 182-byte string literal not displayed */
	// If true, each fusion instruction will have a cost model runtime estimate in
	// backend config after compilation.
	XlaGpuCollectCostModelStats  bool `` /* 153-byte string literal not displayed */
	XlaGpuEnableSplitKAutotuning bool `` /* 156-byte string literal not displayed */
	// Whether reduction epilogue fusion is enabled in fusion passes.
	XlaGpuEnableReductionEpilogueFusion bool `` /* 177-byte string literal not displayed */
	// Allow early return when acquiring NCCL cliques.
	XlaGpuEnableNcclCliqueOptimization bool `` /* 174-byte string literal not displayed */
	// Replace custom calls with noop operations.
	XlaGpuMockCustomCalls bool `` /* 133-byte string literal not displayed */
	// Allow Triton GEMM autotuning to fall back to cuBLAS when that is
	// faster.
	XlaGpuCublasFallback bool `` /* 128-byte string literal not displayed */
	// Enable double buffering for loops.
	XlaGpuEnableWhileLoopDoubleBuffering bool `` /* 182-byte string literal not displayed */
	// Determine the while loop unrolling scheme.
	XlaGpuEnableWhileLoopUnrolling DebugOptions_WhileLoopUnrolling `` /* 203-byte string literal not displayed */
	// Change the layout of the second triton dot operand to be column major.
	// Only works for (bf16 x bf16) -> bf16.
	XlaGpuEnsureMinorDotContractionDims bool `` /* 179-byte string literal not displayed */
	// Filter out kernels that spill registers during autotuning.
	XlaGpuFilterKernelsSpillingRegistersOnAutotuning bool `` /* 220-byte string literal not displayed */
	// Maximum number of buffers to print when debugging buffer assignment.
	XlaDebugBufferAssignmentShowMax int64 `` /* 165-byte string literal not displayed */
	XlaGpuLlvmVerificationLevel     int32 `` /* 151-byte string literal not displayed */
	// Enable radix sort using CUB.
	XlaGpuEnableCubRadixSort bool `` /* 144-byte string literal not displayed */
	// Threshold to enable windowed einsum (collective matmul) in MB.
	XlaGpuThresholdForWindowedEinsumMib int64 `` /* 179-byte string literal not displayed */
	// Enables currently disabled features within Triton for Hopper.
	XlaGpuEnableTritonHopper bool `` /* 142-byte string literal not displayed */
	// Enable NCCL user buffers.
	XlaGpuEnableNcclUserBuffers bool `` /* 153-byte string literal not displayed */
	// Enable NCCL communicator splitting.
	XlaGpuEnableNcclCommSplitting bool `` /* 159-byte string literal not displayed */
	// Enable NCCL per stream communicators.
	XlaGpuEnableNcclPerStreamComms bool `` /* 164-byte string literal not displayed */
	// If enabled, uses the libnvptxcompiler library to compile PTX to cuBIN.
	XlaGpuEnableLibnvptxcompiler     bool `` /* 152-byte string literal not displayed */
	XlaGpuEnableDotStrengthReduction bool `` /* 168-byte string literal not displayed */
	// Whether to use multiple compute streams to run windowed einsum.
	XlaGpuMultiStreamedWindowedEinsum  bool `` /* 171-byte string literal not displayed */
	XlaGpuExperimentalStreamAnnotation bool `` /* 172-byte string literal not displayed */
	// If enabled, uses bf16_6way gemm to compute F32 gemm.
	XlaGpuEnableBf16_6WayGemm bool `` /* 144-byte string literal not displayed */
	// If enabled, uses bf16_3way gemm to compute F32 gemm.
	XlaGpuEnableBf16_3WayGemm bool `` /* 144-byte string literal not displayed */
	// Specify the maximum number of channels (SMs) NCCL
	// will use for collective operations.
	XlaGpuNcclCollectiveMaxNchannels int64 `` /* 168-byte string literal not displayed */
	// Specify the maximum number of channels (SMs) NCCL
	// will use for p2p operations.
	XlaGpuNcclP2PMaxNchannels int64 `` /* 147-byte string literal not displayed */
	// Threshold to rewrite matmul to cuBLAS or Triton (minimum combined number of
	// elements of both matrices in non-batch dimensions to be considered for a
	// rewrite).
	XlaGpuGemmRewriteSizeThreshold int64 `` /* 162-byte string literal not displayed */
	// If true, will require complete AOT autotuning results; in the case of
	// missing AOT result, the model will not be compiled or executed, a
	// `NotFound` error will be returned.
	XlaGpuRequireCompleteAotAutotuneResults bool `` /* 191-byte string literal not displayed */
	// Let GEMM fusion autotuning probe cuDNN as a backend.
	// Current levels:
	// 0: Disabled.
	// 1: Fusions of GEMM, elementwise, transpose/reshape operations.
	// 2: + Broadcasts, slicing.
	// 3: + Nontrivial noncontracting dimension reshapes/transposes.
	XlaGpuCudnnGemmFusionLevel int32 `` /* 150-byte string literal not displayed */
	// This instructs the runtime whether to use
	// memcpy for p2p communication when source and
	// target are located within a node(nvlink).
	XlaGpuUseMemcpyLocalP2P bool `` /* 141-byte string literal not displayed */
	// If non-zero, limits the number of solutions to be used by GEMM autotuner.
	// This might be useful if underlying math library returns too many GEMM
	// solutions.
	XlaGpuAutotuneMaxSolutions int64 `` /* 148-byte string literal not displayed */
	// If true, large constants will be printed out when dumping HLOs.
	XlaDumpLargeConstants bool `` /* 131-byte string literal not displayed */
	// If true, will verify that the numerical results of Triton fusions match
	// the results of regular emitters.
	XlaGpuVerifyTritonFusionNumerics bool `` /* 168-byte string literal not displayed */
	// File to write autotune logs to. It will be stored in txt format.
	XlaGpuDumpAutotuneLogsTo string `` /* 143-byte string literal not displayed */
	// Base length to rewrite the reduce window to, no rewrite if set to 0.
	XlaReduceWindowRewriteBaseLength int64 `` /* 168-byte string literal not displayed */
	// If true, will enable host memory offloading on a device.
	XlaGpuEnableHostMemoryOffloading bool `` /* 168-byte string literal not displayed */
	// Excludes non-deterministic ops from compiled executables.
	// Unlike --xla_gpu_deterministic_ops does not disable autotuning - the
	// compilation itself can be non-deterministic.
	// Scatter ops can be non-deterministic by default; these get converted to
	// a deterministic implementation.
	XlaGpuExcludeNondeterministicOps bool `` /* 166-byte string literal not displayed */
	// If true, Nccl errors will terminate the process.
	XlaGpuNcclTerminateOnError          bool   `` /* 150-byte string literal not displayed */
	XlaGpuShardAutotuning               bool   `` /* 131-byte string literal not displayed */
	XlaGpuEnableApproxCostlyCollectives bool   `` /* 177-byte string literal not displayed */
	XlaGpuKernelCacheFile               string `` /* 132-byte string literal not displayed */
	// Recognises rotate-right patterns (slice, slice, concat) within a while
	// loop and labels the while loop as a pipelined while loop. This is an
	// unsafe flag.
	XlaGpuUnsafePipelinedLoopAnnotator bool   `` /* 174-byte string literal not displayed */
	XlaGpuPerFusionAutotuneCacheDir    string `` /* 166-byte string literal not displayed */
	// The command buffer trace cache size; increasing the cache size may
	// sometimes reduce the chances of doing command buffer tracing for
	// updating a command buffer instance.
	XlaCmdBufferTraceCacheSize int64 `` /* 150-byte string literal not displayed */
	// Enabling this flag will use a separate memory space color for the
	// temp buffer, and then use a separate memory allocator to allocate it;
	// as there is no other memory allocation interference,
	// it will allocate the temp buffer at some fixed address on every iteration,
	// which is good for cuda-graph performance.
	XlaGpuTempBufferUseSeparateColor bool `` /* 170-byte string literal not displayed */
	// Custom call targets with legacy registry API (non FFI API),
	// that support recording to command buffer custom command,
	// i.e., custom call target supports cuda-graph capturing for CUDA devices.
	// This flag is read if CUSTOM_CALL command type is recorded into
	// command buffer.
	LegacyCommandBufferCustomCallTargets []string `` /* 179-byte string literal not displayed */
	// This flag is used for controlling HLO dumping and NVTX marker. If turned
	// on, both HLO dumping and NVTX marker will use syntactic sugar wrappers
	// as op names, while the actual op names will be shown if turned off.
	//
	// Here is an example HLO excerpt with the flag off:
	//
	//	 async_computation {
	//	  param_0 = f32[1,4,8]{1,0,2} parameter(0)
	//	  ROOT all-to-all.3.1 = f32[1,4,8]{1,0,2} all-to-all(param_0),
	//	                        replica_groups={{0,1,2,3,4,5,6,7}}, dimensions={2}
	//	 }
	//	...
	//
	//	all-to-all-start =
	//	  ((f32[1,4,8]{1,0,2}), f32[1,4,8]{1,0,2}) async-start(bitcast.24.0),
	//	  calls=async_computation, backend_config={...}
	//	all-to-all-done = f32[1,4,8]{1,0,2} async-done(all-to-all-start)
	//
	// and with the flag on:
	//
	//	all-to-all-start = ((f32[1,4,8]{1,0,2}), f32[1,4,8]{1,0,2})
	//	                   all-to-all-start(bitcast.24.0),
	//	                   replica_groups={{0,1,2,3,4,5,6,7}}, dimensions={2},
	//	                   backend_config={...}
	//	all-to-all-done = f32[1,4,8]{1,0,2} all-to-all-done(all-to-all-start)
	XlaSyntaxSugarAsyncOps bool `` /* 136-byte string literal not displayed */
	// Relative precision for comparing different GEMM solutions
	XlaGpuAutotuneGemmRtol float32 `` /* 137-byte string literal not displayed */
	// Allow launching command buffers while profiling active.
	// When disabled, execute in op-by-op mode.
	// TODO(b/355487968): Remove this option when validation complete.
	XlaEnableCommandBuffersDuringProfiling bool `` /* 186-byte string literal not displayed */
	// Limit for the number of kernel configurations (plans) to use during
	// autotuning of cuDNN GEMM fusions. The more - the slower the autotuning
	// but potentially higher the performance.
	XlaGpuCudnnGemmMaxPlans int32 `` /* 141-byte string literal not displayed */
	// If enabled, uses the libnvjitlink library for PTX compilation and linking
	XlaGpuLibnvjitlinkMode DebugOptions_LibNvJitLinkMode `` /* 173-byte string literal not displayed */
	// If true, XLA will wrap `dot` operations into async computations in an
	// effort to parallelize matrix operations.
	XlaGpuAsyncDot bool `protobuf:"varint,321,opt,name=xla_gpu_async_dot,json=xlaGpuAsyncDot,proto3" json:"xla_gpu_async_dot,omitempty"`
	// Timeouts for RendezvousSingle stuck warning and termination.
	XlaGpuExecutableWarnStuckTimeoutSeconds int32 `` /* 191-byte string literal not displayed */
	XlaGpuExecutableTerminateTimeoutSeconds int32 `` /* 189-byte string literal not displayed */
	// Whether to ignore channel ids (including verifier channel id checks)
	// for collectives in the given HLO.
	XlaExperimentalIgnoreChannelId bool `` /* 160-byte string literal not displayed */
	// DotMerger pass threshold size to be used in MB.
	XlaGpuDotMergerThresholdMb int32 `` /* 150-byte string literal not displayed */
	// If enabled, in the absence of user provided knobs might tune pass
	// configurations based on the HLO. For example it decides to unroll the while
	// loop by a factor of two if a collective op is present.
	XlaGpuEnableHeuristicPassConfiguration bool `` /* 186-byte string literal not displayed */
	// This controls how many in-flight collectives latency hiding scheduler
	// can schedule. Example usage:
	// With xla_gpu_experimental_parallel_collective_overlap_limit = 1:
	//
	//	coll.1-start = collective(input)
	//	coll.1-done = collective(coll.1-start)
	//	coll.2-start = collective(input2)
	//	coll.2-done = collective(coll.2-start)
	//
	// With xla_gpu_experimental_parallel_collective_overlap_limit = 2:
	//
	//	coll.1-start = collective(input)
	//	coll.2-start = collective(input2)
	//	coll.1-done = collective(coll.1-start)
	//	coll.2-done = collective(coll.2-start)
	XlaGpuExperimentalParallelCollectiveOverlapLimit int32 `` /* 218-byte string literal not displayed */
	// If set >= 0, this controls the total bytes(combined sizes of both
	// operands in bytes) to enable windowed einsum and
	// xla_gpu_threshold_for_windowed_einsum_mib will be ignored.
	XlaGpuOperandBytesThresholdForWindowedEinsum int64                            `` /* 208-byte string literal not displayed */
	XlaGpuPgleAccuracyChecker                    DebugOptions_PGLEStrictnessLevel `` /* 187-byte string literal not displayed */
	XlaPjrtAllowAutoLayoutInHlo                  bool                             `` /* 155-byte string literal not displayed */
	// Enable the scatter determinism expander, an optimized pass that
	// rewrites scatter operations to ensure deterministic behavior with high
	// performance.
	// Note that even when this flag is disabled, scatter operations may still
	// be deterministic, although with additional overhead.
	XlaGpuEnableScatterDeterminismExpander bool `` /* 186-byte string literal not displayed */
	// Extra options to pass to the compilation backend (e.g. LLVM); specific
	// interpretation of these values is left to the backend.
	XlaBackendExtraOptions map[string]string `` /* 221-byte string literal not displayed */
	// contains filtered or unexported fields
}

Debugging options for XLA. These options may change at any time - there are no guarantees about backward or forward compatibility for these fields.

Debug options naming and organization:

  1. Backend-agnostic options: `xla_$flag_name` - go first, and sorted alphabetically by the flag name.

  2. Backend-specific options: `xla_$backend_$flag_name` - must be in the corresponding backend section, and sorted alphabetically by the flag name.

func (*DebugOptions) Descriptor deprecated

func (*DebugOptions) Descriptor() ([]byte, []int)

Deprecated: Use DebugOptions.ProtoReflect.Descriptor instead.

func (*DebugOptions) GetLegacyCommandBufferCustomCallTargets

func (x *DebugOptions) GetLegacyCommandBufferCustomCallTargets() []string

func (*DebugOptions) GetXlaAllowExcessPrecision

func (x *DebugOptions) GetXlaAllowExcessPrecision() bool

func (*DebugOptions) GetXlaAllowScalarIndexDynamicOps

func (x *DebugOptions) GetXlaAllowScalarIndexDynamicOps() bool

func (*DebugOptions) GetXlaBackendExtraOptions

func (x *DebugOptions) GetXlaBackendExtraOptions() map[string]string

func (*DebugOptions) GetXlaBackendOptimizationLevel

func (x *DebugOptions) GetXlaBackendOptimizationLevel() int32

func (*DebugOptions) GetXlaCmdBufferTraceCacheSize

func (x *DebugOptions) GetXlaCmdBufferTraceCacheSize() int64

func (*DebugOptions) GetXlaCpuCopyInsertionUseRegionAnalysis added in v0.4.7

func (x *DebugOptions) GetXlaCpuCopyInsertionUseRegionAnalysis() bool

func (*DebugOptions) GetXlaCpuEnableConcurrencyOptimizedScheduler

func (x *DebugOptions) GetXlaCpuEnableConcurrencyOptimizedScheduler() bool

func (*DebugOptions) GetXlaCpuEnableCustomMatmulTiling

func (x *DebugOptions) GetXlaCpuEnableCustomMatmulTiling() bool

func (*DebugOptions) GetXlaCpuEnableExperimentalDeallocation

func (x *DebugOptions) GetXlaCpuEnableExperimentalDeallocation() bool

func (*DebugOptions) GetXlaCpuEnableFastMath

func (x *DebugOptions) GetXlaCpuEnableFastMath() bool

func (*DebugOptions) GetXlaCpuEnableFastMinMax

func (x *DebugOptions) GetXlaCpuEnableFastMinMax() bool

func (*DebugOptions) GetXlaCpuEnableMlirFusionOutlining

func (x *DebugOptions) GetXlaCpuEnableMlirFusionOutlining() bool

func (*DebugOptions) GetXlaCpuEnableMlirTilingAndFusion

func (x *DebugOptions) GetXlaCpuEnableMlirTilingAndFusion() bool

func (*DebugOptions) GetXlaCpuEnableXprofTraceme

func (x *DebugOptions) GetXlaCpuEnableXprofTraceme() bool

func (*DebugOptions) GetXlaCpuFastMathHonorDivision

func (x *DebugOptions) GetXlaCpuFastMathHonorDivision() bool

func (*DebugOptions) GetXlaCpuFastMathHonorFunctions

func (x *DebugOptions) GetXlaCpuFastMathHonorFunctions() bool

func (*DebugOptions) GetXlaCpuFastMathHonorInfs

func (x *DebugOptions) GetXlaCpuFastMathHonorInfs() bool

func (*DebugOptions) GetXlaCpuFastMathHonorNans

func (x *DebugOptions) GetXlaCpuFastMathHonorNans() bool

func (*DebugOptions) GetXlaCpuMatmulTilingKDim

func (x *DebugOptions) GetXlaCpuMatmulTilingKDim() int64

func (*DebugOptions) GetXlaCpuMatmulTilingMDim

func (x *DebugOptions) GetXlaCpuMatmulTilingMDim() int64

func (*DebugOptions) GetXlaCpuMatmulTilingNDim

func (x *DebugOptions) GetXlaCpuMatmulTilingNDim() int64

func (*DebugOptions) GetXlaCpuMaxIsa added in v0.4.7

func (x *DebugOptions) GetXlaCpuMaxIsa() string

func (*DebugOptions) GetXlaCpuMultiThreadEigen

func (x *DebugOptions) GetXlaCpuMultiThreadEigen() bool

func (*DebugOptions) GetXlaCpuParallelCodegenSplitCount

func (x *DebugOptions) GetXlaCpuParallelCodegenSplitCount() int32

func (*DebugOptions) GetXlaCpuPreferVectorWidth

func (x *DebugOptions) GetXlaCpuPreferVectorWidth() int32

func (*DebugOptions) GetXlaCpuStrictDotConvMath

func (x *DebugOptions) GetXlaCpuStrictDotConvMath() bool

func (*DebugOptions) GetXlaCpuUseAcl

func (x *DebugOptions) GetXlaCpuUseAcl() bool

func (*DebugOptions) GetXlaCpuUseMklDnn

func (x *DebugOptions) GetXlaCpuUseMklDnn() bool

func (*DebugOptions) GetXlaCpuUseThunkRuntime

func (x *DebugOptions) GetXlaCpuUseThunkRuntime() bool

func (*DebugOptions) GetXlaDebugBufferAssignmentShowMax

func (x *DebugOptions) GetXlaDebugBufferAssignmentShowMax() int64

func (*DebugOptions) GetXlaDetailedLogging

func (x *DebugOptions) GetXlaDetailedLogging() bool

func (*DebugOptions) GetXlaDisableAllHloPasses

func (x *DebugOptions) GetXlaDisableAllHloPasses() bool

func (*DebugOptions) GetXlaDisableHloPasses

func (x *DebugOptions) GetXlaDisableHloPasses() []string

func (*DebugOptions) GetXlaDumpCompressProtos

func (x *DebugOptions) GetXlaDumpCompressProtos() bool

func (*DebugOptions) GetXlaDumpDisableMetadata

func (x *DebugOptions) GetXlaDumpDisableMetadata() bool

func (*DebugOptions) GetXlaDumpEnableMlirPrettyForm

func (x *DebugOptions) GetXlaDumpEnableMlirPrettyForm() bool

func (*DebugOptions) GetXlaDumpFusionVisualization

func (x *DebugOptions) GetXlaDumpFusionVisualization() bool

func (*DebugOptions) GetXlaDumpHloAsDot

func (x *DebugOptions) GetXlaDumpHloAsDot() bool

func (*DebugOptions) GetXlaDumpHloAsHtml

func (x *DebugOptions) GetXlaDumpHloAsHtml() bool

func (*DebugOptions) GetXlaDumpHloAsLongText

func (x *DebugOptions) GetXlaDumpHloAsLongText() bool

func (*DebugOptions) GetXlaDumpHloAsProto

func (x *DebugOptions) GetXlaDumpHloAsProto() bool

func (*DebugOptions) GetXlaDumpHloAsText

func (x *DebugOptions) GetXlaDumpHloAsText() bool

func (*DebugOptions) GetXlaDumpHloAsUrl

func (x *DebugOptions) GetXlaDumpHloAsUrl() bool

func (*DebugOptions) GetXlaDumpHloModuleRe

func (x *DebugOptions) GetXlaDumpHloModuleRe() string

func (*DebugOptions) GetXlaDumpHloPassRe

func (x *DebugOptions) GetXlaDumpHloPassRe() string

func (*DebugOptions) GetXlaDumpHloPipelineRe

func (x *DebugOptions) GetXlaDumpHloPipelineRe() string

func (*DebugOptions) GetXlaDumpHloSnapshots

func (x *DebugOptions) GetXlaDumpHloSnapshots() bool

func (*DebugOptions) GetXlaDumpIncludeTimestamp

func (x *DebugOptions) GetXlaDumpIncludeTimestamp() bool

func (*DebugOptions) GetXlaDumpLargeConstants

func (x *DebugOptions) GetXlaDumpLargeConstants() bool

func (*DebugOptions) GetXlaDumpLatencyHidingSchedule

func (x *DebugOptions) GetXlaDumpLatencyHidingSchedule() bool

func (*DebugOptions) GetXlaDumpMaxHloModules

func (x *DebugOptions) GetXlaDumpMaxHloModules() int32

func (*DebugOptions) GetXlaDumpModuleMetadata

func (x *DebugOptions) GetXlaDumpModuleMetadata() bool

func (*DebugOptions) GetXlaDumpTo

func (x *DebugOptions) GetXlaDumpTo() string

func (*DebugOptions) GetXlaEliminateHloImplicitBroadcast

func (x *DebugOptions) GetXlaEliminateHloImplicitBroadcast() bool

func (*DebugOptions) GetXlaEmbedIrInExecutable

func (x *DebugOptions) GetXlaEmbedIrInExecutable() bool

func (*DebugOptions) GetXlaEnableCommandBuffersDuringProfiling

func (x *DebugOptions) GetXlaEnableCommandBuffersDuringProfiling() bool

func (*DebugOptions) GetXlaEnableDumping

func (x *DebugOptions) GetXlaEnableDumping() bool

func (*DebugOptions) GetXlaEnableFastMath added in v0.4.7

func (x *DebugOptions) GetXlaEnableFastMath() bool

func (*DebugOptions) GetXlaEnableHloPassesOnly

func (x *DebugOptions) GetXlaEnableHloPassesOnly() []string

func (*DebugOptions) GetXlaExperimentalExecTimeOptimizationEffort added in v0.4.9

func (x *DebugOptions) GetXlaExperimentalExecTimeOptimizationEffort() float32

func (*DebugOptions) GetXlaExperimentalIgnoreChannelId added in v0.4.2

func (x *DebugOptions) GetXlaExperimentalIgnoreChannelId() bool

func (*DebugOptions) GetXlaForceHostPlatformDeviceCount

func (x *DebugOptions) GetXlaForceHostPlatformDeviceCount() int32

func (*DebugOptions) GetXlaGpuAlgorithmDenylistPath

func (x *DebugOptions) GetXlaGpuAlgorithmDenylistPath() string

func (*DebugOptions) GetXlaGpuAllGatherCombineThresholdBytes

func (x *DebugOptions) GetXlaGpuAllGatherCombineThresholdBytes() int64

func (*DebugOptions) GetXlaGpuAllReduceBlueconnectNumDevicesPerHost

func (x *DebugOptions) GetXlaGpuAllReduceBlueconnectNumDevicesPerHost() int32

func (*DebugOptions) GetXlaGpuAllReduceCombineThresholdBytes

func (x *DebugOptions) GetXlaGpuAllReduceCombineThresholdBytes() int64

func (*DebugOptions) GetXlaGpuAsmExtraFlags

func (x *DebugOptions) GetXlaGpuAsmExtraFlags() string

func (*DebugOptions) GetXlaGpuAsyncDot

func (x *DebugOptions) GetXlaGpuAsyncDot() bool

func (*DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetGb

func (x *DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetGb() int32

func (*DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetRatio

func (x *DebugOptions) GetXlaGpuAutoSpmdPartitioningMemoryBudgetRatio() float32

func (*DebugOptions) GetXlaGpuAutotuneGemmRtol

func (x *DebugOptions) GetXlaGpuAutotuneGemmRtol() float32

func (*DebugOptions) GetXlaGpuAutotuneLevel

func (x *DebugOptions) GetXlaGpuAutotuneLevel() int32

func (*DebugOptions) GetXlaGpuAutotuneMaxSolutions

func (x *DebugOptions) GetXlaGpuAutotuneMaxSolutions() int64

func (*DebugOptions) GetXlaGpuCollectCostModelStats

func (x *DebugOptions) GetXlaGpuCollectCostModelStats() bool

func (*DebugOptions) GetXlaGpuCollectiveInflationFactor

func (x *DebugOptions) GetXlaGpuCollectiveInflationFactor() int32

func (*DebugOptions) GetXlaGpuCollectivePermuteDecomposerThreshold

func (x *DebugOptions) GetXlaGpuCollectivePermuteDecomposerThreshold() int64

func (*DebugOptions) GetXlaGpuCopyInsertionUseRegionAnalysis

func (x *DebugOptions) GetXlaGpuCopyInsertionUseRegionAnalysis() bool

func (*DebugOptions) GetXlaGpuCrashOnVerificationFailures

func (x *DebugOptions) GetXlaGpuCrashOnVerificationFailures() bool

func (*DebugOptions) GetXlaGpuCublasFallback

func (x *DebugOptions) GetXlaGpuCublasFallback() bool

func (*DebugOptions) GetXlaGpuCudaDataDir

func (x *DebugOptions) GetXlaGpuCudaDataDir() string

func (*DebugOptions) GetXlaGpuCudnnGemmFusionLevel

func (x *DebugOptions) GetXlaGpuCudnnGemmFusionLevel() int32

func (*DebugOptions) GetXlaGpuCudnnGemmMaxPlans

func (x *DebugOptions) GetXlaGpuCudnnGemmMaxPlans() int32

func (*DebugOptions) GetXlaGpuDeterministicOps

func (x *DebugOptions) GetXlaGpuDeterministicOps() bool

func (*DebugOptions) GetXlaGpuDisableAsyncCollectives

func (x *DebugOptions) GetXlaGpuDisableAsyncCollectives() []DebugOptions_CollectiveOpType

func (*DebugOptions) GetXlaGpuDisableGpuasmOptimizations

func (x *DebugOptions) GetXlaGpuDisableGpuasmOptimizations() bool

func (*DebugOptions) GetXlaGpuDotMergerThresholdMb added in v0.4.2

func (x *DebugOptions) GetXlaGpuDotMergerThresholdMb() int32

func (*DebugOptions) GetXlaGpuDumpAutotuneLogsTo

func (x *DebugOptions) GetXlaGpuDumpAutotuneLogsTo() string

func (*DebugOptions) GetXlaGpuDumpAutotuneResultsTo

func (x *DebugOptions) GetXlaGpuDumpAutotuneResultsTo() string

func (*DebugOptions) GetXlaGpuDumpAutotunedGemmFusions

func (x *DebugOptions) GetXlaGpuDumpAutotunedGemmFusions() bool

func (*DebugOptions) GetXlaGpuDumpLlvmir

func (x *DebugOptions) GetXlaGpuDumpLlvmir() bool

func (*DebugOptions) GetXlaGpuEnableAllGatherCombineByDim

func (x *DebugOptions) GetXlaGpuEnableAllGatherCombineByDim() bool

func (*DebugOptions) GetXlaGpuEnableAnalyticalLatencyEstimator

func (x *DebugOptions) GetXlaGpuEnableAnalyticalLatencyEstimator() bool

func (*DebugOptions) GetXlaGpuEnableApproxCostlyCollectives

func (x *DebugOptions) GetXlaGpuEnableApproxCostlyCollectives() bool

func (*DebugOptions) GetXlaGpuEnableBf16_3WayGemm

func (x *DebugOptions) GetXlaGpuEnableBf16_3WayGemm() bool

func (*DebugOptions) GetXlaGpuEnableBf16_6WayGemm

func (x *DebugOptions) GetXlaGpuEnableBf16_6WayGemm() bool

func (*DebugOptions) GetXlaGpuEnableCommandBuffer

func (x *DebugOptions) GetXlaGpuEnableCommandBuffer() []DebugOptions_CommandBufferCmdType

func (*DebugOptions) GetXlaGpuEnableCubRadixSort

func (x *DebugOptions) GetXlaGpuEnableCubRadixSort() bool

func (*DebugOptions) GetXlaGpuEnableCublaslt

func (x *DebugOptions) GetXlaGpuEnableCublaslt() bool

func (*DebugOptions) GetXlaGpuEnableCudnnFmha

func (x *DebugOptions) GetXlaGpuEnableCudnnFmha() bool

func (*DebugOptions) GetXlaGpuEnableCudnnFrontend

func (x *DebugOptions) GetXlaGpuEnableCudnnFrontend() bool

func (*DebugOptions) GetXlaGpuEnableCudnnInt8X32ConvolutionReordering

func (x *DebugOptions) GetXlaGpuEnableCudnnInt8X32ConvolutionReordering() bool

func (*DebugOptions) GetXlaGpuEnableCudnnLayerNorm

func (x *DebugOptions) GetXlaGpuEnableCudnnLayerNorm() bool

func (*DebugOptions) GetXlaGpuEnableCustomFusions

func (x *DebugOptions) GetXlaGpuEnableCustomFusions() bool

func (*DebugOptions) GetXlaGpuEnableCustomFusionsRe

func (x *DebugOptions) GetXlaGpuEnableCustomFusionsRe() string

func (*DebugOptions) GetXlaGpuEnableDotStrengthReduction

func (x *DebugOptions) GetXlaGpuEnableDotStrengthReduction() bool

func (*DebugOptions) GetXlaGpuEnableDynamicSliceFusion

func (x *DebugOptions) GetXlaGpuEnableDynamicSliceFusion() bool

func (*DebugOptions) GetXlaGpuEnableFastMinMax

func (x *DebugOptions) GetXlaGpuEnableFastMinMax() bool

func (*DebugOptions) GetXlaGpuEnableHeuristicPassConfiguration added in v0.4.2

func (x *DebugOptions) GetXlaGpuEnableHeuristicPassConfiguration() bool

func (*DebugOptions) GetXlaGpuEnableHighestPriorityAsyncStream

func (x *DebugOptions) GetXlaGpuEnableHighestPriorityAsyncStream() bool

func (*DebugOptions) GetXlaGpuEnableHostMemoryOffloading

func (x *DebugOptions) GetXlaGpuEnableHostMemoryOffloading() bool

func (*DebugOptions) GetXlaGpuEnableLatencyHidingScheduler

func (x *DebugOptions) GetXlaGpuEnableLatencyHidingScheduler() bool

func (*DebugOptions) GetXlaGpuEnableLibnvptxcompiler

func (x *DebugOptions) GetXlaGpuEnableLibnvptxcompiler() bool

func (*DebugOptions) GetXlaGpuEnableLlvmModuleCompilationParallelism

func (x *DebugOptions) GetXlaGpuEnableLlvmModuleCompilationParallelism() bool

func (*DebugOptions) GetXlaGpuEnableNcclCliqueOptimization

func (x *DebugOptions) GetXlaGpuEnableNcclCliqueOptimization() bool

func (*DebugOptions) GetXlaGpuEnableNcclCommSplitting

func (x *DebugOptions) GetXlaGpuEnableNcclCommSplitting() bool

func (*DebugOptions) GetXlaGpuEnableNcclPerStreamComms

func (x *DebugOptions) GetXlaGpuEnableNcclPerStreamComms() bool

func (*DebugOptions) GetXlaGpuEnableNcclUserBuffers

func (x *DebugOptions) GetXlaGpuEnableNcclUserBuffers() bool

func (*DebugOptions) GetXlaGpuEnablePipelinedAllGather

func (x *DebugOptions) GetXlaGpuEnablePipelinedAllGather() bool

func (*DebugOptions) GetXlaGpuEnablePipelinedAllReduce

func (x *DebugOptions) GetXlaGpuEnablePipelinedAllReduce() bool

func (*DebugOptions) GetXlaGpuEnablePipelinedCollectives

func (x *DebugOptions) GetXlaGpuEnablePipelinedCollectives() bool

func (*DebugOptions) GetXlaGpuEnablePipelinedP2P

func (x *DebugOptions) GetXlaGpuEnablePipelinedP2P() bool

func (*DebugOptions) GetXlaGpuEnablePipelinedReduceScatter

func (x *DebugOptions) GetXlaGpuEnablePipelinedReduceScatter() bool

func (*DebugOptions) GetXlaGpuEnableReassociationForConvertedAr

func (x *DebugOptions) GetXlaGpuEnableReassociationForConvertedAr() bool

func (*DebugOptions) GetXlaGpuEnableReduceScatterCombineByDim

func (x *DebugOptions) GetXlaGpuEnableReduceScatterCombineByDim() bool

func (*DebugOptions) GetXlaGpuEnableReductionEpilogueFusion

func (x *DebugOptions) GetXlaGpuEnableReductionEpilogueFusion() bool

func (*DebugOptions) GetXlaGpuEnableScatterDeterminismExpander added in v0.4.9

func (x *DebugOptions) GetXlaGpuEnableScatterDeterminismExpander() bool

func (*DebugOptions) GetXlaGpuEnableSharedConstants

func (x *DebugOptions) GetXlaGpuEnableSharedConstants() bool

func (*DebugOptions) GetXlaGpuEnableSplitKAutotuning

func (x *DebugOptions) GetXlaGpuEnableSplitKAutotuning() bool

func (*DebugOptions) GetXlaGpuEnableTritonGemm

func (x *DebugOptions) GetXlaGpuEnableTritonGemm() bool

func (*DebugOptions) GetXlaGpuEnableTritonHopper

func (x *DebugOptions) GetXlaGpuEnableTritonHopper() bool

func (*DebugOptions) GetXlaGpuEnableWhileLoopDoubleBuffering

func (x *DebugOptions) GetXlaGpuEnableWhileLoopDoubleBuffering() bool

func (*DebugOptions) GetXlaGpuEnableWhileLoopReduceScatterCodeMotion

func (x *DebugOptions) GetXlaGpuEnableWhileLoopReduceScatterCodeMotion() bool

func (*DebugOptions) GetXlaGpuEnableWhileLoopUnrolling

func (x *DebugOptions) GetXlaGpuEnableWhileLoopUnrolling() DebugOptions_WhileLoopUnrolling

func (*DebugOptions) GetXlaGpuEnsureMinorDotContractionDims

func (x *DebugOptions) GetXlaGpuEnsureMinorDotContractionDims() bool

func (*DebugOptions) GetXlaGpuExcludeNondeterministicOps

func (x *DebugOptions) GetXlaGpuExcludeNondeterministicOps() bool

func (*DebugOptions) GetXlaGpuExecutableTerminateTimeoutSeconds added in v0.4.2

func (x *DebugOptions) GetXlaGpuExecutableTerminateTimeoutSeconds() int32

func (*DebugOptions) GetXlaGpuExecutableWarnStuckTimeoutSeconds added in v0.4.2

func (x *DebugOptions) GetXlaGpuExecutableWarnStuckTimeoutSeconds() int32

func (*DebugOptions) GetXlaGpuExhaustiveTilingSearch

func (x *DebugOptions) GetXlaGpuExhaustiveTilingSearch() bool

func (*DebugOptions) GetXlaGpuExperimentalAutotuneCacheMode added in v0.4.0

func (x *DebugOptions) GetXlaGpuExperimentalAutotuneCacheMode() DebugOptions_AutotuneCacheMode

func (*DebugOptions) GetXlaGpuExperimentalDisableBinaryLibraries added in v0.4.2

func (x *DebugOptions) GetXlaGpuExperimentalDisableBinaryLibraries() bool

func (*DebugOptions) GetXlaGpuExperimentalDumpFdoProfiles added in v0.4.7

func (x *DebugOptions) GetXlaGpuExperimentalDumpFdoProfiles() bool

func (*DebugOptions) GetXlaGpuExperimentalEnableFusionBlockLevelRewriter added in v0.4.7

func (x *DebugOptions) GetXlaGpuExperimentalEnableFusionBlockLevelRewriter() bool

func (*DebugOptions) GetXlaGpuExperimentalEnableTritonHerolessPriorityFusion added in v0.4.7

func (x *DebugOptions) GetXlaGpuExperimentalEnableTritonHerolessPriorityFusion() bool

func (*DebugOptions) GetXlaGpuExperimentalEnableTritonSoftmaxPriorityFusion added in v0.4.2

func (x *DebugOptions) GetXlaGpuExperimentalEnableTritonSoftmaxPriorityFusion() bool

func (*DebugOptions) GetXlaGpuExperimentalParallelCollectiveOverlapLimit added in v0.4.7

func (x *DebugOptions) GetXlaGpuExperimentalParallelCollectiveOverlapLimit() int32

func (*DebugOptions) GetXlaGpuExperimentalStreamAnnotation added in v0.4.7

func (x *DebugOptions) GetXlaGpuExperimentalStreamAnnotation() bool

func (*DebugOptions) GetXlaGpuFilterKernelsSpillingRegistersOnAutotuning

func (x *DebugOptions) GetXlaGpuFilterKernelsSpillingRegistersOnAutotuning() bool

func (*DebugOptions) GetXlaGpuForceCompilationParallelism

func (x *DebugOptions) GetXlaGpuForceCompilationParallelism() int32

func (*DebugOptions) GetXlaGpuForceConvNchw

func (x *DebugOptions) GetXlaGpuForceConvNchw() bool

func (*DebugOptions) GetXlaGpuForceConvNhwc

func (x *DebugOptions) GetXlaGpuForceConvNhwc() bool

func (*DebugOptions) GetXlaGpuFtz

func (x *DebugOptions) GetXlaGpuFtz() bool

func (*DebugOptions) GetXlaGpuFusedAttentionUseCudnnRng

func (x *DebugOptions) GetXlaGpuFusedAttentionUseCudnnRng() bool

func (*DebugOptions) GetXlaGpuGemmRewriteSizeThreshold

func (x *DebugOptions) GetXlaGpuGemmRewriteSizeThreshold() int64

func (*DebugOptions) GetXlaGpuGraphEnableConcurrentRegion

func (x *DebugOptions) GetXlaGpuGraphEnableConcurrentRegion() bool

func (*DebugOptions) GetXlaGpuGraphMinGraphSize

func (x *DebugOptions) GetXlaGpuGraphMinGraphSize() int32

func (*DebugOptions) GetXlaGpuKernelCacheFile

func (x *DebugOptions) GetXlaGpuKernelCacheFile() string

func (*DebugOptions) GetXlaGpuLhsEnableGpuAsyncTracker

func (x *DebugOptions) GetXlaGpuLhsEnableGpuAsyncTracker() bool

func (*DebugOptions) GetXlaGpuLibnvjitlinkMode added in v0.4.9

func (x *DebugOptions) GetXlaGpuLibnvjitlinkMode() DebugOptions_LibNvJitLinkMode

func (*DebugOptions) GetXlaGpuLlvmIrFile

func (x *DebugOptions) GetXlaGpuLlvmIrFile() []string

func (*DebugOptions) GetXlaGpuLlvmVerificationLevel

func (x *DebugOptions) GetXlaGpuLlvmVerificationLevel() int32

func (*DebugOptions) GetXlaGpuLoadAutotuneResultsFrom

func (x *DebugOptions) GetXlaGpuLoadAutotuneResultsFrom() string

func (*DebugOptions) GetXlaGpuMemoryLimitSlopFactor

func (x *DebugOptions) GetXlaGpuMemoryLimitSlopFactor() int32

func (*DebugOptions) GetXlaGpuMockCustomCalls

func (x *DebugOptions) GetXlaGpuMockCustomCalls() bool

func (*DebugOptions) GetXlaGpuMultiStreamedWindowedEinsum

func (x *DebugOptions) GetXlaGpuMultiStreamedWindowedEinsum() bool

func (*DebugOptions) GetXlaGpuNcclCollectiveMaxNchannels

func (x *DebugOptions) GetXlaGpuNcclCollectiveMaxNchannels() int64

func (*DebugOptions) GetXlaGpuNcclP2PMaxNchannels

func (x *DebugOptions) GetXlaGpuNcclP2PMaxNchannels() int64

func (*DebugOptions) GetXlaGpuNcclTerminateOnError

func (x *DebugOptions) GetXlaGpuNcclTerminateOnError() bool

func (*DebugOptions) GetXlaGpuNcclTerminationTimeoutSeconds

func (x *DebugOptions) GetXlaGpuNcclTerminationTimeoutSeconds() int64

func (*DebugOptions) GetXlaGpuOperandBytesThresholdForWindowedEinsum added in v0.4.7

func (x *DebugOptions) GetXlaGpuOperandBytesThresholdForWindowedEinsum() int64

func (*DebugOptions) GetXlaGpuOverrideGemmAutotuner

func (x *DebugOptions) GetXlaGpuOverrideGemmAutotuner() string

func (*DebugOptions) GetXlaGpuPerFusionAutotuneCacheDir

func (x *DebugOptions) GetXlaGpuPerFusionAutotuneCacheDir() string

func (*DebugOptions) GetXlaGpuPgleAccuracyChecker added in v0.4.7

func (x *DebugOptions) GetXlaGpuPgleAccuracyChecker() DebugOptions_PGLEStrictnessLevel

func (*DebugOptions) GetXlaGpuPgleProfileFileOrDirectoryPath

func (x *DebugOptions) GetXlaGpuPgleProfileFileOrDirectoryPath() string

func (*DebugOptions) GetXlaGpuPtxFile

func (x *DebugOptions) GetXlaGpuPtxFile() []string

func (*DebugOptions) GetXlaGpuReduceScatterCombineThresholdBytes

func (x *DebugOptions) GetXlaGpuReduceScatterCombineThresholdBytes() int64

func (*DebugOptions) GetXlaGpuRedzonePaddingBytes

func (x *DebugOptions) GetXlaGpuRedzonePaddingBytes() int64

func (*DebugOptions) GetXlaGpuRedzoneScratchMaxMegabytes

func (x *DebugOptions) GetXlaGpuRedzoneScratchMaxMegabytes() int64

func (*DebugOptions) GetXlaGpuRequireCompleteAotAutotuneResults

func (x *DebugOptions) GetXlaGpuRequireCompleteAotAutotuneResults() bool

func (*DebugOptions) GetXlaGpuShapeChecks

func (x *DebugOptions) GetXlaGpuShapeChecks() DebugOptions_ShapeChecks

func (*DebugOptions) GetXlaGpuShardAutotuning

func (x *DebugOptions) GetXlaGpuShardAutotuning() bool

func (*DebugOptions) GetXlaGpuStrictConvAlgorithmPicker

func (x *DebugOptions) GetXlaGpuStrictConvAlgorithmPicker() bool

func (*DebugOptions) GetXlaGpuTargetConfigFilename

func (x *DebugOptions) GetXlaGpuTargetConfigFilename() string

func (*DebugOptions) GetXlaGpuTempBufferUseSeparateColor

func (x *DebugOptions) GetXlaGpuTempBufferUseSeparateColor() bool

func (*DebugOptions) GetXlaGpuThresholdForWindowedEinsumMib

func (x *DebugOptions) GetXlaGpuThresholdForWindowedEinsumMib() int64

func (*DebugOptions) GetXlaGpuTritonFusionLevel

func (x *DebugOptions) GetXlaGpuTritonFusionLevel() int32

func (*DebugOptions) GetXlaGpuTritonGemmAny

func (x *DebugOptions) GetXlaGpuTritonGemmAny() bool

func (*DebugOptions) GetXlaGpuTritonGemmDisableReducedPrecisionReduction

func (x *DebugOptions) GetXlaGpuTritonGemmDisableReducedPrecisionReduction() bool

func (*DebugOptions) GetXlaGpuUnsafeFallbackToDriverOnPtxasNotFound

func (x *DebugOptions) GetXlaGpuUnsafeFallbackToDriverOnPtxasNotFound() bool

func (*DebugOptions) GetXlaGpuUnsafePipelinedLoopAnnotator

func (x *DebugOptions) GetXlaGpuUnsafePipelinedLoopAnnotator() bool

func (*DebugOptions) GetXlaGpuUnsupportedEnableTritonGemm

func (x *DebugOptions) GetXlaGpuUnsupportedEnableTritonGemm() bool

func (*DebugOptions) GetXlaGpuUseMemcpyLocalP2P

func (x *DebugOptions) GetXlaGpuUseMemcpyLocalP2P() bool

func (*DebugOptions) GetXlaGpuUseRuntimeFusion

func (x *DebugOptions) GetXlaGpuUseRuntimeFusion() bool

func (*DebugOptions) GetXlaGpuVerifyTritonFusionNumerics

func (x *DebugOptions) GetXlaGpuVerifyTritonFusionNumerics() bool

func (*DebugOptions) GetXlaHloEvaluatorUseFastPath

func (x *DebugOptions) GetXlaHloEvaluatorUseFastPath() bool

func (*DebugOptions) GetXlaHloGraphAddresses

func (x *DebugOptions) GetXlaHloGraphAddresses() bool

func (*DebugOptions) GetXlaHloGraphShardingColor

func (x *DebugOptions) GetXlaHloGraphShardingColor() bool

func (*DebugOptions) GetXlaHloProfile

func (x *DebugOptions) GetXlaHloProfile() bool

func (*DebugOptions) GetXlaLlvmDisableExpensivePasses

func (x *DebugOptions) GetXlaLlvmDisableExpensivePasses() bool

func (*DebugOptions) GetXlaLlvmEnableAliasScopeMetadata

func (x *DebugOptions) GetXlaLlvmEnableAliasScopeMetadata() bool

func (*DebugOptions) GetXlaLlvmEnableInvariantLoadMetadata

func (x *DebugOptions) GetXlaLlvmEnableInvariantLoadMetadata() bool

func (*DebugOptions) GetXlaLlvmEnableNoaliasMetadata

func (x *DebugOptions) GetXlaLlvmEnableNoaliasMetadata() bool

func (*DebugOptions) GetXlaLlvmForceInlineBeforeSplit

func (x *DebugOptions) GetXlaLlvmForceInlineBeforeSplit() bool

func (*DebugOptions) GetXlaMultiheapSizeConstraintPerHeap

func (x *DebugOptions) GetXlaMultiheapSizeConstraintPerHeap() int32

func (*DebugOptions) GetXlaPartitioningAlgorithm

func (x *DebugOptions) GetXlaPartitioningAlgorithm() DebugOptions_PartitioningAlgorithm

func (*DebugOptions) GetXlaPjrtAllowAutoLayoutInHlo added in v0.4.9

func (x *DebugOptions) GetXlaPjrtAllowAutoLayoutInHlo() bool

func (*DebugOptions) GetXlaReduceWindowRewriteBaseLength

func (x *DebugOptions) GetXlaReduceWindowRewriteBaseLength() int64

func (*DebugOptions) GetXlaStepMarkerLocation

func (x *DebugOptions) GetXlaStepMarkerLocation() DebugOptions_StepMarkerLocation

func (*DebugOptions) GetXlaSyntaxSugarAsyncOps

func (x *DebugOptions) GetXlaSyntaxSugarAsyncOps() bool

func (*DebugOptions) GetXlaTestAllInputLayouts

func (x *DebugOptions) GetXlaTestAllInputLayouts() bool

func (*DebugOptions) GetXlaTestAllOutputLayouts

func (x *DebugOptions) GetXlaTestAllOutputLayouts() bool

func (*DebugOptions) GetXlaTpuDetectInf

func (x *DebugOptions) GetXlaTpuDetectInf() bool

func (*DebugOptions) GetXlaTpuDetectNan

func (x *DebugOptions) GetXlaTpuDetectNan() bool

func (*DebugOptions) ProtoMessage

func (*DebugOptions) ProtoMessage()

func (*DebugOptions) ProtoReflect

func (x *DebugOptions) ProtoReflect() protoreflect.Message

func (*DebugOptions) Reset

func (x *DebugOptions) Reset()

func (*DebugOptions) String

func (x *DebugOptions) String() string

type DebugOptions_AutotuneCacheMode added in v0.4.0

type DebugOptions_AutotuneCacheMode int32
const (
	DebugOptions_AUTOTUNE_CACHE_MODE_UNSPECIFIED DebugOptions_AutotuneCacheMode = 0
	// If the cache exists per fusion autotuner loads it and terminates,
	// otherwise runs autotuner and dumps the result.
	DebugOptions_AUTOTUNE_CACHE_MODE_UPDATE DebugOptions_AutotuneCacheMode = 1
	// Sets readonly access to the cache for the per fusion autotuner. Same as
	// above, but doesn't dump anything.
	DebugOptions_AUTOTUNE_CACHE_MODE_READ DebugOptions_AutotuneCacheMode = 2
)

func (DebugOptions_AutotuneCacheMode) Descriptor added in v0.4.0

func (DebugOptions_AutotuneCacheMode) Enum added in v0.4.0

func (DebugOptions_AutotuneCacheMode) EnumDescriptor deprecated added in v0.4.0

func (DebugOptions_AutotuneCacheMode) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_AutotuneCacheMode.Descriptor instead.

func (DebugOptions_AutotuneCacheMode) Number added in v0.4.0

func (DebugOptions_AutotuneCacheMode) String added in v0.4.0

func (DebugOptions_AutotuneCacheMode) Type added in v0.4.0

type DebugOptions_CollectiveOpType

type DebugOptions_CollectiveOpType int32

Enum to define all collective ops that xla supports.

const (
	DebugOptions_NOOP                DebugOptions_CollectiveOpType = 0
	DebugOptions_ALLREDUCE           DebugOptions_CollectiveOpType = 1
	DebugOptions_ALLGATHER           DebugOptions_CollectiveOpType = 2
	DebugOptions_REDUCESCATTER       DebugOptions_CollectiveOpType = 3
	DebugOptions_COLLECTIVEBROADCAST DebugOptions_CollectiveOpType = 4
	DebugOptions_ALLTOALL            DebugOptions_CollectiveOpType = 5
	DebugOptions_COLLECTIVEPERMUTE   DebugOptions_CollectiveOpType = 6
)

func (DebugOptions_CollectiveOpType) Descriptor

func (DebugOptions_CollectiveOpType) Enum

func (DebugOptions_CollectiveOpType) EnumDescriptor deprecated

func (DebugOptions_CollectiveOpType) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_CollectiveOpType.Descriptor instead.

func (DebugOptions_CollectiveOpType) Number

func (DebugOptions_CollectiveOpType) String

func (DebugOptions_CollectiveOpType) Type

type DebugOptions_CommandBufferCmdType

type DebugOptions_CommandBufferCmdType int32

Commands are categorized into 5 types: FUSION represents regular fusion kernels. CUBLAS/CUBLASLT, CUDNN, and COLLECTIVES represent library calls. CONDITIONALS represents control flow.

const (
	DebugOptions_INVALID       DebugOptions_CommandBufferCmdType = 0
	DebugOptions_FUSION        DebugOptions_CommandBufferCmdType = 1
	DebugOptions_CUBLAS        DebugOptions_CommandBufferCmdType = 2
	DebugOptions_CUDNN         DebugOptions_CommandBufferCmdType = 3
	DebugOptions_COLLECTIVES   DebugOptions_CommandBufferCmdType = 4
	DebugOptions_CONDITIONALS  DebugOptions_CommandBufferCmdType = 5
	DebugOptions_CUSTOM_CALL   DebugOptions_CommandBufferCmdType = 6
	DebugOptions_CUBLASLT      DebugOptions_CommandBufferCmdType = 7
	DebugOptions_DYNAMIC_SLICE DebugOptions_CommandBufferCmdType = 8
)

func (DebugOptions_CommandBufferCmdType) Descriptor

func (DebugOptions_CommandBufferCmdType) Enum

func (DebugOptions_CommandBufferCmdType) EnumDescriptor deprecated

func (DebugOptions_CommandBufferCmdType) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_CommandBufferCmdType.Descriptor instead.

func (DebugOptions_CommandBufferCmdType) Number

func (DebugOptions_CommandBufferCmdType) String

func (DebugOptions_CommandBufferCmdType) Type

type DebugOptions_LibNvJitLinkMode added in v0.4.9

type DebugOptions_LibNvJitLinkMode int32
const (
	// LibNvJitLink is used if it is available and no buggy version has been
	// detected.
	DebugOptions_LIB_NV_JIT_LINK_MODE_AUTO DebugOptions_LibNvJitLinkMode = 0
	// LibNvJitLink is never used.
	DebugOptions_LIB_NV_JIT_LINK_MODE_DISABLED DebugOptions_LibNvJitLinkMode = 1
	// LibNvJitLink is used always. If it is not available, compilation will
	// fail.
	DebugOptions_LIB_NV_JIT_LINK_MODE_ENABLED DebugOptions_LibNvJitLinkMode = 2
)

func (DebugOptions_LibNvJitLinkMode) Descriptor added in v0.4.9

func (DebugOptions_LibNvJitLinkMode) Enum added in v0.4.9

func (DebugOptions_LibNvJitLinkMode) EnumDescriptor deprecated added in v0.4.9

func (DebugOptions_LibNvJitLinkMode) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_LibNvJitLinkMode.Descriptor instead.

func (DebugOptions_LibNvJitLinkMode) Number added in v0.4.9

func (DebugOptions_LibNvJitLinkMode) String added in v0.4.9

func (DebugOptions_LibNvJitLinkMode) Type added in v0.4.9

type DebugOptions_PGLEStrictnessLevel added in v0.4.7

type DebugOptions_PGLEStrictnessLevel int32

Enables strict PGLE checking. If an FDO profile is specified and the latency hiding scheduler encounters missing instructions in the profile, compilation will halt or warn depending on the value of this option.

const (
	DebugOptions_PGLE_STRICTNESS_LEVEL_OFF   DebugOptions_PGLEStrictnessLevel = 0
	DebugOptions_PGLE_STRICTNESS_LEVEL_WARN  DebugOptions_PGLEStrictnessLevel = 1
	DebugOptions_PGLE_STRICTNESS_LEVEL_ERROR DebugOptions_PGLEStrictnessLevel = 2
)

func (DebugOptions_PGLEStrictnessLevel) Descriptor added in v0.4.7

func (DebugOptions_PGLEStrictnessLevel) Enum added in v0.4.7

func (DebugOptions_PGLEStrictnessLevel) EnumDescriptor deprecated added in v0.4.7

func (DebugOptions_PGLEStrictnessLevel) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_PGLEStrictnessLevel.Descriptor instead.

func (DebugOptions_PGLEStrictnessLevel) Number added in v0.4.7

func (DebugOptions_PGLEStrictnessLevel) String added in v0.4.7

func (DebugOptions_PGLEStrictnessLevel) Type added in v0.4.7

type DebugOptions_PartitioningAlgorithm

type DebugOptions_PartitioningAlgorithm int32
const (
	DebugOptions_PARTITIONING_ALGORITHM_NOOP DebugOptions_PartitioningAlgorithm = 0
	DebugOptions_PARTITIONING_ALGORITHM_EXP0 DebugOptions_PartitioningAlgorithm = 1
	DebugOptions_PARTITIONING_ALGORITHM_EXP1 DebugOptions_PartitioningAlgorithm = 2
	DebugOptions_PARTITIONING_ALGORITHM_EXP2 DebugOptions_PartitioningAlgorithm = 3
)

func (DebugOptions_PartitioningAlgorithm) Descriptor

func (DebugOptions_PartitioningAlgorithm) Enum

func (DebugOptions_PartitioningAlgorithm) EnumDescriptor deprecated

func (DebugOptions_PartitioningAlgorithm) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_PartitioningAlgorithm.Descriptor instead.

func (DebugOptions_PartitioningAlgorithm) Number

func (DebugOptions_PartitioningAlgorithm) String

func (DebugOptions_PartitioningAlgorithm) Type

type DebugOptions_ShapeChecks

type DebugOptions_ShapeChecks int32
const (
	// Do not insert any shape checks for dynamically shaped operations; output
	// buffers might contain garbage data if shapes don't match.
	DebugOptions_IGNORE DebugOptions_ShapeChecks = 0
	// Check shapes at runtime, will insert an extra synchronization if shapes
	// cannot be proven correct at compile time.
	DebugOptions_RUNTIME DebugOptions_ShapeChecks = 1
	// Will refuse to compile any program where shape correctness can not be
	// established at compile time.
	DebugOptions_COMPILE_TIME DebugOptions_ShapeChecks = 2
)

func (DebugOptions_ShapeChecks) Descriptor

func (DebugOptions_ShapeChecks) Enum

func (DebugOptions_ShapeChecks) EnumDescriptor deprecated

func (DebugOptions_ShapeChecks) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_ShapeChecks.Descriptor instead.

func (DebugOptions_ShapeChecks) Number

func (DebugOptions_ShapeChecks) String

func (x DebugOptions_ShapeChecks) String() string

func (DebugOptions_ShapeChecks) Type

type DebugOptions_StepMarkerLocation

type DebugOptions_StepMarkerLocation int32
const (
	// Generate a step marker at the program entry. This handles the case where
	// each step is done by one or multiple program execution(s). Only the first
	// program will be tagged for generating a step marker at the program entry.
	// This is the default.
	DebugOptions_STEP_MARK_AT_ENTRY DebugOptions_StepMarkerLocation = 0
	// Generate a step marker at each iteration of the top level while loop,
	// which is assumed to be a training loop.
	DebugOptions_STEP_MARK_AT_TOP_LEVEL_WHILE_LOOP DebugOptions_StepMarkerLocation = 1
	// Generate a step marker at each iteration of the second level while loops,
	// which is assumed to be a training or eval loop.
	DebugOptions_STEP_MARK_AT_SECOND_LEVEL_WHILE_LOOP DebugOptions_StepMarkerLocation = 3
	// No step marker generated.
	DebugOptions_STEP_MARK_NONE DebugOptions_StepMarkerLocation = 2
)

func (DebugOptions_StepMarkerLocation) Descriptor

func (DebugOptions_StepMarkerLocation) Enum

func (DebugOptions_StepMarkerLocation) EnumDescriptor deprecated

func (DebugOptions_StepMarkerLocation) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_StepMarkerLocation.Descriptor instead.

func (DebugOptions_StepMarkerLocation) Number

func (DebugOptions_StepMarkerLocation) String

func (DebugOptions_StepMarkerLocation) Type

type DebugOptions_WhileLoopUnrolling

type DebugOptions_WhileLoopUnrolling int32
const (
	DebugOptions_WHILE_LOOP_UNROLLING_NO_UNROLL DebugOptions_WhileLoopUnrolling = 0
	// Has the same effect as setting
	// `xla_gpu_enable_while_loop_double_buffering`.
	DebugOptions_WHILE_LOOP_UNROLLING_DOUBLE_BUFFER DebugOptions_WhileLoopUnrolling = 1
	// Enables full loop unrolling using the same strategy as `DOUBLE_BUFFER`.
	DebugOptions_WHILE_LOOP_UNROLLING_FULL_UNROLL DebugOptions_WhileLoopUnrolling = 2
	// Enables loop unrolling when we have at least one collective within a
	// while loop.
	DebugOptions_WHILE_LOOP_UNROLLING_AUTO_UNROLL DebugOptions_WhileLoopUnrolling = 3
)

func (DebugOptions_WhileLoopUnrolling) Descriptor

func (DebugOptions_WhileLoopUnrolling) Enum

func (DebugOptions_WhileLoopUnrolling) EnumDescriptor deprecated

func (DebugOptions_WhileLoopUnrolling) EnumDescriptor() ([]byte, []int)

Deprecated: Use DebugOptions_WhileLoopUnrolling.Descriptor instead.

func (DebugOptions_WhileLoopUnrolling) Number

func (DebugOptions_WhileLoopUnrolling) String

func (DebugOptions_WhileLoopUnrolling) Type

type ExecutionOptions

type ExecutionOptions struct {

	// This optional field's layout is used as a hint when storing the output of
	// this computation.  Subsequent transfers of this output array to the client
	// may be faster when using this layout.
	//
	// We use a Shape here to accommodate computations that return a tuple.
	ShapeWithOutputLayout *xla_data.ShapeProto `` /* 128-byte string literal not displayed */
	// Used to seed random-number generators used in this computation.  If this is
	// 0, we generate a seed ourselves.
	//
	// TODO(b/32083678): Changing the seed unnecessarily forces a recompilation.
	Seed         uint64        `protobuf:"varint,3,opt,name=seed,proto3" json:"seed,omitempty"`
	DebugOptions *DebugOptions `protobuf:"bytes,4,opt,name=debug_options,json=debugOptions,proto3" json:"debug_options,omitempty"`
	// This optional field specifies a particular set of devices to run the
	// computation on. The computation will be partitioned across these devices.
	// If not provided, the default device will be chosen.
	DeviceHandles []*xla_data.DeviceHandle `protobuf:"bytes,5,rep,name=device_handles,json=deviceHandles,proto3" json:"device_handles,omitempty"`
	// Number of replicas of the computation to run. If zero, uses the default
	// number of replicas for the XLA service.
	NumReplicas int32 `protobuf:"varint,6,opt,name=num_replicas,json=numReplicas,proto3" json:"num_replicas,omitempty"`
	// This optional field specifies the device assignment if known at compile
	// time.
	DeviceAssignment *xla_data.DeviceAssignmentProto `protobuf:"bytes,7,opt,name=device_assignment,json=deviceAssignment,proto3" json:"device_assignment,omitempty"`
	// Alias input and output buffers for parameters that are passed-through XLA
	// modules without being changed.
	AliasPassthroughParams bool `` /* 130-byte string literal not displayed */
	// Number of partitions of the computation to run (model parallelism).
	// If zero, uses the default number of partitions for the XLA service.
	NumPartitions int32 `protobuf:"varint,9,opt,name=num_partitions,json=numPartitions,proto3" json:"num_partitions,omitempty"`
	// Used to identify a set of programs that should be launched together.
	LaunchId int32 `protobuf:"varint,10,opt,name=launch_id,json=launchId,proto3" json:"launch_id,omitempty"`
	// Indicates whether to use SPMD (true) or MPMD (false) partitioning when
	// num_partitions > 1 and XLA is requested to partition the input program.
	UseSpmdPartitioning bool `protobuf:"varint,11,opt,name=use_spmd_partitioning,json=useSpmdPartitioning,proto3" json:"use_spmd_partitioning,omitempty"`
	// Whether to automatically generate XLA shardings for SPMD partitioner.
	UseAutoSpmdPartitioning bool `` /* 136-byte string literal not displayed */
	// Device mesh shape used to create the sharding search space when
	// use_auto_spmd_partitioning=true.
	AutoSpmdPartitioningMeshShape []int64 `` /* 163-byte string literal not displayed */
	// Device mesh ids compatible with the above mesh_shape used when
	// use_auto_spmd_partitioning=true.
	AutoSpmdPartitioningMeshIds []int64 `` /* 157-byte string literal not displayed */
	// The amount of effort to spend on optimizing for minimizing program
	// execution time, as a value in [-1.0, +1.0]. The baseline is 0.0, which
	// strongly prioritizes execution time at the cost of longer compile times,
	// suitable for production workloads. A value of -0.5 would be appropriate for
	// research use cases that prefer faster compilations to iterate more quickly.
	// Positive values, on the other hand, might enable costly optimizations that
	// are off by default.
	ExecTimeOptimizationEffort float32 `` /* 146-byte string literal not displayed */
	// The amount of effort to spend on making the program fit in memory (where
	// "fit in memory" here has a backend-dependent meaning), as a value in
	// [-1.0,+1.0]. The baseline is 0.0, which expends significant effort on
	// attempting to make the program fit. A value of -1.0 would be appropriate
	// for use cases that wish to spend minimal effort here and fail as quickly as
	// possible instead. Positive values, on the other hand, might enable costly
	// algorithms to reduce memory usage that are off by default.
	MemoryFittingEffort float32 `protobuf:"fixed32,26,opt,name=memory_fitting_effort,json=memoryFittingEffort,proto3" json:"memory_fitting_effort,omitempty"`
	// If set, deduplicate hlo into function calls to reduce binary size. Only
	// works on TPU.
	DeduplicateHlo bool `protobuf:"varint,12,opt,name=deduplicate_hlo,json=deduplicateHlo,proto3" json:"deduplicate_hlo,omitempty"`
	// Allows sharding propagation to propagate to the parameters. This changes
	// the input shape of the computation (which is undesirable), but it can be
	// used to run partial compilation to determine what would be the
	// input sharding of a computation if XLA would be allowed to propagate the
	// sharding which can be used by higher level framework as a way to query
	// intermediate sharding of operations when multiple computation would be
	// chained and merged together.
	// This is a vector of bool, because the user can control which parameters can
	// have the sharding substituted. If only one boolean value is passed in the
	// vector, it is interpreted as the value to be applied for every parameter.
	AllowSpmdShardingPropagationToParameters []bool `` /* 198-byte string literal not displayed */
	// Allows sharding propagation to propagate to the outputs. This changes the
	// output shape of the computation (which is undesirable), but it can be used
	// to run partial compilation to determine what would be the output
	// sharding of a computation if XLA would be allowed to propagate the sharding
	// which can be used by higher level framework as a way to query intermediate
	// sharding of operations when multiple computation would be chained and
	// merged together.
	// This is a vector of bool, because the user can control (if the output of
	// the computation is a tuple) which elements of the tuple can have the
	// sharding substituted and which don't. If only one boolean value is passed
	// in the vector, it is interpreted as the value to be applied for every
	// single element of the output tuple. One value per element of the tuple
	// means that each value is attached to one of the output elements.
	AllowSpmdShardingPropagationToOutput []bool `` /* 186-byte string literal not displayed */
	// Whether to broadcast args across all replicas. One entry per arg.
	ParamRequiresBroadcastViaCollectives []bool `` /* 184-byte string literal not displayed */
	// If enabled, the compiler may generate sharding and unsharding programs as
	// separate HLO modules, and modify the main program's input and output to
	// be sharded.
	AllowSeparateShardingPrograms bool `` /* 154-byte string literal not displayed */
	// The list of input/output pairs in the main program that could be sharded.
	ShardableValueUpdatePairs []*ShardableValueUpdatePairProto `` /* 141-byte string literal not displayed */
	// Profiling data for feedback directed optimizations. Note that this is not
	// the only way to feed FDO data into the compiler and individual backends
	// may choose to get FDO data by other means.
	FdoProfile []byte `protobuf:"bytes,21,opt,name=fdo_profile,json=fdoProfile,proto3" json:"fdo_profile,omitempty"`
	// Amount of device memory available for the executable to use.
	DeviceMemorySize int64 `protobuf:"varint,22,opt,name=device_memory_size,json=deviceMemorySize,proto3" json:"device_memory_size,omitempty"`
	// Use Shardy, a new partitioner, to replace the existing
	// ShardingPropagation and SpmdPartitioner. See go/xla-sdy-pipeline for
	// details.
	UseShardyPartitioner bool `protobuf:"varint,24,opt,name=use_shardy_partitioner,json=useShardyPartitioner,proto3" json:"use_shardy_partitioner,omitempty"`
	// contains filtered or unexported fields
}

These settings control how XLA compiles and/or runs code. Not all settings will have an effect on every platform.

When adding new fields, keep in mind that boolean fields default to false. Next id: 27.

func (*ExecutionOptions) Descriptor deprecated

func (*ExecutionOptions) Descriptor() ([]byte, []int)

Deprecated: Use ExecutionOptions.ProtoReflect.Descriptor instead.

func (*ExecutionOptions) GetAliasPassthroughParams

func (x *ExecutionOptions) GetAliasPassthroughParams() bool

func (*ExecutionOptions) GetAllowSeparateShardingPrograms

func (x *ExecutionOptions) GetAllowSeparateShardingPrograms() bool

func (*ExecutionOptions) GetAllowSpmdShardingPropagationToOutput

func (x *ExecutionOptions) GetAllowSpmdShardingPropagationToOutput() []bool

func (*ExecutionOptions) GetAllowSpmdShardingPropagationToParameters

func (x *ExecutionOptions) GetAllowSpmdShardingPropagationToParameters() []bool

func (*ExecutionOptions) GetAutoSpmdPartitioningMeshIds

func (x *ExecutionOptions) GetAutoSpmdPartitioningMeshIds() []int64

func (*ExecutionOptions) GetAutoSpmdPartitioningMeshShape

func (x *ExecutionOptions) GetAutoSpmdPartitioningMeshShape() []int64

func (*ExecutionOptions) GetDebugOptions

func (x *ExecutionOptions) GetDebugOptions() *DebugOptions

func (*ExecutionOptions) GetDeduplicateHlo

func (x *ExecutionOptions) GetDeduplicateHlo() bool

func (*ExecutionOptions) GetDeviceAssignment

func (x *ExecutionOptions) GetDeviceAssignment() *xla_data.DeviceAssignmentProto

func (*ExecutionOptions) GetDeviceHandles

func (x *ExecutionOptions) GetDeviceHandles() []*xla_data.DeviceHandle

func (*ExecutionOptions) GetDeviceMemorySize

func (x *ExecutionOptions) GetDeviceMemorySize() int64

func (*ExecutionOptions) GetExecTimeOptimizationEffort added in v0.4.7

func (x *ExecutionOptions) GetExecTimeOptimizationEffort() float32

func (*ExecutionOptions) GetFdoProfile

func (x *ExecutionOptions) GetFdoProfile() []byte

func (*ExecutionOptions) GetLaunchId

func (x *ExecutionOptions) GetLaunchId() int32

func (*ExecutionOptions) GetMemoryFittingEffort added in v0.4.7

func (x *ExecutionOptions) GetMemoryFittingEffort() float32

func (*ExecutionOptions) GetNumPartitions

func (x *ExecutionOptions) GetNumPartitions() int32

func (*ExecutionOptions) GetNumReplicas

func (x *ExecutionOptions) GetNumReplicas() int32

func (*ExecutionOptions) GetParamRequiresBroadcastViaCollectives

func (x *ExecutionOptions) GetParamRequiresBroadcastViaCollectives() []bool

func (*ExecutionOptions) GetSeed

func (x *ExecutionOptions) GetSeed() uint64

func (*ExecutionOptions) GetShapeWithOutputLayout

func (x *ExecutionOptions) GetShapeWithOutputLayout() *xla_data.ShapeProto

func (*ExecutionOptions) GetShardableValueUpdatePairs

func (x *ExecutionOptions) GetShardableValueUpdatePairs() []*ShardableValueUpdatePairProto

func (*ExecutionOptions) GetUseAutoSpmdPartitioning

func (x *ExecutionOptions) GetUseAutoSpmdPartitioning() bool

func (*ExecutionOptions) GetUseShardyPartitioner

func (x *ExecutionOptions) GetUseShardyPartitioner() bool

func (*ExecutionOptions) GetUseSpmdPartitioning

func (x *ExecutionOptions) GetUseSpmdPartitioning() bool

func (*ExecutionOptions) ProtoMessage

func (*ExecutionOptions) ProtoMessage()

func (*ExecutionOptions) ProtoReflect

func (x *ExecutionOptions) ProtoReflect() protoreflect.Message

func (*ExecutionOptions) Reset

func (x *ExecutionOptions) Reset()

func (*ExecutionOptions) String

func (x *ExecutionOptions) String() string

type GpuCompilationEnvironment

type GpuCompilationEnvironment struct {

	// Temporary dummy flag is added to test the flow.
	// To be removed when we add flags here.
	DummyFlag int64 `protobuf:"varint,1,opt,name=dummy_flag,json=dummyFlag,proto3" json:"dummy_flag,omitempty"`
	// contains filtered or unexported fields
}

Contains flags that affect the GPU compilation result. These flags are part of Debug Options as of now, and will be migrated to this proto.

func (*GpuCompilationEnvironment) Descriptor deprecated

func (*GpuCompilationEnvironment) Descriptor() ([]byte, []int)

Deprecated: Use GpuCompilationEnvironment.ProtoReflect.Descriptor instead.

func (*GpuCompilationEnvironment) GetDummyFlag

func (x *GpuCompilationEnvironment) GetDummyFlag() int64

func (*GpuCompilationEnvironment) ProtoMessage

func (*GpuCompilationEnvironment) ProtoMessage()

func (*GpuCompilationEnvironment) ProtoReflect

func (*GpuCompilationEnvironment) Reset

func (x *GpuCompilationEnvironment) Reset()

func (*GpuCompilationEnvironment) String

func (x *GpuCompilationEnvironment) String() string

type HloModuleConfigProto

type HloModuleConfigProto struct {
	EntryComputationLayout               *xla_data.ProgramShapeProto     `` /* 129-byte string literal not displayed */
	Seed                                 uint64                          `protobuf:"varint,2,opt,name=seed,proto3" json:"seed,omitempty"`
	LaunchId                             int32                           `protobuf:"varint,3,opt,name=launch_id,json=launchId,proto3" json:"launch_id,omitempty"`
	ReplicaCount                         int64                           `protobuf:"varint,4,opt,name=replica_count,json=replicaCount,proto3" json:"replica_count,omitempty"`
	NumPartitions                        int64                           `protobuf:"varint,5,opt,name=num_partitions,json=numPartitions,proto3" json:"num_partitions,omitempty"`
	ParamRequiresBroadcastViaCollectives []bool                          `` /* 183-byte string literal not displayed */
	UseSpmdPartitioning                  bool                            `protobuf:"varint,7,opt,name=use_spmd_partitioning,json=useSpmdPartitioning,proto3" json:"use_spmd_partitioning,omitempty"`
	UseAutoSpmdPartitioning              bool                            `` /* 135-byte string literal not displayed */
	AutoSpmdPartitioningMeshShape        []int64                         `` /* 162-byte string literal not displayed */
	AutoSpmdPartitioningMeshIds          []int64                         `` /* 157-byte string literal not displayed */
	ExecTimeOptimizationEffort           float32                         `` /* 146-byte string literal not displayed */
	MemoryFittingEffort                  float32                         `protobuf:"fixed32,37,opt,name=memory_fitting_effort,json=memoryFittingEffort,proto3" json:"memory_fitting_effort,omitempty"`
	DeduplicateHlo                       bool                            `protobuf:"varint,11,opt,name=deduplicate_hlo,json=deduplicateHlo,proto3" json:"deduplicate_hlo,omitempty"`
	IntraOpParallelismThreads            int64                           `` /* 142-byte string literal not displayed */
	DeviceType                           string                          `protobuf:"bytes,13,opt,name=device_type,json=deviceType,proto3" json:"device_type,omitempty"`
	DebugOptions                         *DebugOptions                   `protobuf:"bytes,14,opt,name=debug_options,json=debugOptions,proto3" json:"debug_options,omitempty"`
	StaticDeviceAssignment               *xla_data.DeviceAssignmentProto `` /* 130-byte string literal not displayed */
	// The original device assignment before being changed by a simulator.
	// Simulators, like HybridSim, may change the device assignment to a smaller
	// topology, to make simulation easier.
	PreSimulationDeviceAssignment            *xla_data.DeviceAssignmentProto             `` /* 153-byte string literal not displayed */
	AllowSeparateShardingPrograms            bool                                        `` /* 154-byte string literal not displayed */
	ShardableValueUpdatePairs                []*ShardableValueUpdatePairProto            `` /* 141-byte string literal not displayed */
	AliasPassthroughParams                   bool                                        `` /* 131-byte string literal not displayed */
	ContentAwareComputationSorting           bool                                        `` /* 157-byte string literal not displayed */
	FusionConfigCollection                   HloModuleConfigProto_FusionConfigCollection `` /* 184-byte string literal not displayed */
	FusionConfig                             []*HloModuleConfigProto_BoolList            `protobuf:"bytes,20,rep,name=fusion_config,json=fusionConfig,proto3" json:"fusion_config,omitempty"`
	DotConfig                                map[string]*HloModuleConfigProto_Int64List  `` /* 177-byte string literal not displayed */
	LayoutConfig                             []*HloModuleConfigProto_Int64ListList       `protobuf:"bytes,22,rep,name=layout_config,json=layoutConfig,proto3" json:"layout_config,omitempty"`
	MemorySpaceAssignmentConfig              []uint64                                    `` /* 155-byte string literal not displayed */
	PhaseOrderingConfig                      []*HloModuleConfigProto_BoolList            `protobuf:"bytes,24,rep,name=phase_ordering_config,json=phaseOrderingConfig,proto3" json:"phase_ordering_config,omitempty"`
	PhaseIndex                               int32                                       `protobuf:"varint,25,opt,name=phase_index,json=phaseIndex,proto3" json:"phase_index,omitempty"`
	AllowSpmdShardingPropagationToParameters []bool                                      `` /* 198-byte string literal not displayed */
	AllowSpmdShardingPropagationToOutput     []bool                                      `` /* 186-byte string literal not displayed */
	AnalysisAllowanceMap                     map[string]int64                            `` /* 213-byte string literal not displayed */
	MatrixUnitOperandPrecision               xla_data.PrecisionConfig_Precision          `` /* 180-byte string literal not displayed */
	FdoProfile                               []byte                                      `protobuf:"bytes,31,opt,name=fdo_profile,json=fdoProfile,proto3" json:"fdo_profile,omitempty"`
	DeviceMemorySize                         int64                                       `protobuf:"varint,32,opt,name=device_memory_size,json=deviceMemorySize,proto3" json:"device_memory_size,omitempty"`
	UseShardyPartitioner                     bool                                        `protobuf:"varint,34,opt,name=use_shardy_partitioner,json=useShardyPartitioner,proto3" json:"use_shardy_partitioner,omitempty"`
	// contains filtered or unexported fields
}

Serialization of HloModuleConfig. See the C++ class definition for descriptions of each field. There are no guarantees of backwards or forwards compatibility. Next id: 38.

func (*HloModuleConfigProto) Descriptor deprecated

func (*HloModuleConfigProto) Descriptor() ([]byte, []int)

Deprecated: Use HloModuleConfigProto.ProtoReflect.Descriptor instead.

func (*HloModuleConfigProto) GetAliasPassthroughParams

func (x *HloModuleConfigProto) GetAliasPassthroughParams() bool

func (*HloModuleConfigProto) GetAllowSeparateShardingPrograms

func (x *HloModuleConfigProto) GetAllowSeparateShardingPrograms() bool

func (*HloModuleConfigProto) GetAllowSpmdShardingPropagationToOutput

func (x *HloModuleConfigProto) GetAllowSpmdShardingPropagationToOutput() []bool

func (*HloModuleConfigProto) GetAllowSpmdShardingPropagationToParameters

func (x *HloModuleConfigProto) GetAllowSpmdShardingPropagationToParameters() []bool

func (*HloModuleConfigProto) GetAnalysisAllowanceMap

func (x *HloModuleConfigProto) GetAnalysisAllowanceMap() map[string]int64

func (*HloModuleConfigProto) GetAutoSpmdPartitioningMeshIds

func (x *HloModuleConfigProto) GetAutoSpmdPartitioningMeshIds() []int64

func (*HloModuleConfigProto) GetAutoSpmdPartitioningMeshShape

func (x *HloModuleConfigProto) GetAutoSpmdPartitioningMeshShape() []int64

func (*HloModuleConfigProto) GetContentAwareComputationSorting

func (x *HloModuleConfigProto) GetContentAwareComputationSorting() bool

func (*HloModuleConfigProto) GetDebugOptions

func (x *HloModuleConfigProto) GetDebugOptions() *DebugOptions

func (*HloModuleConfigProto) GetDeduplicateHlo

func (x *HloModuleConfigProto) GetDeduplicateHlo() bool

func (*HloModuleConfigProto) GetDeviceMemorySize

func (x *HloModuleConfigProto) GetDeviceMemorySize() int64

func (*HloModuleConfigProto) GetDeviceType

func (x *HloModuleConfigProto) GetDeviceType() string

func (*HloModuleConfigProto) GetDotConfig

func (*HloModuleConfigProto) GetEntryComputationLayout

func (x *HloModuleConfigProto) GetEntryComputationLayout() *xla_data.ProgramShapeProto

func (*HloModuleConfigProto) GetExecTimeOptimizationEffort added in v0.4.7

func (x *HloModuleConfigProto) GetExecTimeOptimizationEffort() float32

func (*HloModuleConfigProto) GetFdoProfile

func (x *HloModuleConfigProto) GetFdoProfile() []byte

func (*HloModuleConfigProto) GetFusionConfig

func (x *HloModuleConfigProto) GetFusionConfig() []*HloModuleConfigProto_BoolList

func (*HloModuleConfigProto) GetFusionConfigCollection

func (*HloModuleConfigProto) GetIntraOpParallelismThreads

func (x *HloModuleConfigProto) GetIntraOpParallelismThreads() int64

func (*HloModuleConfigProto) GetLaunchId

func (x *HloModuleConfigProto) GetLaunchId() int32

func (*HloModuleConfigProto) GetLayoutConfig

func (*HloModuleConfigProto) GetMatrixUnitOperandPrecision

func (x *HloModuleConfigProto) GetMatrixUnitOperandPrecision() xla_data.PrecisionConfig_Precision

func (*HloModuleConfigProto) GetMemoryFittingEffort added in v0.4.7

func (x *HloModuleConfigProto) GetMemoryFittingEffort() float32

func (*HloModuleConfigProto) GetMemorySpaceAssignmentConfig

func (x *HloModuleConfigProto) GetMemorySpaceAssignmentConfig() []uint64

func (*HloModuleConfigProto) GetNumPartitions

func (x *HloModuleConfigProto) GetNumPartitions() int64

func (*HloModuleConfigProto) GetParamRequiresBroadcastViaCollectives

func (x *HloModuleConfigProto) GetParamRequiresBroadcastViaCollectives() []bool

func (*HloModuleConfigProto) GetPhaseIndex

func (x *HloModuleConfigProto) GetPhaseIndex() int32

func (*HloModuleConfigProto) GetPhaseOrderingConfig

func (x *HloModuleConfigProto) GetPhaseOrderingConfig() []*HloModuleConfigProto_BoolList

func (*HloModuleConfigProto) GetPreSimulationDeviceAssignment added in v0.4.2

func (x *HloModuleConfigProto) GetPreSimulationDeviceAssignment() *xla_data.DeviceAssignmentProto

func (*HloModuleConfigProto) GetReplicaCount

func (x *HloModuleConfigProto) GetReplicaCount() int64

func (*HloModuleConfigProto) GetSeed

func (x *HloModuleConfigProto) GetSeed() uint64

func (*HloModuleConfigProto) GetShardableValueUpdatePairs

func (x *HloModuleConfigProto) GetShardableValueUpdatePairs() []*ShardableValueUpdatePairProto

func (*HloModuleConfigProto) GetStaticDeviceAssignment

func (x *HloModuleConfigProto) GetStaticDeviceAssignment() *xla_data.DeviceAssignmentProto

func (*HloModuleConfigProto) GetUseAutoSpmdPartitioning

func (x *HloModuleConfigProto) GetUseAutoSpmdPartitioning() bool

func (*HloModuleConfigProto) GetUseShardyPartitioner

func (x *HloModuleConfigProto) GetUseShardyPartitioner() bool

func (*HloModuleConfigProto) GetUseSpmdPartitioning

func (x *HloModuleConfigProto) GetUseSpmdPartitioning() bool

func (*HloModuleConfigProto) ProtoMessage

func (*HloModuleConfigProto) ProtoMessage()

func (*HloModuleConfigProto) ProtoReflect

func (x *HloModuleConfigProto) ProtoReflect() protoreflect.Message

func (*HloModuleConfigProto) Reset

func (x *HloModuleConfigProto) Reset()

func (*HloModuleConfigProto) String

func (x *HloModuleConfigProto) String() string

type HloModuleConfigProto_BoolList

type HloModuleConfigProto_BoolList struct {
	Vals []bool `protobuf:"varint,1,rep,packed,name=vals,proto3" json:"vals,omitempty"`
	// contains filtered or unexported fields
}

func (*HloModuleConfigProto_BoolList) Descriptor deprecated

func (*HloModuleConfigProto_BoolList) Descriptor() ([]byte, []int)

Deprecated: Use HloModuleConfigProto_BoolList.ProtoReflect.Descriptor instead.

func (*HloModuleConfigProto_BoolList) GetVals

func (x *HloModuleConfigProto_BoolList) GetVals() []bool

func (*HloModuleConfigProto_BoolList) ProtoMessage

func (*HloModuleConfigProto_BoolList) ProtoMessage()

func (*HloModuleConfigProto_BoolList) ProtoReflect

func (*HloModuleConfigProto_BoolList) Reset

func (x *HloModuleConfigProto_BoolList) Reset()

func (*HloModuleConfigProto_BoolList) String

type HloModuleConfigProto_FusionConfigCollection

type HloModuleConfigProto_FusionConfigCollection int32
const (
	HloModuleConfigProto_OFF      HloModuleConfigProto_FusionConfigCollection = 0 // Do not collect configuration.
	HloModuleConfigProto_PER_EDGE HloModuleConfigProto_FusionConfigCollection = 1 // Collect per-edge configuration.
	HloModuleConfigProto_PER_NODE HloModuleConfigProto_FusionConfigCollection = 2 // Collect per-node configuration.
)

func (HloModuleConfigProto_FusionConfigCollection) Descriptor

func (HloModuleConfigProto_FusionConfigCollection) Enum

func (HloModuleConfigProto_FusionConfigCollection) EnumDescriptor deprecated

func (HloModuleConfigProto_FusionConfigCollection) EnumDescriptor() ([]byte, []int)

Deprecated: Use HloModuleConfigProto_FusionConfigCollection.Descriptor instead.

func (HloModuleConfigProto_FusionConfigCollection) Number

func (HloModuleConfigProto_FusionConfigCollection) String

func (HloModuleConfigProto_FusionConfigCollection) Type

type HloModuleConfigProto_Int64List

type HloModuleConfigProto_Int64List struct {
	Vals []int64 `protobuf:"varint,1,rep,packed,name=vals,proto3" json:"vals,omitempty"`
	// contains filtered or unexported fields
}

func (*HloModuleConfigProto_Int64List) Descriptor deprecated

func (*HloModuleConfigProto_Int64List) Descriptor() ([]byte, []int)

Deprecated: Use HloModuleConfigProto_Int64List.ProtoReflect.Descriptor instead.

func (*HloModuleConfigProto_Int64List) GetVals

func (x *HloModuleConfigProto_Int64List) GetVals() []int64

func (*HloModuleConfigProto_Int64List) ProtoMessage

func (*HloModuleConfigProto_Int64List) ProtoMessage()

func (*HloModuleConfigProto_Int64List) ProtoReflect

func (*HloModuleConfigProto_Int64List) Reset

func (x *HloModuleConfigProto_Int64List) Reset()

func (*HloModuleConfigProto_Int64List) String

type HloModuleConfigProto_Int64ListList

type HloModuleConfigProto_Int64ListList struct {
	Lists []*HloModuleConfigProto_Int64List `protobuf:"bytes,1,rep,name=lists,proto3" json:"lists,omitempty"`
	// contains filtered or unexported fields
}

func (*HloModuleConfigProto_Int64ListList) Descriptor deprecated

func (*HloModuleConfigProto_Int64ListList) Descriptor() ([]byte, []int)

Deprecated: Use HloModuleConfigProto_Int64ListList.ProtoReflect.Descriptor instead.

func (*HloModuleConfigProto_Int64ListList) GetLists

func (*HloModuleConfigProto_Int64ListList) ProtoMessage

func (*HloModuleConfigProto_Int64ListList) ProtoMessage()

func (*HloModuleConfigProto_Int64ListList) ProtoReflect

func (*HloModuleConfigProto_Int64ListList) Reset

func (*HloModuleConfigProto_Int64ListList) String

type HloModuleProtoWithConfig

type HloModuleProtoWithConfig struct {
	HloModule *hlo.HloModuleProto   `protobuf:"bytes,1,opt,name=hlo_module,json=hloModule,proto3" json:"hlo_module,omitempty"`
	Config    *HloModuleConfigProto `protobuf:"bytes,2,opt,name=config,proto3" json:"config,omitempty"`
	// contains filtered or unexported fields
}

func (*HloModuleProtoWithConfig) Descriptor deprecated

func (*HloModuleProtoWithConfig) Descriptor() ([]byte, []int)

Deprecated: Use HloModuleProtoWithConfig.ProtoReflect.Descriptor instead.

func (*HloModuleProtoWithConfig) GetConfig

func (*HloModuleProtoWithConfig) GetHloModule

func (x *HloModuleProtoWithConfig) GetHloModule() *hlo.HloModuleProto

func (*HloModuleProtoWithConfig) ProtoMessage

func (*HloModuleProtoWithConfig) ProtoMessage()

func (*HloModuleProtoWithConfig) ProtoReflect

func (x *HloModuleProtoWithConfig) ProtoReflect() protoreflect.Message

func (*HloModuleProtoWithConfig) Reset

func (x *HloModuleProtoWithConfig) Reset()

func (*HloModuleProtoWithConfig) String

func (x *HloModuleProtoWithConfig) String() string

type ScheduleProto

type ScheduleProto struct {
	Instructions []*ScheduleProto_Instruction `protobuf:"bytes,1,rep,name=instructions,proto3" json:"instructions,omitempty"`
	// Computation id (matches the id in HloComputationProto).
	ComputationId        int64               `protobuf:"varint,2,opt,name=computation_id,json=computationId,proto3" json:"computation_id,omitempty"`
	HloModule            *hlo.HloModuleProto `protobuf:"bytes,3,opt,name=hlo_module,json=hloModule,proto3" json:"hlo_module,omitempty"`
	CyclesPerMicrosecond int64               `protobuf:"varint,4,opt,name=cycles_per_microsecond,json=cyclesPerMicrosecond,proto3" json:"cycles_per_microsecond,omitempty"`
	// contains filtered or unexported fields
}

A trace estimated by the Latency Hiding Scheduler.

func (*ScheduleProto) Descriptor deprecated

func (*ScheduleProto) Descriptor() ([]byte, []int)

Deprecated: Use ScheduleProto.ProtoReflect.Descriptor instead.

func (*ScheduleProto) GetComputationId

func (x *ScheduleProto) GetComputationId() int64

func (*ScheduleProto) GetCyclesPerMicrosecond

func (x *ScheduleProto) GetCyclesPerMicrosecond() int64

func (*ScheduleProto) GetHloModule

func (x *ScheduleProto) GetHloModule() *hlo.HloModuleProto

func (*ScheduleProto) GetInstructions

func (x *ScheduleProto) GetInstructions() []*ScheduleProto_Instruction

func (*ScheduleProto) ProtoMessage

func (*ScheduleProto) ProtoMessage()

func (*ScheduleProto) ProtoReflect

func (x *ScheduleProto) ProtoReflect() protoreflect.Message

func (*ScheduleProto) Reset

func (x *ScheduleProto) Reset()

func (*ScheduleProto) String

func (x *ScheduleProto) String() string

type ScheduleProto_Instruction

type ScheduleProto_Instruction struct {

	// Instruction id (matches the id in HloInstructionProto).
	Id int64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"`
	// Start and end timestamps in cycles.
	StartTimestampCycles float64 `protobuf:"fixed64,2,opt,name=start_timestamp_cycles,json=startTimestampCycles,proto3" json:"start_timestamp_cycles,omitempty"`
	EndTimestampCycles   float64 `protobuf:"fixed64,3,opt,name=end_timestamp_cycles,json=endTimestampCycles,proto3" json:"end_timestamp_cycles,omitempty"`
	// contains filtered or unexported fields
}

func (*ScheduleProto_Instruction) Descriptor deprecated

func (*ScheduleProto_Instruction) Descriptor() ([]byte, []int)

Deprecated: Use ScheduleProto_Instruction.ProtoReflect.Descriptor instead.

func (*ScheduleProto_Instruction) GetEndTimestampCycles

func (x *ScheduleProto_Instruction) GetEndTimestampCycles() float64

func (*ScheduleProto_Instruction) GetId

func (x *ScheduleProto_Instruction) GetId() int64

func (*ScheduleProto_Instruction) GetStartTimestampCycles

func (x *ScheduleProto_Instruction) GetStartTimestampCycles() float64

func (*ScheduleProto_Instruction) ProtoMessage

func (*ScheduleProto_Instruction) ProtoMessage()

func (*ScheduleProto_Instruction) ProtoReflect

func (*ScheduleProto_Instruction) Reset

func (x *ScheduleProto_Instruction) Reset()

func (*ScheduleProto_Instruction) String

func (x *ScheduleProto_Instruction) String() string

type ShardableValueUpdatePairProto

type ShardableValueUpdatePairProto struct {
	InputParameterNumber int64   `protobuf:"varint,1,opt,name=input_parameter_number,json=inputParameterNumber,proto3" json:"input_parameter_number,omitempty"`
	ParameterShapeIndex  []int64 `` /* 128-byte string literal not displayed */
	OutputShapeIndex     []int64 `protobuf:"varint,3,rep,packed,name=output_shape_index,json=outputShapeIndex,proto3" json:"output_shape_index,omitempty"`
	// contains filtered or unexported fields
}

func (*ShardableValueUpdatePairProto) Descriptor deprecated

func (*ShardableValueUpdatePairProto) Descriptor() ([]byte, []int)

Deprecated: Use ShardableValueUpdatePairProto.ProtoReflect.Descriptor instead.

func (*ShardableValueUpdatePairProto) GetInputParameterNumber

func (x *ShardableValueUpdatePairProto) GetInputParameterNumber() int64

func (*ShardableValueUpdatePairProto) GetOutputShapeIndex

func (x *ShardableValueUpdatePairProto) GetOutputShapeIndex() []int64

func (*ShardableValueUpdatePairProto) GetParameterShapeIndex

func (x *ShardableValueUpdatePairProto) GetParameterShapeIndex() []int64

func (*ShardableValueUpdatePairProto) ProtoMessage

func (*ShardableValueUpdatePairProto) ProtoMessage()

func (*ShardableValueUpdatePairProto) ProtoReflect

func (*ShardableValueUpdatePairProto) Reset

func (x *ShardableValueUpdatePairProto) Reset()

func (*ShardableValueUpdatePairProto) String

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL