Documentation
¶
Overview ¶
Package gputrace provides parsing for .gputrace GPU trace files from Metal.
A .gputrace file is a directory bundle containing multiple files that represent Metal GPU capture data. This package provides utilities to parse trace metadata, extract kernel names, labels, and timing information.
Index ¶
- Constants
- Variables
- type APICallList
- type BufferBinding
- type CDispatchRecord
- type CRecord
- type CSRecord
- type CiRecord
- type CiululRecord
- type CommandBuffer
- type CommandBufferBinding
- type CommandBufferCalls
- type CommandBufferInfo
- type ComputeEncoder
- type CtRecord
- type CtURecord
- type CttRecord
- type CulRecord
- type CululRecord
- type CuwRecord
- type DebugGroupLabel
- type DependencyEdge
- type DependencyEvent
- type DependencyGraph
- type DependencyNode
- type DispatchCall
- type DispatchEstimate
- type DispatchInfo
- type DispatchThreads
- type EncoderInfo
- type EncoderSection
- type EncoderTiming
- type EnhancedMetadata
- type EventType
- type FormattedAPICall
- type FunctionRecord
- type GPUExecutionInterval
- type HazardType
- type IndexData
- type InitCall
- type KDebugEvent
- type KDebugParser
- type KernelStat
- type MTLResourceUsage
- type MTSPHeader
- type MTSPRecord
- func (r *MTSPRecord) ParseCRecord() (*CRecord, error)
- func (r *MTSPRecord) ParseCiRecord() (*CiRecord, error)
- func (r *MTSPRecord) ParseCiululRecord() (*CiululRecord, error)
- func (r *MTSPRecord) ParseCtRecord() (*CtRecord, error)
- func (r *MTSPRecord) ParseCtURecord() (*CtURecord, error)
- func (r *MTSPRecord) ParseCttRecord() (*CttRecord, error)
- func (r *MTSPRecord) ParseCtululRecord() (*CttRecord, error)
- func (r *MTSPRecord) ParseCulRecord() (*CulRecord, error)
- func (r *MTSPRecord) ParseCululRecord() (*CululRecord, error)
- func (r *MTSPRecord) ParseCuwRecord() (*CuwRecord, error)
- func (r *MTSPRecord) ParseDispatchRecord() (*CDispatchRecord, error)
- type Metadata
- type PipelineFunctionMap
- type PipelineStateEvent
- type RecordType
- type ResourceBinding
- type TextureBinding
- type TimingStat
- type Trace
- func (t *Trace) AnalyzeKernels() (map[string]*KernelStat, error)
- func (t *Trace) AnalyzeMTSPRecords() (string, error)
- func (t *Trace) AnalyzeStoreStructure() (string, error)
- func (t *Trace) AnalyzeTraceStructure() string
- func (t *Trace) BuildDependencyGraph() (*DependencyGraph, error)
- func (t *Trace) BuildPipelineFunctionMap() PipelineFunctionMap
- func (t *Trace) Close() error
- func (t *Trace) CountActualDispatches() (int, error)
- func (t *Trace) CountCSRecords() (int, error)
- func (t *Trace) CountCommandBuffers() (int, error)
- func (t *Trace) CountComputeEncoders() (int, error)
- func (t *Trace) CountDispatchCalls() (int, error)
- func (t *Trace) DecompressStore(storeNum int) ([]byte, error)
- func (t *Trace) EstimateDispatches() (*DispatchEstimate, error)
- func (t *Trace) ExtractEnhancedMetadata() (*EnhancedMetadata, error)
- func (t *Trace) FormatAPICallList(w io.Writer) error
- func (t *Trace) FormatAPICallListFull(w io.Writer) error
- func (t *Trace) FormatCommandBufferSummary(w io.Writer) error
- func (t *Trace) GetDebugGroupForLabel(encoderLabel string) string
- func (t *Trace) GetKernelNameCSRecords() ([]*CSRecord, error)
- func (t *Trace) GetUUIDCSRecords() ([]*CSRecord, error)
- func (t *Trace) HasPerfCounters() bool
- func (t *Trace) ParseAPICallList() (*APICallList, error)
- func (t *Trace) ParseCSRecords() ([]*CSRecord, error)
- func (t *Trace) ParseCommandBuffers() ([]*CommandBuffer, error)
- func (t *Trace) ParseComputeEncoders() ([]*ComputeEncoder, error)
- func (t *Trace) ParseDependencyEvents() ([]DependencyEvent, error)
- func (t *Trace) ParseDispatchCalls() ([]*DispatchCall, error)
- func (t *Trace) ParseDispatchInRegion(data []byte, baseOffset int64) ([]DispatchThreads, error)
- func (t *Trace) ParseIndex() (*XDICIndex, error)
- func (t *Trace) ParseIndexFile() (*IndexData, error)
- func (t *Trace) ParseMTSPFromData(data []byte) ([]MTSPRecord, error)
- func (t *Trace) ParseMTSPRecords() ([]MTSPRecord, error)
- func (t *Trace) ParseNestedRecords(rec MTSPRecord) ([]MTSPRecord, error)
- type XDICIndex
Constants ¶
const (
	KDebugClassGPU = 0x85 // GPU event class

	// GPU subclasses
	KDebugGPUSubmission = 0x3 // GPU command submission
	KDebugGPUExecutionStart = 0x90 // GPU execution start
	KDebugGPUExecutionEnd = 0xA9 // GPU execution end
)
GPU-related kdebug codes (class 0x85)
const (
	RecordTypeCS = "CS" // Command submission with kernel name
	RecordTypeCt = "Ct" // Command type/transition?
	RecordTypeCtt = "Ctt" // Command type extended?
	RecordTypeCU = "CU" // Command unknown?
	RecordTypeCulul = "Culul" // Command buffer marker
	RecordTypeCiulul = "Ciulul" // Compute Indirect ulul?
	RecordTypeCtulul = "Ctulul" // Command type ulul?
	RecordTypeC = "C" // Generic Command (Pop, EndEncoding, etc.)
	RecordTypeC_3ul = "C@3ul@3ul" // Dispatch threads
	RecordTypeCuw = "Cuw" // Command write?
	RecordTypeCi = "Ci" // Command info?
	RecordTypeCul = "Cul" // Command?
	RecordTypeCut = "Cut" // Command type extended?
	RecordTypeCSuwuw = "CSuwuw" // Command Submission uwuw?
	RecordTypeCiulSl = "CiulSl" // Command info ul Sl?
	RecordTypeCtU = "CtU" // Buffer definition (CtU<b>ulul)
	RecordTypeUnknown = "Unknown" // Fallback for valid-looking records
)
MTSP Record Types observed in capture files
const (
	MagicMTSP = "MTSP"
	MagicXDIC = "xdic"
	MagicBPList = "bplist00"
	MagicMTLB = "MTLB"
)
Magic bytes for different file formats.
Variables ¶
Functions ¶
This section is empty.
Types ¶
type APICallList ¶
type APICallList struct {
InitCalls []InitCall `json:"init_calls"`
CommandBuffers []CommandBufferCalls `json:"command_buffers"`
}
APICallList represents a complete list of API calls for a trace.
type BufferBinding ¶
BufferBinding represents a bound buffer argument.
type CDispatchRecord ¶
type CDispatchRecord struct {
RecordSize uint32
CommandFlags uint32
EncoderID uint64 // Address at 0x30
GridSize [3]uint32
GroupSize [3]uint32
}
CDispatchRecord represents a compute dispatch record (C@3ul@3ul).
type CSRecord ¶
type CSRecord struct {
Offset int64 // File offset where CS marker was found
Address uint64 // Pipeline state or encoder address
Identifier string // Either UUID (for pipeline states) or kernel name
IsKernelName bool // True if Identifier is a kernel name, false if UUID
}
CSRecord represents a Command Submission record from the capture file. CS records mark encoder boundaries and associate them with pipeline states or kernel names.
type CiRecord ¶
type CiRecord struct {
RecordSize uint32 // Total record size (always 52)
CommandFlags uint32 // Command type/flags
Field1 uint32 // Unknown field at offset 0x20
ICBAddr uint64 // Indirect command buffer address at offset 0x28
Count uint32 // Dispatch count or index at offset 0x30
Field2 uint32 // Unknown field at offset 0x34
}
CiRecord represents a parsed Ci (Compute Indirect / ICB) record. These records appear to reference indirect command buffers or command groups. Always 52 bytes in size.
type CiululRecord ¶
type CiululRecord struct {
RecordSize uint32
}
CiululRecord is the parsed form of a Ciulul record.
type CommandBuffer ¶
type CommandBuffer struct {
// Index in the trace (0-based)
Index int
// Timestamp when the command buffer was committed
Timestamp uint64
// UUID uniquely identifying this command buffer
UUID string
// Offset in the capture file where this CUUU record appears
Offset int64
}
CommandBuffer represents a Metal command buffer captured in the trace.
type CommandBufferBinding ¶
CommandBufferBinding represents a buffer binding within a command buffer.
type CommandBufferCalls ¶
type CommandBufferCalls struct {
Index int `json:"index"`
Address uint64 `json:"address"`
QueueAddress uint64 `json:"queue_address"`
CallNumber int `json:"call_number"`
Label string `json:"label,omitempty"`
Calls []FormattedAPICall `json:"calls"`
}
CommandBufferCalls represents all API calls for a single command buffer.
type CommandBufferInfo ¶
type CommandBufferInfo struct {
Index int
Address uint64
Label string
Encoders int
StartTime uint64
EndTime uint64
}
CommandBufferInfo represents a Metal command buffer.
type ComputeEncoder ¶
type ComputeEncoder struct {
// Index in the trace (0-based)
Index int
// Address/ID of the encoder
Address uint64
// Label/name of the encoder (from CS record)
Label string
// Offset in the capture file where this CS record appears
Offset int64
}
ComputeEncoder represents a Metal compute command encoder in the trace.
type CtRecord ¶
type CtRecord struct {
RecordSize uint32 // Total record size in bytes
CommandFlags uint32 // Command type/flags
PipelineAddr uint64 // Pipeline state object address
FunctionAddr uint64 // Metal function address
BindingCount uint32 // Number of resource bindings
Stride uint32 // Binding array stride (always 8)
BufferBindings []uint64 // Array of buffer addresses
ResourceBindings []ResourceBinding // Parsed resource bindings with usage flags
}
CtRecord represents a parsed Ct (Command) record containing pipeline state, function, and buffer binding information.
type CttRecord ¶
type CttRecord struct {
RecordSize uint32
CommandFlags uint32 // Command flags
DeviceAddr uint64
FunctionAddr uint64
PipelineAddr uint64
BindingCount uint32 // Number of resource bindings
Stride uint32 // Binding array stride (always 8)
BufferBindings []uint64 // Array of buffer addresses
ResourceBindings []ResourceBinding // Parsed resource bindings with usage flags
}
CttRecord is the parsed form of a Ctt record. Its layout matches the structure expected by the existing manual parsing in ParseCttRecords: device addr at +0x04, function addr at +0x0C, pipeline addr at +0x20.
type CulRecord ¶
type CulRecord struct {
RecordSize uint32 // Total record size
CommandFlags uint32 // Command type/flags
MarkerCount uint32 // Count at offset 0x20
BufferAddr uint64 // Buffer address at offset 0x28
Field1 uint32 // Unknown at offset 0x30
Field2 uint32 // Unknown at offset 0x34
PayloadSize uint32 // Size field (when present)
PayloadAddr uint64 // Payload address (when present)
ArrayCount uint32 // Number of array elements
ArrayStride uint32 // Array stride
ArrayAddresses []uint64 // Array of addresses
}
CulRecord represents a parsed Cul record. Variable size, appears to contain buffer or resource bindings.
type CululRecord ¶
type CululRecord struct {
RecordSize uint32 // Total record size (usually 160)
CommandFlags uint32 // Command type/flags
MarkerCount uint32 // Count at offset 0x20
ICBAddr uint64 // ICB or buffer address at offset 0x28
Field1 uint32 // Unknown at offset 0x30
Field2 uint32 // Unknown at offset 0x34
Field3 uint32 // Unknown at offset 0x38
PayloadSize uint32 // Size field at offset 0x40
PayloadAddr uint64 // Payload address at offset 0x48
ArrayCount uint32 // Number of array elements at offset 0x50
ArrayStride uint32 // Array stride at offset 0x54
ArrayAddresses []uint64 // Array of addresses starting at offset 0x58
}
CululRecord represents a parsed Culul record. These appear to be command buffer or indirect command buffer definitions. Usually 160 bytes (sometimes 168).
type CuwRecord ¶
type CuwRecord struct {
RecordSize uint32 // Total record size (56, 68, or 124)
CommandFlags uint32 // Command type/flags
MarkerCount uint32 // Count at offset 0x20
BufferAddr uint64 // Buffer address at offset 0x28
Field1 uint64 // Unknown at offset 0x30 (size 68+)
Field2 uint32 // Unknown (size 68+)
}
CuwRecord represents a parsed Cuw record. Two common sizes: 56 bytes (66.4%) and 68 bytes (33.2%). The 68-byte variant appears 4,397 times (same as Ci count).
type DebugGroupLabel ¶
type DebugGroupLabel struct {
Label string // e.g., "training_iteration:forward_pass:linear_layer"
Offset int // Byte offset in capture file where this label appears
}
DebugGroupLabel represents a hierarchical debug group label with its position in the capture.
type DependencyEdge ¶
type DependencyEdge struct {
From int
To int
Buffer string // Name of the buffer causing dependency
Hazard HazardType // Type of memory hazard
}
type DependencyEvent ¶
type DependencyEvent struct {
Offset int64
Type EventType
Label string // For CS
Address uint64 // For Bind/Use
Name string // For Bind
Usage MTLResourceUsage // Resource usage flags (Read, Write, Sample)
}
DependencyEvent represents a trace event relevant to dependencies.
type DependencyGraph ¶
type DependencyGraph struct {
Nodes []DependencyNode
Edges []DependencyEdge
}
DependencyGraph represents the data flow between operations.
type DependencyNode ¶
type DispatchCall ¶
type DispatchCall struct {
// Index in the trace (0-based)
Index int
// Offset in the capture file where this dispatch marker appears
Offset int64
}
DispatchCall represents a compute kernel dispatch call in the trace.
type DispatchEstimate ¶
type DispatchEstimate struct {
Count int // Estimated dispatch count
Confidence float64 // Confidence level (0.0 to 1.0)
Method string // Method used for estimation
Notes string // Additional notes about the estimate
}
DispatchEstimate represents an estimated dispatch count with confidence level.
type DispatchInfo ¶
type DispatchInfo struct {
KernelName string
ThreadGroup [3]uint32
Threads [3]uint32
StartTime uint64
EndTime uint64
}
DispatchInfo represents a single compute dispatch.
type DispatchThreads ¶
type DispatchThreads struct {
// Thread dimensions
ThreadsX, ThreadsY, ThreadsZ uint64
// Threads per threadgroup dimensions
ThreadsPerGroupX, ThreadsPerGroupY, ThreadsPerGroupZ uint64
// Offset in capture file
Offset int64
}
DispatchThreads represents dispatch thread configuration.
type EncoderInfo ¶
type EncoderInfo struct {
Index int
Label string
Dispatches []DispatchInfo
}
EncoderInfo represents a compute encoder.
type EncoderSection ¶
type EncoderSection struct {
Label string
Address uint64
PipelineAddr uint64
StartOffset int64
EndOffset int64 // Offset where this encoder ends (next encoder or end of CB)
}
EncoderSection represents a compute encoder and its associated calls.
type EncoderTiming ¶
type EncoderTiming struct {
Label string `json:"label"`
KernelName string `json:"kernel_name,omitempty"`
StartTimestamp uint64 `json:"start_timestamp"`
EndTimestamp uint64 `json:"end_timestamp"`
DurationNs uint64 `json:"duration_ns"`
DurationMs float64 `json:"duration_ms"`
Percentage float32 `json:"percentage"`
QueueID uint64 `json:"queue_id,omitempty"`
CommandQueue string `json:"command_queue,omitempty"`
}
EncoderTiming represents GPU timing information for a compute encoder. This is a core type used throughout the system for representing timing data.
type EnhancedMetadata ¶
type EnhancedMetadata struct {
CommandBuffers []CommandBufferInfo
Encoders []EncoderInfo
BufferBindings []BufferBinding
TextureBindings []TextureBinding
TotalKernels int
}
EnhancedMetadata contains detailed information from GPU trace.
type FormattedAPICall ¶
type FormattedAPICall struct {
CallNumber int `json:"call_number"`
Indented bool `json:"indented,omitempty"`
Type string `json:"type"`
Address uint64 `json:"address,omitempty"`
Details string `json:"details"`
Label string `json:"label,omitempty"`
Offset int64 `json:"offset"`
}
FormattedAPICall represents a complete API call with all details.
type FunctionRecord ¶
type FunctionRecord struct {
CSAddress uint64 // The CS record address
FuncAddress uint64 // The runtime function address (if found)
Label string // The label/name
Offset int64 // Offset in the capture file
}
FunctionRecord represents a parsed CS record with function label information.
type GPUExecutionInterval ¶
type GPUExecutionInterval struct {
SubmissionEvent *KDebugEvent
StartEvent *KDebugEvent
EndEvent *KDebugEvent
CommandBufferID uint64
EncoderID uint64
}
GPUExecutionInterval represents a GPU execution interval from kdebug events.
func CorrelateGPUExecution ¶
func CorrelateGPUExecution(events []*KDebugEvent) []*GPUExecutionInterval
CorrelateGPUExecution correlates submission, start, and end events into intervals.
func (*GPUExecutionInterval) Duration ¶
func (interval *GPUExecutionInterval) Duration() uint64
Duration returns the GPU execution duration in nanoseconds.
type HazardType ¶
type HazardType int
HazardType represents the type of memory hazard causing a dependency.
const (
	// HazardRAW is Read After Write - reader depends on writer completing.
	HazardRAW HazardType = iota
	// HazardWAW is Write After Write - second write must wait for first write.
	HazardWAW
	// HazardWAR is Write After Read - write must wait for read to complete.
	HazardWAR
)
func (HazardType) String ¶
func (h HazardType) String() string
type InitCall ¶
type InitCall struct {
CallNumber int `json:"call_number"`
Type string `json:"type"`
Address uint64 `json:"address"`
Info string `json:"info"`
Label string `json:"label,omitempty"`
Offset int64 `json:"offset"`
}
InitCall represents an initialization API call before the first command buffer.
type KDebugEvent ¶
type KDebugEvent struct {
Timestamp uint64 // Mach absolute time
ThreadID uint64 // Thread that triggered the event
DebugID uint32 // Debug code (class + subclass + code)
CPUNum uint32 // CPU number
Args [4]uint64 // Event-specific arguments
}
KDebugEvent represents a kernel debug trace event. These events provide low-level GPU timing information.
func (*KDebugEvent) GetEventClass ¶
func (event *KDebugEvent) GetEventClass() uint8
GetEventClass extracts the event class from debug ID.
func (*KDebugEvent) GetEventSubclass ¶
func (event *KDebugEvent) GetEventSubclass() uint8
GetEventSubclass extracts the event subclass from debug ID.
func (*KDebugEvent) IsGPUExecutionEnd ¶
func (event *KDebugEvent) IsGPUExecutionEnd() bool
IsGPUExecutionEnd checks if this is a GPU execution end event.
func (*KDebugEvent) IsGPUExecutionStart ¶
func (event *KDebugEvent) IsGPUExecutionStart() bool
IsGPUExecutionStart checks if this is a GPU execution start event.
func (*KDebugEvent) IsGPUSubmission ¶
func (event *KDebugEvent) IsGPUSubmission() bool
IsGPUSubmission checks if this is a GPU command submission event.
type KDebugParser ¶
type KDebugParser struct {
// contains filtered or unexported fields
}
KDebugParser parses kernel debug events from trace files.
func NewKDebugParser ¶
func NewKDebugParser(trace *Trace) *KDebugParser
NewKDebugParser creates a new kdebug parser.
func (*KDebugParser) ParseKDebugEvents ¶
func (p *KDebugParser) ParseKDebugEvents() ([]*KDebugEvent, error)
ParseKDebugEvents extracts kdebug events from the trace. These are typically stored in auxiliary trace files, not the main .gputrace bundle.
type KernelStat ¶
type KernelStat struct {
Name string
PipelineAddr uint64
DispatchCount int
DebugGroups map[string]int // Debug group -> count
EncoderLabels map[string]int // Encoder label -> count
}
KernelStat holds statistics for a kernel function.
type MTLResourceUsage ¶
type MTLResourceUsage uint8
MTLResourceUsage represents Metal resource usage flags. These match Apple's MTLResourceUsage enum values.
const (
	MTLResourceUsageRead MTLResourceUsage = 0x01 // Buffer is read by shader
	MTLResourceUsageWrite MTLResourceUsage = 0x02 // Buffer is written by shader
	MTLResourceUsageSample MTLResourceUsage = 0x04 // Texture is sampled
)
func (MTLResourceUsage) IsRead ¶
func (u MTLResourceUsage) IsRead() bool
IsRead returns true if the usage includes read access.
func (MTLResourceUsage) IsReadWrite ¶
func (u MTLResourceUsage) IsReadWrite() bool
IsReadWrite returns true if the usage includes both read and write access.
func (MTLResourceUsage) IsWrite ¶
func (u MTLResourceUsage) IsWrite() bool
IsWrite returns true if the usage includes write access.
func (MTLResourceUsage) String ¶
func (u MTLResourceUsage) String() string
String returns a human-readable representation of the usage flags.
type MTSPHeader ¶
MTSPHeader represents the header of an MTSP file.
func ReadMTSPHeader ¶
func ReadMTSPHeader(data []byte) (*MTSPHeader, error)
ReadMTSPHeader reads the MTSP header from data.
type MTSPRecord ¶
type MTSPRecord struct {
Type string // Record type (CS, CU, Culul, etc.)
Offset int // Offset in file where record starts
Size int // Size of record in bytes
Data []byte // Raw record data
// Parsed fields (type-specific)
Label string // For CS records: kernel/stream name
Address uint64 // Memory address
FunctionAddr uint64 // Metal function address (for CiulSl)
Pointers []uint64 // Referenced pointers
Values []uint32 // Embedded values
Name string // Buffer name (for CtU)
SecondaryAddr uint64 // Function address (for CS Library records)
}
MTSPRecord represents a parsed MTSP record from the capture file.
func (*MTSPRecord) ParseCRecord ¶
func (r *MTSPRecord) ParseCRecord() (*CRecord, error)
func (*MTSPRecord) ParseCiRecord ¶
func (r *MTSPRecord) ParseCiRecord() (*CiRecord, error)
ParseCiRecord parses a Ci (Compute Indirect / ICB) record.
Ci Record Structure (52 bytes):
Offset | Size | Type | Field Name
-------|------|---------|------------------
0x00 | 4 | uint32 | record_size (always 52)
0x04 | 4 | uint32 | command_flags
0x08 | 24 | bytes | reserved
0x20 | 4 | uint32 | field1
0x24 | 4 | char[4] | marker ("Ci\0\0")
0x28 | 8 | uint64 | icb_addr
0x30 | 4 | uint32 | count
0x34 | 4 | uint32 | field2
func (*MTSPRecord) ParseCiululRecord ¶
func (r *MTSPRecord) ParseCiululRecord() (*CiululRecord, error)
func (*MTSPRecord) ParseCtRecord ¶
func (r *MTSPRecord) ParseCtRecord() (*CtRecord, error)
ParseCtRecord parses a Ct (Command) record to extract pipeline state, function address, and buffer bindings.
Ct Record Structure:
Offset | Size | Type | Field Name
-------|------|---------|------------------
0x00 | 4 | uint32 | record_size
0x04 | 4 | uint32 | command_flags
0x08 | 24 | bytes | reserved
0x20 | 4 | uint32 | marker1 (0x00000008)
0x24 | 4 | char[4] | marker2 ("Ct\0\0")
0x28 | 8 | uint64 | pipeline_addr
0x30 | 8 | uint64 | function_addr
0x38 | 4 | uint32 | binding_count
0x3c | 4 | uint32 | stride (always 8)
0x40 | 8*N | uint64[]| buffer_bindings
func (*MTSPRecord) ParseCtURecord ¶
func (r *MTSPRecord) ParseCtURecord() (*CtURecord, error)
ParseCtURecord parses a CtU record (CtU<b>ulul). Format: Marker at ~0x24/0x2C, followed by Address, then Name.
func (*MTSPRecord) ParseCttRecord ¶
func (r *MTSPRecord) ParseCttRecord() (*CttRecord, error)
ParseCttRecord parses a Ctt (Command Type Transfer?) record.
func (*MTSPRecord) ParseCtululRecord ¶
func (r *MTSPRecord) ParseCtululRecord() (*CttRecord, error)
ParseCtululRecord parses a Ctulul record. Structure appears to be similar to Ctt/Ct (Binding info). Marker "Ctulul\0\0" at ~0x24.
func (*MTSPRecord) ParseCulRecord ¶
func (r *MTSPRecord) ParseCulRecord() (*CulRecord, error)
ParseCulRecord parses a Cul (Command / Resource Binding) record.
func (*MTSPRecord) ParseCululRecord ¶
func (r *MTSPRecord) ParseCululRecord() (*CululRecord, error)
ParseCululRecord parses a Culul (Command Buffer / ICB Definition) record.
Culul Record Structure (usually 160 bytes):
Offset | Size | Type | Field Name
-------|------|---------|------------------
0x00 | 4 | uint32 | record_size (160 or 168)
0x04 | 4 | uint32 | command_flags
0x08 | 24 | bytes | reserved
0x20 | 4 | uint32 | marker_count
0x24 | 8 | char[] | marker ("Culul\0\0\0")
0x28 | 8 | uint64 | icb_addr
0x30 | 4 | uint32 | field1
0x34 | 4 | uint32 | field2
0x38 | 4 | uint32 | field3
0x40 | 4 | uint32 | payload_size
0x48 | 8 | uint64 | payload_addr
0x50 | 4 | uint32 | array_count
0x54 | 4 | uint32 | array_stride
0x58 | 8*N | uint64[]| array_addresses
func (*MTSPRecord) ParseCuwRecord ¶
func (r *MTSPRecord) ParseCuwRecord() (*CuwRecord, error)
ParseCuwRecord parses a Cuw (Command Update/Write) record.
func (*MTSPRecord) ParseDispatchRecord ¶
func (r *MTSPRecord) ParseDispatchRecord() (*CDispatchRecord, error)
type Metadata ¶
type Metadata struct {
UUID string
CaptureVersion int
GraphicsAPI int // 1 = Metal
DeviceID int
NativePointerSize int
CapturedFramesCount int
BoundaryLess bool
LibraryLinkVersions map[string]int
UnusedBufferCount int
UnusedTextureCount int
UnusedFunctionCount int
}
Metadata contains information from the metadata plist file.
type PipelineFunctionMap ¶
PipelineFunctionMap maps pipeline state addresses to kernel function names.
type PipelineStateEvent ¶
PipelineStateEvent represents a change in pipeline state within a command buffer.
type RecordType ¶
type RecordType byte
RecordType represents different MTSP record types.
const (
	RecordTypeCommand RecordType = 0x43 // 'C' - command record
	RecordTypeString RecordType = 0x43 // 'C' - string record (disambiguated by following 'S' byte)
	RecordTypeFunction RecordType = 0x46 // 'F'
	RecordTypeInteger RecordType = 0x69 // 'i'
	RecordTypeUnsignedLong RecordType = 0x75 // 'u' followed by 'l'
)
type ResourceBinding ¶
type ResourceBinding struct {
Address uint64 // Resource memory address
Index int // Binding index (argument buffer slot)
Usage MTLResourceUsage // Access flags (Read, Write, Sample)
Name string // Resource name (if known)
BufferSize uint64 // Buffer size (if known)
}
ResourceBinding represents a buffer or texture binding with usage flags.
type TextureBinding ¶
TextureBinding represents a bound texture argument.
type TimingStat ¶
type TimingStat struct {
TotalTime float64 // Total execution time in milliseconds
AverageTime float64
MinTime float64
MaxTime float64
}
TimingStat holds timing information for a kernel.
type Trace ¶
type Trace struct {
Path string
Metadata *Metadata
CaptureData []byte
DeviceResources map[string][]byte // key is device address (e.g., "0x862ccc000")
KernelNames []string
EncoderLabels []string
BufferLabels []string
DebugGroupLabels []string // Hierarchical debug group labels (e.g., "training_iteration:forward_pass:linear_layer")
DebugGroupOffsets []DebugGroupLabel // Debug groups with their offsets for encoder association
EncoderDebugGroups map[string]string // Maps encoder label to its debug group (sequence-based)
CommandQueueLabel string
DeviceLabels map[uint64]string // Maps device resource address to label (e.g. "fences")
FunctionToName map[uint64]string // Maps Ct function addresses to kernel names (computed from dispatch order)
MTLBLibraries []*mtlb.MTLBFile // Parsed Metal libraries found in the bundle
}
Trace represents a parsed .gputrace bundle.
func (*Trace) AnalyzeKernels ¶
func (t *Trace) AnalyzeKernels() (map[string]*KernelStat, error)
AnalyzeKernels aggregates statistics for all kernels in the trace.
func (*Trace) AnalyzeMTSPRecords ¶
AnalyzeMTSPRecords provides a detailed analysis of MTSP records.
func (*Trace) AnalyzeStoreStructure ¶
AnalyzeStoreStructure examines the decompressed store data for patterns.
func (*Trace) AnalyzeTraceStructure ¶
AnalyzeTraceStructure provides a detailed analysis of the trace structure.
func (*Trace) BuildDependencyGraph ¶
func (t *Trace) BuildDependencyGraph() (*DependencyGraph, error)
BuildDependencyGraph analyzes the trace to construct a dependency graph. Detects three types of memory hazards:
- RAW (Read After Write): reader depends on writer completing
- WAW (Write After Write): second write must wait for first write
- WAR (Write After Read): write must wait for read to complete
func (*Trace) BuildPipelineFunctionMap ¶
func (t *Trace) BuildPipelineFunctionMap() PipelineFunctionMap
BuildPipelineFunctionMap extracts a mapping from pipeline state addresses to kernel function names by parsing Ctt and CS records from the capture data and device-resources files.
The mapping works by:
1. Parsing CS records to build a function_addr → function_name map
2. Parsing Ctt records to get pipeline_addr → function_addr links
3. Combining them: pipeline_addr → function_name
func (*Trace) CountActualDispatches ¶
CountActualDispatches attempts to count dispatches for validation purposes.
This function tries to get the most accurate count available:
1. If performance counters are available, it notes that they exist (but parsing them is not implemented yet)
2. Otherwise it falls back to MTSP-based estimation (95%+ accuracy for standard workloads)
For production use, call EstimateDispatches() which provides confidence levels and method info.
func (*Trace) CountCSRecords ¶
CountCSRecords returns the total number of CS records in the trace.
func (*Trace) CountCommandBuffers ¶
CountCommandBuffers returns the number of command buffers in the trace.
func (*Trace) CountComputeEncoders ¶
CountComputeEncoders returns the number of unique compute encoders (Cuw) in the trace.
func (*Trace) CountDispatchCalls ¶
CountDispatchCalls returns the number of dispatch calls in the trace.
func (*Trace) DecompressStore ¶
DecompressStore decompresses a store file (e.g., store0).
func (*Trace) EstimateDispatches ¶
func (t *Trace) EstimateDispatches() (*DispatchEstimate, error)
EstimateDispatches estimates the number of GPU dispatches using MTSP analysis. This provides a fast estimate (95%+ accuracy for most traces) without requiring full performance counter parsing or Xcode integration.
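Returns an estimate with a confidence level. For exact counts, use GetExactDispatches, which integrates with Xcode Instruments (when available). Note: GetExactDispatches does not appear in this package's exported index, so it may be unexported or not yet implemented.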
func (*Trace) ExtractEnhancedMetadata ¶
func (t *Trace) ExtractEnhancedMetadata() (*EnhancedMetadata, error)
ExtractEnhancedMetadata extracts detailed structure from the GPU trace.
func (*Trace) FormatAPICallList ¶
FormatAPICallList writes a formatted API call list similar to Xcode Instruments.
func (*Trace) FormatAPICallListFull ¶
FormatAPICallListFull writes an expanded/full API call list showing all nesting levels. This matches the Xcode Instruments "expanded tree view" format where command buffers and encoders are shown at multiple indentation levels.
func (*Trace) FormatCommandBufferSummary ¶
FormatCommandBufferSummary writes a human-readable summary of command buffers.
func (*Trace) GetDebugGroupForLabel ¶
GetDebugGroupForLabel returns the debug group for a given encoder label. Uses sequence-based mapping built during capture file parsing.
func (*Trace) GetKernelNameCSRecords ¶
GetKernelNameCSRecords returns only CS records that contain kernel names (not UUIDs).
func (*Trace) GetUUIDCSRecords ¶
GetUUIDCSRecords returns only CS records that contain pipeline state UUIDs.
func (*Trace) HasPerfCounters ¶
HasPerfCounters returns true if the trace has performance counter data.
func (*Trace) ParseAPICallList ¶
func (t *Trace) ParseAPICallList() (*APICallList, error)
ParseAPICallList extracts all API calls from the trace.
func (*Trace) ParseCSRecords ¶
ParseCSRecords extracts all CS (Command Submission) records from the capture file. CS records come in two types:
- With UUIDs (preceded by 0x04000000): Pipeline state identifiers
- With kernel names (preceded by 0x09100000): Actual kernel function names
Format:
[length: uint32] [CS marker: 0x43 0x53 0x00 0x00] [address: uint64] [identifier: null-terminated string]
func (*Trace) ParseCommandBuffers ¶
func (t *Trace) ParseCommandBuffers() ([]*CommandBuffer, error)
ParseCommandBuffers extracts all command buffers from the trace by finding CUUU markers. CUUU markers indicate Metal command buffer records.
func (*Trace) ParseComputeEncoders ¶
func (t *Trace) ParseComputeEncoders() ([]*ComputeEncoder, error)
ParseComputeEncoders extracts all compute command encoders from the trace. Scans the capture file and device-resources for CS (Command Submission) records.
func (*Trace) ParseDependencyEvents ¶
func (t *Trace) ParseDependencyEvents() ([]DependencyEvent, error)
ParseDependencyEvents extracts relevant events from the capture file. It parses Ct records (compute dispatches with function addresses and buffer bindings) and resolves kernel names from DeviceLabels.
func (*Trace) ParseDispatchCalls ¶
func (t *Trace) ParseDispatchCalls() ([]*DispatchCall, error)
ParseDispatchCalls extracts all compute kernel dispatch calls from the trace.
func (*Trace) ParseDispatchInRegion ¶
func (t *Trace) ParseDispatchInRegion(data []byte, baseOffset int64) ([]DispatchThreads, error)
ParseDispatchInRegion parses dispatch calls within a command buffer region.
func (*Trace) ParseIndex ¶
ParseIndex parses the xdic index file to get device resources mapping.
func (*Trace) ParseIndexFile ¶
ParseIndexFile parses the xdic index file to extract trace structure information.
func (*Trace) ParseMTSPFromData ¶
func (t *Trace) ParseMTSPFromData(data []byte) ([]MTSPRecord, error)
ParseMTSPFromData parses records from a byte slice.
func (*Trace) ParseMTSPRecords ¶
func (t *Trace) ParseMTSPRecords() ([]MTSPRecord, error)
ParseMTSPRecords parses records from the capture file.
func (*Trace) ParseNestedRecords ¶
func (t *Trace) ParseNestedRecords(rec MTSPRecord) ([]MTSPRecord, error)
ParseNestedRecords attempts to parse the data of the current record as a sequence of nested MTSP records. This is used for container records like CS and Ci. It skips the first 16 bytes (standard MTSP header/padding for containers) and attempts to parse the rest.