Documentation
¶
Overview ¶
DNA Engine: Hierarchical Spatial Correlation Engine — a topological reconstruction system for neural networks. Converts structural signatures (LayerType, DType, weights) into 3D directional geometry for high-fidelity comparison across diverse numerical families.
Evolution Engine: DNA Splice & NEAT-style Topology Evolution — extends the DNA Engine (dna.go) with two capabilities:
DNA Splice / Genetic Crossover: takes two trained parent networks, compares their NetworkDNA, and produces a child network whose weights are blended from both parents, guided by per-layer cosine similarity scores.
NEAT-style Topology Evolution: mutates a network's topology (layer types, activations, remote-link connections) and weights without destroying learned structure. Supports a full population-based evolution loop via NEATPopulation.
Index ¶
- Constants
- Variables
- func Activate[T Numeric](v T, act ActivationType) T
- func ActivateDerivative[T Numeric](v T, act ActivationType) T
- func AlignedFloat32(n int) []float32
- func ApplyRecursiveGradients(layer *VolumetricLayer, gradWeights *Tensor[float32], lr float32)
- func ApplyTargetPropGaps[T Numeric](n *VolumetricNetwork, s *TargetPropState[T], lr float32)
- func BindGroupKeyHash(pipeline *wgpu.ComputePipeline, buffers ...*wgpu.Buffer) uint64
- func CalculateLoss[T Numeric](output, target *Tensor[T], lossType string) float64
- func CalculateOptimalGPUTileSizeFromLimits(sharedMemBytes, maxInvocations uint32, headDim int) int
- func CalculateOptimalTileSize(headDim int) int
- func CastWeights[T Numeric](weights any) []T
- func ComputeSilhouetteScore[T Numeric](data []*Tensor[T], assignments []int) float32
- func ConvertSlice[In Numeric, Out Numeric](in []In) []Out
- func CosineDistance[T Numeric](a []T, b []float32) float32
- func CosineSimilarity(s1, s2 LayerSignature) float32
- func DequantizeQ4_0(blocks []Q4_0Block, n int) []float32
- func EuclideanDistance[T Numeric](a []T, b []float32) float32
- func EuclideanDistanceT[T Numeric](a, b []T) float32
- func GetDeviceDescription(net *VolumetricNetwork) string
- func GetLogits[T Numeric](data []T, temp float64, dtype DType) []float32
- func GroupRelatedTensors(detected []DetectedTensor) map[string][]DetectedTensor
- func HierarchicalGroup[T Numeric](data []*Tensor[T], threshold float32) []int
- func KMeansCluster[T Numeric](data []*Tensor[T], k int, maxIter int, parallel bool) (centroids [][]float32, assignments []int)
- func LoadSafetensors(filepath string) (map[string][]float32, error)
- func LoadSafetensorsFromBytes(data []byte) (map[string][]float32, error)
- func LoadSafetensorsWithShapes(data []byte) (map[string]TensorWithShape, error)
- func LoadUniversalDetailed(path string) (int, []LayerArchetype, []int, []TensorMeta, error)
- func LoadWithPrefixes(net *VolumetricNetwork, tensors map[string][]float32) error
- func MajorityVote(outputs [][]int) []int
- func MorphLayer(layer *VolumetricLayer, target DType) error
- func MultiNetworkEvaluation[T Numeric](models map[string]*VolumetricNetwork, inputs []*Tensor[T], expected []float64) (map[string]*DeviationMetrics, error)
- func Normalize(v []float32) []float32
- func PerformanceSimilarity(mA, mB ModelPerformance) float64
- func PrintEnsembleReport(matches []EnsembleMatch, topN int)
- func PrintMultiNetworkSummary(results map[string]*DeviationMetrics)
- func SampleTopK(logits []float32, topK int, temperature float32, deterministic bool) int
- func SerializeNetwork(net *VolumetricNetwork) ([]byte, error)
- func ShaderDenseBackwardDW(tileSize int) string
- func ShaderDenseBackwardDX(tileSize int) string
- func ShaderTiledDenseN(tileSize int) string
- func ShaderTiledDenseQ4(tileSize int) string
- func ShaderTiledMHAN(tileSize, headDim int) string
- func ShaderTiledSwiGLUN(tileSize int) string
- func ShaderTiledSwiGLUQ4(tileSize int) string
- func SimulatePrecision(wVal float32, dtype DType, scale float32) float32
- func Softmax(logits []float32) []float32
- func SoftmaxBackward(gradOutput, softmaxOutput []float32) []float32
- func SoftmaxEntmaxHelper(logits []float32, alpha float32) []float32
- func SoftmaxSparseHelper(logits []float32) []float32
- func SystolicApplyTargetProp[T Numeric](n *VolumetricNetwork, s *SystolicState[T], globalTarget *Tensor[T], lr float32)
- func SystolicForward[T Numeric](n *VolumetricNetwork, s *SystolicState[T], captureHistory bool) time.Duration
- func TargetPropBackward[T Numeric](n *VolumetricNetwork, s *TargetPropState[T], target *Tensor[T])
- func TargetPropBackwardChainRule[T Numeric](n *VolumetricNetwork, s *TargetPropState[T], target *Tensor[T])
- func TargetPropBackwardTargetProp[T Numeric](n *VolumetricNetwork, s *TargetPropState[T], target *Tensor[T])
- type ActivationType
- type AdaptationResult
- type AdaptationTracker
- type AggregatingObserver
- type ArchConfig
- type BindGroupKey
- type BrainType
- type ComparisonResult
- type ConsoleObserver
- type DType
- type DetectedTensor
- type DeviationBucket
- type DeviationMetrics
- type EnsembleMatch
- type GenOptions
- type HTTPObserver
- type HardwareInfo
- type LayerArchetype
- type LayerSignature
- type LayerSpec
- type LayerStats
- type LayerTelemetry
- type LayerType
- type LogicShift
- type MergePair
- type MethodInfo
- type ModelPerformance
- type ModelTelemetry
- type NEATConfig
- type NEATPopulation
- type NetworkBlueprint
- type NetworkComparisonResult
- type NetworkDNA
- type NetworkSpec
- type Numeric
- type PairWithIndex
- type ParameterInfo
- type PersistenceLayerSpec
- type PersistenceNetworkSpec
- type PolyGradientObserver
- type PolyLayerEvent
- type PolyObserver
- type PreTokenizer
- type PredictionResult
- type PrefixWeightMapper
- type Q4_0Block
- type SafetensorsHeader
- type SoftmaxType
- type SpliceConfig
- type SpliceResult
- type Streamer
- type SystolicState
- type TargetPropConfig
- type TargetPropState
- type TaskChange
- type Template
- type Tensor
- func BackwardPolymorphic[T Numeric](n *VolumetricNetwork, gradOutput *Tensor[T], inputs, preActs []*Tensor[T]) (gradInput *Tensor[T], layerGradients [][2]*Tensor[T], ...)
- func CNN1BackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func CNN1BackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func CNN1ForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func CNN1ForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func CNN2BackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func CNN2BackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func CNN2ForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func CNN2ForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func CNN3BackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func CNN3BackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func CNN3ForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func CNN3ForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func ComputeLossGradient[T Numeric](output, target *Tensor[T], lossType string) *Tensor[T]
- func ConvTransposed1DBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func ConvTransposed1DForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func ConvTransposed2DBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func ConvTransposed2DForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func ConvTransposed3DBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func ConvTransposed3DForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func ConvertTensor[In Numeric, Out Numeric](in *Tensor[In]) *Tensor[Out]
- func DenseBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func DenseForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func DenseForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func DispatchLayer[T Numeric](layer *VolumetricLayer, input, skip *Tensor[T]) (preAct, postAct *Tensor[T])
- func DispatchLayerBackward[T Numeric](layer *VolumetricLayer, gradOutput, input, skip, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func EmbeddingBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func EmbeddingBackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func EmbeddingForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func EmbeddingForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func ForwardPolymorphic[T Numeric](n *VolumetricNetwork, input *Tensor[T]) (*Tensor[T], time.Duration, []time.Duration)
- func KMeansBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func KMeansForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func LSTMBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func LSTMBackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func LSTMForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func LSTMForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func LayerNormBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func LayerNormForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func MHABackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func MHAForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func MHAForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func NewTensor[T Numeric](shape ...int) *Tensor[T]
- func NewTensorFromSlice[T Numeric](data []T, shape ...int) *Tensor[T]
- func ParallelBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func ParallelForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func RMSNormBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func RMSNormForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func RNNBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func RNNBackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func RNNForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func RNNForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func ResidualBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func ResidualBackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func ResidualForwardPolymorphic[T Numeric](layer *VolumetricLayer, input, skip *Tensor[T]) (preAct, postAct *Tensor[T])
- func ResidualForwardTiled[T Numeric](layer *VolumetricLayer, input, skip *Tensor[T]) (preAct, postAct *Tensor[T])
- func SequentialBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func SequentialForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func SoftmaxBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, postAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func SoftmaxForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func SwiGLUBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func SwiGLUBackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
- func SwiGLUForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func SwiGLUForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
- func SystolicBackward[T Numeric](n *VolumetricNetwork, s *SystolicState[T], gradOutput *Tensor[T]) (gradIn *Tensor[T], layerGradients [][2]*Tensor[T], err error)
- func TargetPropForward[T Numeric](n *VolumetricNetwork, s *TargetPropState[T], input *Tensor[T]) *Tensor[T]
- type TensorInfo
- type TensorMeta
- type TensorWithShape
- type TimeWindow
- type Tokenizer
- type TokenizerJSON
- type TrainingBatch
- type TrainingConfig
- type TrainingMetrics
- type TrainingResult
- type Transformer
- func (t *Transformer[T]) EnableTiling(tileSize int)
- func (t *Transformer[T]) ForwardTokenIDsWGPU(tokens []uint32, input *Tensor[T], computeLogits bool, onlyLast bool) (*Tensor[T], error)
- func (t *Transformer[T]) ForwardWGPU(input *Tensor[T]) (*Tensor[T], error)
- func (t *Transformer[T]) Generate(encode func(text string) []uint32, decode func(tokens []uint32) string, ...) string
- func (t *Transformer[T]) Reset()
- func (t *Transformer[T]) SyncToGPU() error
- type Turn
- type VolumetricLayer
- func CreateResidualGraft(main *VolumetricNetwork) *VolumetricLayer
- func GraftNetworksPolymorphic(networks []*VolumetricNetwork, combineMode string) (*VolumetricLayer, error)
- func ReconstructCNNLayer(name string, tensors []DetectedTensor, ltype LayerType) (*VolumetricLayer, error)
- func ReconstructLayerNormLayer(name string, tensors []DetectedTensor, dModel int) (*VolumetricLayer, error)
- func ReconstructMHALayer(name string, tensors []DetectedTensor, dModel int, numHeads int) (*VolumetricLayer, error)
- func ReconstructRMSNormLayer(name string, tensors []DetectedTensor, dModel int) (*VolumetricLayer, error)
- func ReconstructSwiGLULayer(name string, tensors []DetectedTensor, dModel int) (*VolumetricLayer, error)
- type VolumetricNetwork
- func BuildCNN(inputSize, numClasses int, dtype DType) *VolumetricNetwork
- func BuildNetworkFromJSON(jsonData []byte) (*VolumetricNetwork, error)
- func BuildRandomNetwork(depth, rows, cols, lpc int, dModel int) *VolumetricNetwork
- func BuildSequentialNetwork(numLayers int, dModel int, act ActivationType, dtype DType) *VolumetricNetwork
- func BuildTransformerNetwork(numBlocks int, dModel int, numHeads int, dtype DType) *VolumetricNetwork
- func DeserializeNetwork(jsonData []byte) (*VolumetricNetwork, error)
- func LoadUniversal(path string) (*VolumetricNetwork, error)
- func MountGeometrically(archs []LayerArchetype, geoms []TensorMeta) *VolumetricNetwork
- func NEATMutate(n *VolumetricNetwork, cfg NEATConfig) *VolumetricNetwork
- func NewVolumetricNetwork(depth, rows, cols, layersPerCell int) *VolumetricNetwork
- func SpliceDNA(parentA, parentB *VolumetricNetwork, cfg SpliceConfig) *VolumetricNetwork
- func (n *VolumetricNetwork) CalculateTotalMemory() int
- func (n *VolumetricNetwork) GetIndex(z, y, x, l int) int
- func (n *VolumetricNetwork) GetLayer(z, y, x, l int) *VolumetricLayer
- func (n *VolumetricNetwork) GetMethodSignature(methodName string) (string, error)
- func (n *VolumetricNetwork) GetMethods() ([]MethodInfo, error)
- func (n *VolumetricNetwork) GetMethodsJSON() (string, error)
- func (n *VolumetricNetwork) HasMethod(methodName string) bool
- func (n *VolumetricNetwork) InitCNNCell(z, y, x, l int, ltype LayerType, inChannels, filters, kSize int, dtype DType, ...)
- func (n *VolumetricNetwork) InitConvTransposedCell(z, y, x, l int, ltype LayerType, inChannels, filters, kSize int, dtype DType, ...)
- func (n *VolumetricNetwork) InitDenseCell(z, y, x, l int, dModel int, act ActivationType, scale float32)
- func (n *VolumetricNetwork) InitEmbeddingCell(z, y, x, l int, vocabSize, dModel int, dtype DType)
- func (n *VolumetricNetwork) InitKMeansCell(z, y, x, l int, numClusters, dModel int, dtype DType)
- func (n *VolumetricNetwork) InitLSTMCell(z, y, x, l int, dModel int, scale float32)
- func (n *VolumetricNetwork) InitLayerNormCell(z, y, x, l int, size int, dtype DType)
- func (n *VolumetricNetwork) InitMHACell(z, y, x, l int, dModel, numHeads int, scale float32)
- func (n *VolumetricNetwork) InitRNNCell(z, y, x, l int, dModel int, scale float32)
- func (n *VolumetricNetwork) InitWGPU() error
- func (n *VolumetricNetwork) ListMethods() []string
- func (n *VolumetricNetwork) SyncAllToGPU() error
- func (n *VolumetricNetwork) SyncToGPU() error
- type WGPUActivationParams
- type WGPUApplyGradientsParams
- type WGPUCNN1BackwardParams
- type WGPUCNN1Params
- type WGPUCNN2BackwardParams
- type WGPUCNN2Params
- type WGPUCNN3BackwardParams
- type WGPUCNN3Params
- type WGPUContext
- func (c *WGPUContext) BeginFrame() error
- func (c *WGPUContext) CreateComputePipeline(shaderSource string) (*wgpu.ComputePipeline, error)
- func (c *WGPUContext) CreatePersistentBuffer(data []float32, label string) (*wgpu.Buffer, error)
- func (c *WGPUContext) DispatchActivation(size int, act ActivationType, inputBuf, outputBuf *wgpu.Buffer) error
- func (c *WGPUContext) DispatchActivationBackward(size int, act ActivationType, gradOutBuf, preActBuf, gradInBuf *wgpu.Buffer) error
- func (c *WGPUContext) DispatchApplyGradients(size int, lr float32, weightBuf, gradBuf *wgpu.Buffer) error
- func (c *WGPUContext) DispatchBackwardLayer(l *VolumetricLayer, batchSize int, ...) error
- func (c *WGPUContext) DispatchCNN1(batchSize, inC, inL, outC, outL, kSize, stride, padding int, ...) error
- func (c *WGPUContext) DispatchCNN1BackwardDW(batchSize, inC, inL, filters, outL, kSize, stride, padding int, ...) error
- func (c *WGPUContext) DispatchCNN1BackwardDX(batchSize, inC, inL, filters, outL, kSize, stride, padding int, ...) error
- func (c *WGPUContext) DispatchCNN2(...) error
- func (c *WGPUContext) DispatchCNN2BackwardDW(batchSize, inC, inH, inW, filters, outH, outW, kSize, stride, padding int, ...) error
- func (c *WGPUContext) DispatchCNN2BackwardDX(batchSize, inC, inH, inW, filters, outH, outW, kSize, stride, padding int, ...) error
- func (c *WGPUContext) DispatchCNN3(...) error
- func (c *WGPUContext) DispatchCNN3BackwardDW(...) error
- func (c *WGPUContext) DispatchCNN3BackwardDX(...) error
- func (c *WGPUContext) DispatchDense(batchSize, inputSize, outputSize int, ...) error
- func (c *WGPUContext) DispatchDenseBackwardDW(batchSize, inputSize, outputSize int, ...) error
- func (c *WGPUContext) DispatchDenseBackwardDX(batchSize, inputSize, outputSize int, ...) error
- func (c *WGPUContext) DispatchDenseQ4(batchSize, inputSize, outputSize int, ...) error
- func (c *WGPUContext) DispatchEmbedding(vocabSize, hiddenSize, numTokens int, ...) error
- func (c *WGPUContext) DispatchEmbeddingBackward(vocabSize, hiddenSize, numTokens int, ...) error
- func (c *WGPUContext) DispatchForwardLayer(l *VolumetricLayer, batchSize int, inputBuf, outBuf *wgpu.Buffer) error
- func (c *WGPUContext) DispatchKVUpdate(offset, headDim, maxSeqLen, numKVHeads, numTokens int, ...) error
- func (c *WGPUContext) DispatchLSTMStep(batchSize, inputSize, hiddenSize int, ...) error
- func (c *WGPUContext) DispatchMHA(numHeads, numKVHeads, headDim, seqLen, kvOffset, maxSeqLen int, ...) error
- func (c *WGPUContext) DispatchMHABackward(batchSize, numHeads, numKVHeads, headDim, seqLen int, scale float32, ...) error
- func (c *WGPUContext) DispatchMSEGradPartialLoss(size int, outputBuf, targetBuf, gradBuf, partialsBuf *wgpu.Buffer) error
- func (c *WGPUContext) DispatchRMSNorm(batchSize, size int, epsilon float32, ...) error
- func (c *WGPUContext) DispatchRMSNormBackward(batchSize, size int, epsilon float32, ...) error
- func (c *WGPUContext) DispatchRNNStep(batchSize, inputSize, hiddenSize int, ...) error
- func (c *WGPUContext) DispatchResidual(size int, inputBuf, residualBuf *wgpu.Buffer) error
- func (c *WGPUContext) DispatchResidualBackward(size int, gradOutputBuf, gradInputBuf, gradResidualBuf *wgpu.Buffer) error
- func (c *WGPUContext) DispatchRoPE(seqLen, headDim, numHeads, offset int, theta float32, targetBuf *wgpu.Buffer) error
- func (c *WGPUContext) DispatchSwiGLU(batchSize, inputSize, outputSize int, ...) error
- func (c *WGPUContext) DispatchSwiGLUBackward(batchSize, inputSize, outputSize int, ...) error
- func (c *WGPUContext) DispatchSwiGLUQ4(batchSize, inputSize, outputSize int, ...) error
- func (c *WGPUContext) FlushFrame()
- func (c *WGPUContext) GetActivationBuffer(name string, size uint64, usage wgpu.BufferUsage) *wgpu.Buffer
- func (c *WGPUContext) GetBindGroup(pipeline *wgpu.ComputePipeline, buffers ...*wgpu.Buffer) (*wgpu.BindGroup, error)
- func (c *WGPUContext) GetUniformBuffer(size uint64) *wgpu.Buffer
- func (c *WGPUContext) ReadBuffer(buf *wgpu.Buffer) ([]float32, error)
- func (c *WGPUContext) Release()
- func (c *WGPUContext) ResetCache()
- type WGPUDenseParams
- type WGPUEmbeddingParams
- type WGPUKVParams
- type WGPULSTMParams
- type WGPULossParams
- type WGPUMHABackwardParams
- type WGPUMHAParams
- type WGPURMSNormParams
- type WGPURNNParams
- type WGPURoPEParams
- type WeightStore
- func (ws *WeightStore) ApplyGradients(gradWeights *Tensor[float32], lr float32)
- func (ws *WeightStore) GetActive(dtype DType) any
- func (ws *WeightStore) Morph(dtype DType)
- func (ws *WeightStore) Randomize(seed int64, scale float32)
- func (ws *WeightStore) SetVersion(dtype DType, data any)
- func (ws *WeightStore) SizeInBytes(dtype DType) int
- func (ws *WeightStore) Unpack(dtype DType)
Constants ¶
const ShaderActivationBackward = `` /* 972-byte string literal not displayed */
const ShaderActivationForward = `` /* 789-byte string literal not displayed */
const ShaderApplyGradients = `` /* 486-byte string literal not displayed */
const ShaderCNN1 = `` /* 1288-byte string literal not displayed */
// ShaderCNN1BackwardDW is the WGSL source of a compute shader that accumulates
// the weight gradient of a 1D convolution.
//
// Bindings (group 0): 0 = Params uniform, 1 = gradOutput, 2 = input,
// 3 = preAct (all read-only storage), 4 = gradWeights (read_write storage).
//
// Each invocation (workgroup size 64) owns one weight element: tid is split
// into (filter f, input channel ic, kernel tap k), and invocations with
// tid >= filters*inC*kSize return immediately. For every batch item and output
// position o it maps to input position o*stride + k - padding, skips positions
// outside [0, inL), and sums gradOutput * activateDerivative(preAct) * input.
// The indexing treats gradOutput/preAct as flattened [batch][filter][outL] and
// input as flattened [batch][inC][inL].
//
// The result is added to gradWeights[tid] with +=, so whatever the buffer
// already holds is kept. NOTE(review): presumably the caller zeroes the buffer
// between steps — confirm at the dispatch site.
//
// activateDerivative is not defined in the text shown here; presumably it is
// supplied by the wgslActivateDerivative snippet spliced in below — verify.
const ShaderCNN1BackwardDW = `
struct Params {
batchSize: u32,
inC: u32,
inL: u32,
filters: u32,
outL: u32,
kSize: u32,
stride: u32,
padding: u32,
activation: u32,
};
@group(0) @binding(0) var<uniform> params: Params;
@group(0) @binding(1) var<storage, read> gradOutput: array<f32>;
@group(0) @binding(2) var<storage, read> input: array<f32>;
@group(0) @binding(3) var<storage, read> preAct: array<f32>;
@group(0) @binding(4) var<storage, read_write> gradWeights: array<f32>;
` + wgslActivateDerivative + `
@compute @workgroup_size(64, 1, 1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
let tid = global_id.x;
if (tid >= params.filters * params.inC * params.kSize) { return; }
let f = tid / (params.inC * params.kSize);
let rem = tid % (params.inC * params.kSize);
let ic = rem / params.kSize;
let k = rem % params.kSize;
var sum: f32 = 0.0;
for (var b: u32 = 0u; b < params.batchSize; b++) {
for (var o: u32 = 0u; o < params.outL; o++) {
let inPos = i32(o * params.stride) + i32(k) - i32(params.padding);
if (inPos >= 0 && inPos < i32(params.inL)) {
let outIdx = b * params.filters * params.outL + f * params.outL + o;
let dy = gradOutput[outIdx] * activateDerivative(preAct[outIdx], params.activation);
let inIdx = b * params.inC * params.inL + ic * params.inL + u32(inPos);
sum += dy * input[inIdx];
}
}
}
gradWeights[tid] += sum;
}
`
// ShaderCNN1BackwardDX is the WGSL source of a compute shader that accumulates
// the input gradient of a 1D convolution.
//
// Bindings (group 0): 0 = Params uniform, 1 = gradOutput, 2 = weights,
// 3 = preAct (all read-only storage), 4 = gradInput (read_write storage).
//
// Each invocation (workgroup size 64) owns one input element: tid is split
// into (batch b, input channel ic, input position ip), and invocations with
// tid >= batchSize*inC*inL return immediately. For every filter f and kernel
// tap k it computes val = ip + padding - k; the tap contributes only when val
// is non-negative, divisible by stride, and val/stride is a valid output
// position (< outL). Contributions are
// gradOutput * activateDerivative(preAct) * weights, with weights indexed as
// flattened [filter][inC][kSize] and gradOutput/preAct as flattened
// [batch][filter][outL].
//
// The result is added to gradInput[tid] with +=, so whatever the buffer
// already holds is kept. NOTE(review): presumably the caller zeroes the buffer
// before dispatch — confirm at the dispatch site.
//
// activateDerivative is not defined in the text shown here; presumably it is
// supplied by the wgslActivateDerivative snippet spliced in below — verify.
const ShaderCNN1BackwardDX = `
struct Params {
batchSize: u32,
inC: u32,
inL: u32,
filters: u32,
outL: u32,
kSize: u32,
stride: u32,
padding: u32,
activation: u32,
};
@group(0) @binding(0) var<uniform> params: Params;
@group(0) @binding(1) var<storage, read> gradOutput: array<f32>;
@group(0) @binding(2) var<storage, read> weights: array<f32>;
@group(0) @binding(3) var<storage, read> preAct: array<f32>;
@group(0) @binding(4) var<storage, read_write> gradInput: array<f32>;
` + wgslActivateDerivative + `
@compute @workgroup_size(64, 1, 1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
let tid = global_id.x;
if (tid >= params.batchSize * params.inC * params.inL) { return; }
let b = tid / (params.inC * params.inL);
let rem = tid % (params.inC * params.inL);
let ic = rem / params.inL;
let ip = rem % params.inL;
var sum: f32 = 0.0;
for (var f: u32 = 0u; f < params.filters; f++) {
for (var k: u32 = 0u; k < params.kSize; k++) {
let val = i32(ip) + i32(params.padding) - i32(k);
if (val >= 0 && val % i32(params.stride) == 0) {
let o = u32(val / i32(params.stride));
if (o < params.outL) {
let outIdx = b * params.filters * params.outL + f * params.outL + o;
let dy = gradOutput[outIdx] * activateDerivative(preAct[outIdx], params.activation);
let kWIdx = f * params.inC * params.kSize + ic * params.kSize + k;
sum += dy * weights[kWIdx];
}
}
}
}
gradInput[tid] += sum;
}
`
const ShaderCNN2 = `` /* 2150-byte string literal not displayed */
// ShaderCNN2BackwardDW is the WGSL source of a compute shader that accumulates
// the weight gradient of a 2D convolution with a square kSize x kSize kernel
// and a single stride/padding value applied to both spatial axes.
//
// Bindings (group 0): 0 = Params uniform, 1 = gradOutput, 2 = input,
// 3 = preAct (all read-only storage), 4 = gradWeights (read_write storage).
//
// Each invocation (workgroup size 64) owns one weight element: tid is split
// into (filter f, input channel ic, kernel row kh, kernel column kw), and
// invocations with tid >= filters*inC*kSize*kSize return immediately. For
// every batch item and output pixel (oh, ow) it maps to the input pixel
// (oh*stride + kh - padding, ow*stride + kw - padding), skips pixels outside
// the inH x inW input, and sums gradOutput * activateDerivative(preAct) * input.
// The indexing treats gradOutput/preAct as flattened
// [batch][filter][outH][outW] and input as flattened [batch][inC][inH][inW].
//
// The result is added to gradWeights[tid] with +=, so whatever the buffer
// already holds is kept. NOTE(review): presumably the caller zeroes the buffer
// between steps — confirm at the dispatch site.
//
// activateDerivative is not defined in the text shown here; presumably it is
// supplied by the wgslActivateDerivative snippet spliced in below — verify.
const ShaderCNN2BackwardDW = `
struct Params {
batchSize: u32,
inC: u32,
inH: u32,
inW: u32,
filters: u32,
outH: u32,
outW: u32,
kSize: u32,
stride: u32,
padding: u32,
activation: u32,
};
@group(0) @binding(0) var<uniform> params: Params;
@group(0) @binding(1) var<storage, read> gradOutput: array<f32>;
@group(0) @binding(2) var<storage, read> input: array<f32>;
@group(0) @binding(3) var<storage, read> preAct: array<f32>;
@group(0) @binding(4) var<storage, read_write> gradWeights: array<f32>;
` + wgslActivateDerivative + `
@compute @workgroup_size(64, 1, 1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
let tid = global_id.x;
let weightSize = params.filters * params.inC * params.kSize * params.kSize;
if (tid >= weightSize) { return; }
let f = tid / (params.inC * params.kSize * params.kSize);
let rem = tid % (params.inC * params.kSize * params.kSize);
let ic = rem / (params.kSize * params.kSize);
let rem2 = rem % (params.kSize * params.kSize);
let kh = rem2 / params.kSize;
let kw = rem2 % params.kSize;
var sum: f32 = 0.0;
for (var b: u32 = 0u; b < params.batchSize; b++) {
for (var oh: u32 = 0u; oh < params.outH; oh++) {
for (var ow: u32 = 0u; ow < params.outW; ow++) {
let ih = i32(oh * params.stride) + i32(kh) - i32(params.padding);
let iw = i32(ow * params.stride) + i32(kw) - i32(params.padding);
if (ih >= 0 && ih < i32(params.inH) && iw >= 0 && iw < i32(params.inW)) {
let outIdx = ((b * params.filters + f) * params.outH + oh) * params.outW + ow;
let dy = gradOutput[outIdx] * activateDerivative(preAct[outIdx], params.activation);
let inIdx = ((b * params.inC + ic) * params.inH + u32(ih)) * params.inW + u32(iw);
sum += dy * input[inIdx];
}
}
}
}
gradWeights[tid] += sum;
}
`
// ShaderCNN2BackwardDX is the WGSL source of a compute shader that accumulates
// the input gradient of a 2D convolution with a square kSize x kSize kernel
// and a single stride/padding value applied to both spatial axes.
//
// Bindings (group 0): 0 = Params uniform, 1 = gradOutput, 2 = weights,
// 3 = preAct (all read-only storage), 4 = gradInput (read_write storage).
//
// Each invocation (workgroup size 64) owns one input element: tid is split
// into (batch b, input channel ic, row ih, column iw), and invocations with
// tid >= batchSize*inC*inH*inW return immediately. For every filter and kernel
// tap (kh, kw) it computes vh = ih + padding - kh and vw = iw + padding - kw;
// the tap contributes only when both are non-negative, both are divisible by
// stride, and the quotients (oh, ow) fall inside the outH x outW output.
// Contributions are gradOutput * activateDerivative(preAct) * weights, with
// weights indexed as flattened [filter][inC][kSize][kSize] and
// gradOutput/preAct as flattened [batch][filter][outH][outW].
//
// The result is added to gradInput[tid] with +=, so whatever the buffer
// already holds is kept. NOTE(review): presumably the caller zeroes the buffer
// before dispatch — confirm at the dispatch site.
//
// activateDerivative is not defined in the text shown here; presumably it is
// supplied by the wgslActivateDerivative snippet spliced in below — verify.
const ShaderCNN2BackwardDX = `
struct Params {
batchSize: u32,
inC: u32,
inH: u32,
inW: u32,
filters: u32,
outH: u32,
outW: u32,
kSize: u32,
stride: u32,
padding: u32,
activation: u32,
};
@group(0) @binding(0) var<uniform> params: Params;
@group(0) @binding(1) var<storage, read> gradOutput: array<f32>;
@group(0) @binding(2) var<storage, read> weights: array<f32>;
@group(0) @binding(3) var<storage, read> preAct: array<f32>;
@group(0) @binding(4) var<storage, read_write> gradInput: array<f32>;
` + wgslActivateDerivative + `
@compute @workgroup_size(64, 1, 1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
let tid = global_id.x;
let size = params.batchSize * params.inC * params.inH * params.inW;
if (tid >= size) { return; }
let b = tid / (params.inC * params.inH * params.inW);
let rem = tid % (params.inC * params.inH * params.inW);
let ic = rem / (params.inH * params.inW);
let rem2 = rem % (params.inH * params.inW);
let ih = rem2 / params.inW;
let iw = rem2 % params.inW;
var sum: f32 = 0.0;
for (var f: u32 = 0u; f < params.filters; f++) {
for (var kh: u32 = 0u; kh < params.kSize; kh++) {
for (var kw: u32 = 0u; kw < params.kSize; kw++) {
let vh = i32(ih) + i32(params.padding) - i32(kh);
let vw = i32(iw) + i32(params.padding) - i32(kw);
if (vh >= 0 && vh % i32(params.stride) == 0 && vw >= 0 && vw % i32(params.stride) == 0) {
let oh = u32(vh / i32(params.stride));
let ow = u32(vw / i32(params.stride));
if (oh < params.outH && ow < params.outW) {
let outIdx = ((b * params.filters + f) * params.outH + oh) * params.outW + ow;
let dy = gradOutput[outIdx] * activateDerivative(preAct[outIdx], params.activation);
let kWIdx = ((f * params.inC + ic) * params.kSize + kh) * params.kSize + kw;
sum += dy * weights[kWIdx];
}
}
}
}
}
gradInput[tid] += sum;
}
`
const ShaderCNN3 = `` /* 2547-byte string literal not displayed */
// ShaderCNN3BackwardDW is the WGSL source of a compute shader that accumulates
// the weight gradient of a 3D convolution with a cubic kSize^3 kernel and a
// single stride/padding value applied to all three spatial axes.
//
// Bindings (group 0): 0 = Params uniform, 1 = gradOutput, 2 = input,
// 3 = preAct (all read-only storage), 4 = gradWeights (read_write storage).
//
// Each invocation (workgroup size 64) owns one weight element: tid is split
// into (filter f, input channel ic, kernel depth kd, row kh, column kw), and
// invocations with tid >= filters*inC*kSize^3 return immediately. For every
// batch item and output voxel (od, oh, ow) it maps each axis to
// out*stride + tap - padding, skips voxels outside the inD x inH x inW input,
// and sums gradOutput * activateDerivative(preAct) * input. The indexing
// treats gradOutput/preAct as flattened [batch][filter][outD][outH][outW] and
// input as flattened [batch][inC][inD][inH][inW].
//
// The result is added to gradWeights[tid] with +=, so whatever the buffer
// already holds is kept. NOTE(review): presumably the caller zeroes the buffer
// between steps — confirm at the dispatch site.
//
// activateDerivative is not defined in the text shown here; presumably it is
// supplied by the wgslActivateDerivative snippet spliced in below — verify.
const ShaderCNN3BackwardDW = `
struct Params {
batchSize: u32,
inC: u32,
inD: u32,
inH: u32,
inW: u32,
filters: u32,
outD: u32,
outH: u32,
outW: u32,
kSize: u32,
stride: u32,
padding: u32,
activation: u32,
};
@group(0) @binding(0) var<uniform> params: Params;
@group(0) @binding(1) var<storage, read> gradOutput: array<f32>;
@group(0) @binding(2) var<storage, read> input: array<f32>;
@group(0) @binding(3) var<storage, read> preAct: array<f32>;
@group(0) @binding(4) var<storage, read_write> gradWeights: array<f32>;
` + wgslActivateDerivative + `
@compute @workgroup_size(64, 1, 1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
let tid = global_id.x;
let kVol = params.kSize * params.kSize * params.kSize;
let weightSize = params.filters * params.inC * kVol;
if (tid >= weightSize) { return; }
let f = tid / (params.inC * kVol);
let rem = tid % (params.inC * kVol);
let ic = rem / kVol;
let rem2 = rem % kVol;
let kd = rem2 / (params.kSize * params.kSize);
let rem3 = rem2 % (params.kSize * params.kSize);
let kh = rem3 / params.kSize;
let kw = rem3 % params.kSize;
var sum: f32 = 0.0;
for (var b: u32 = 0u; b < params.batchSize; b++) {
for (var od: u32 = 0u; od < params.outD; od++) {
for (var oh: u32 = 0u; oh < params.outH; oh++) {
for (var ow: u32 = 0u; ow < params.outW; ow++) {
let id = i32(od * params.stride) + i32(kd) - i32(params.padding);
let ih = i32(oh * params.stride) + i32(kh) - i32(params.padding);
let iw = i32(ow * params.stride) + i32(kw) - i32(params.padding);
if (id >= 0 && id < i32(params.inD) &&
ih >= 0 && ih < i32(params.inH) &&
iw >= 0 && iw < i32(params.inW)) {
let outIdx = (((b * params.filters + f) * params.outD + od) * params.outH + oh) * params.outW + ow;
let dy = gradOutput[outIdx] * activateDerivative(preAct[outIdx], params.activation);
let inIdx = (((b * params.inC + ic) * params.inD + u32(id)) * params.inH + u32(ih)) * params.inW + u32(iw);
sum += dy * input[inIdx];
}
}
}
}
}
gradWeights[tid] += sum;
}
`
// ShaderCNN3BackwardDX is the WGSL source of the compute shader that produces
// input gradients for a 3D convolution layer.
//
// Each invocation (workgroup size 64) handles one input element: the flat
// index tid is decomposed into (b, ic, id, ih, iw), and invocations with
// tid >= batchSize*inC*inD*inH*inW return immediately. For that element the
// shader walks every filter and kernel offset, computes in + padding - k per
// axis, and only contributes when that value is non-negative, divisible by
// stride, and maps to an output coordinate inside (outD, outH, outW). Each
// contribution is gradOutput * activateDerivative(preAct, activation) at that
// output position, multiplied by the matching weight.
//
// The index arithmetic treats gradOutput and preAct as
// [batch][filter][outD][outH][outW] and weights as
// [filter][inC][kSize][kSize][kSize]. The result is added (+=) to
// gradInput[tid], so whatever the buffer already holds is accumulated into
// rather than overwritten.
//
// NOTE(review): activateDerivative is presumably supplied by the spliced-in
// wgslActivateDerivative snippet, which is defined elsewhere — confirm.
const ShaderCNN3BackwardDX = `
struct Params {
batchSize: u32,
inC: u32,
inD: u32,
inH: u32,
inW: u32,
filters: u32,
outD: u32,
outH: u32,
outW: u32,
kSize: u32,
stride: u32,
padding: u32,
activation: u32,
};
@group(0) @binding(0) var<uniform> params: Params;
@group(0) @binding(1) var<storage, read> gradOutput: array<f32>;
@group(0) @binding(2) var<storage, read> weights: array<f32>;
@group(0) @binding(3) var<storage, read> preAct: array<f32>;
@group(0) @binding(4) var<storage, read_write> gradInput: array<f32>;
` + wgslActivateDerivative + `
@compute @workgroup_size(64, 1, 1)
fn main(@builtin(global_invocation_id) global_id: vec3<u32>) {
let tid = global_id.x;
let inVol = params.inD * params.inH * params.inW;
let size = params.batchSize * params.inC * inVol;
if (tid >= size) { return; }
let b = tid / (params.inC * inVol);
let rem = tid % (params.inC * inVol);
let ic = rem / inVol;
let rem2 = rem % inVol;
let id = rem2 / (params.inH * params.inW);
let rem3 = rem2 % (params.inH * params.inW);
let ih = rem3 / params.inW;
let iw = rem3 % params.inW;
var sum: f32 = 0.0;
for (var f: u32 = 0u; f < params.filters; f++) {
for (var kd: u32 = 0u; kd < params.kSize; kd++) {
for (var kh: u32 = 0u; kh < params.kSize; kh++) {
for (var kw: u32 = 0u; kw < params.kSize; kw++) {
let vd = i32(id) + i32(params.padding) - i32(kd);
let vh = i32(ih) + i32(params.padding) - i32(kh);
let vw = i32(iw) + i32(params.padding) - i32(kw);
if (vd >= 0 && vd % i32(params.stride) == 0 &&
vh >= 0 && vh % i32(params.stride) == 0 &&
vw >= 0 && vw % i32(params.stride) == 0) {
let od = u32(vd / i32(params.stride));
let oh = u32(vh / i32(params.stride));
let ow = u32(vw / i32(params.stride));
if (od < params.outD && oh < params.outH && ow < params.outW) {
let outIdx = (((b * params.filters + f) * params.outD + od) * params.outH + oh) * params.outW + ow;
let dy = gradOutput[outIdx] * activateDerivative(preAct[outIdx], params.activation);
let kWIdx = (((f * params.inC + ic) * params.kSize + kd) * params.kSize + kh) * params.kSize + kw;
sum += dy * weights[kWIdx];
}
}
}
}
}
}
gradInput[tid] += sum;
}
`
const ShaderEmbedding = `` /* 806-byte string literal not displayed */
const ShaderEmbeddingBackward = `` /* 1300-byte string literal not displayed */
const ShaderKVUpdate = `` /* 953-byte string literal not displayed */
const ShaderLSTMStep = `` /* 2344-byte string literal not displayed */
const ShaderMHABackward = `` /* 3201-byte string literal not displayed */
const ShaderMSEGradPartialLoss = `` /* 1177-byte string literal not displayed */
ShaderMSEGradPartialLoss computes MSE gradients and partial loss sums entirely on GPU. Each workgroup of 256 threads reduces its elements, writing one partial sum to partials[wg_id.x]. CPU sums the partials array (ceil(N/256) floats) for the total loss — no full-output readback needed.
const ShaderRMSNorm = `` /* 1228-byte string literal not displayed */
const ShaderRMSNormBackward = `` /* 1992-byte string literal not displayed */
const ShaderRNNStep = `` /* 1270-byte string literal not displayed */
const ShaderResidualAdd = `` /* 425-byte string literal not displayed */
const ShaderResidualBackward = `` /* 537-byte string literal not displayed */
const ShaderRoPE = `` /* 1067-byte string literal not displayed */
const ShaderSwiGLUBackward = `` /* 955-byte string literal not displayed */
Variables ¶
var ( // ChatML is used by Qwen, SmolLM2, etc. ChatML = Template{ Name: "chatml", RolePrefixes: map[string]string{ "system": "<|im_start|>system\n", "user": "<|im_start|>user\n", "assistant": "<|im_start|>assistant\n", }, RoleSuffixes: map[string]string{ "system": "<|im_end|>\n", "user": "<|im_end|>\n", "assistant": "<|im_end|>\n", }, } // Llama3 markers Llama3 = Template{ Name: "llama3", RolePrefixes: map[string]string{ "system": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n", "user": "<|start_header_id|>user<|end_header_id|>\n\n", "assistant": "<|start_header_id|>assistant<|end_header_id|>\n\n", }, RoleSuffixes: map[string]string{ "system": "<|eot_id|>", "user": "<|eot_id|>", "assistant": "<|eot_id|>", }, } )
Preset templates
var BrainTypeNames = []string{
"Dense", "MHA", "SwiGLU", "RMSNorm", "RNN", "LSTM", "LayerNorm",
"Embedding", "KMeans", "Softmax", "Parallel", "Sequential",
}
var UserHints = make(map[int]LayerType)
UserHints allows manual mapping for ambiguous tensor indices.
Functions ¶
func Activate ¶
func Activate[T Numeric](v T, act ActivationType) T
Activate applies the activation function to a value.
func ActivateDerivative ¶
func ActivateDerivative[T Numeric](v T, act ActivationType) T
ActivateDerivative returns the derivative of the activation function.
func AlignedFloat32 ¶
AlignedFloat32 allocates a slice of float32 aligned to 64-byte boundaries.
func ApplyRecursiveGradients ¶
func ApplyRecursiveGradients(layer *VolumetricLayer, gradWeights *Tensor[float32], lr float32)
ApplyRecursiveGradients traverses the layer hierarchy and updates weights in all nested WeightStores.
func ApplyTargetPropGaps ¶
func ApplyTargetPropGaps[T Numeric](n *VolumetricNetwork, s *TargetPropState[T], lr float32)
ApplyTargetPropGaps assigns weight updates based on configuration.
func BindGroupKeyHash ¶
func BindGroupKeyHash(pipeline *wgpu.ComputePipeline, buffers ...*wgpu.Buffer) uint64
BindGroupKeyHash generates a stable hash for a set of buffers and a pipeline.
func CalculateLoss ¶
CalculateLoss computes the loss between output and target.
func CalculateOptimalGPUTileSizeFromLimits ¶
CalculateOptimalGPUTileSizeFromLimits derives the best GPU tiling size from raw WebGPU Limits.
sharedMemBytes = adapter.GetLimits().Limits.MaxComputeWorkgroupStorageSize maxInvocations = adapter.GetLimits().Limits.MaxComputeInvocationsPerWorkgroup headDim = model head dimension (e.g. 64, 128)
Logic: each tile row costs headDim*2*4 bytes (K+V, float32). We use at most half of shared mem so the driver has spill room. Result is clamped to [8, 64] and aligned to 8 to match the WGSL shader workgroup size.
func CalculateOptimalTileSize ¶
CalculateOptimalTileSize returns a tile size that fits the working set in L1/L2. For MHA: Working set = TileSize * headDim * 2 * 4 (K and V tiles in float32)
func CastWeights ¶
CastWeights is a universal utility to extract and cast weight slices from the polymorphic WeightStore. It is the "Universal Converter" that allows any layer type (Dense, CNN, MHA) to access weights in their required numeric type on-the-fly.
func ComputeSilhouetteScore ¶
ComputeSilhouetteScore calculates the mean Silhouette Coefficient of all samples.
func ConvertSlice ¶
ConvertSlice is a helper for the CastWeights generic engine.
func CosineDistance ¶
CosineDistance computes the semantic distance (1 - cosine similarity) between vectors.
func CosineSimilarity ¶
func CosineSimilarity(s1, s2 LayerSignature) float32
CosineSimilarity acts as the "slider" (-1.0 to 1.0) for comparing two layer signatures.
func DequantizeQ4_0 ¶
DequantizeQ4_0 converts Q4_0 blocks back to f32.
func EuclideanDistance ¶
EuclideanDistance computes the distance between a Numeric slice and a float32 centroid.
func EuclideanDistanceT ¶
EuclideanDistanceT computes distance between two Numeric slices.
func GetDeviceDescription ¶
func GetDeviceDescription(net *VolumetricNetwork) string
GetDeviceDescription returns a human-readable string of the running OS, CPU, RAM, and GPU.
func GroupRelatedTensors ¶
func GroupRelatedTensors(detected []DetectedTensor) map[string][]DetectedTensor
GroupRelatedTensors identifies groups of tensors that belong to the same complex layer.
func HierarchicalGroup ¶
HierarchicalGroup performs a simple agglomerative grouping until a distance threshold is met.
func KMeansCluster ¶
func KMeansCluster[T Numeric](data []*Tensor[T], k int, maxIter int, parallel bool) (centroids [][]float32, assignments []int)
KMeansCluster performs K-means clustering on a set of tensors.
func LoadSafetensors ¶
LoadSafetensors reads a safetensors file and returns tensors by name
func LoadSafetensorsFromBytes ¶
LoadSafetensorsFromBytes reads safetensors data from a byte slice and returns tensors by name
func LoadSafetensorsWithShapes ¶
func LoadSafetensorsWithShapes(data []byte) (map[string]TensorWithShape, error)
LoadSafetensorsWithShapes loads safetensors and returns both values and shapes
func LoadUniversalDetailed ¶
func LoadUniversalDetailed(path string) (int, []LayerArchetype, []int, []TensorMeta, error)
LoadUniversalDetailed performs a deep analysis of a safetensors file.
func LoadWithPrefixes ¶
func LoadWithPrefixes(net *VolumetricNetwork, tensors map[string][]float32) error
LoadWithPrefixes loads weights into a VolumetricNetwork by interpreting layer indices and prefixes
func MajorityVote ¶
MajorityVote performs hard-voting across multiple model outputs (class indices).
func MorphLayer ¶
func MorphLayer(layer *VolumetricLayer, target DType) error
MorphLayer performs an on-the-fly conversion of a layer's weights to a new DType.
func MultiNetworkEvaluation ¶
func MultiNetworkEvaluation[T Numeric](models map[string]*VolumetricNetwork, inputs []*Tensor[T], expected []float64) (map[string]*DeviationMetrics, error)
MultiNetworkEvaluation benchmarks multiple models on the same data.
func PerformanceSimilarity ¶
func PerformanceSimilarity(mA, mB ModelPerformance) float64
PerformanceSimilarity calculates cosine similarity between two model masks.
func PrintEnsembleReport ¶
func PrintEnsembleReport(matches []EnsembleMatch, topN int)
PrintEnsembleReport generates a human-readable summary of the best matches.
func PrintMultiNetworkSummary ¶
func PrintMultiNetworkSummary(results map[string]*DeviationMetrics)
func SampleTopK ¶
SampleTopK performs top-K sampling with temperature and optional determinism
func SerializeNetwork ¶
func SerializeNetwork(net *VolumetricNetwork) ([]byte, error)
SerializeNetwork converts a VolumetricNetwork into a JSON byte slice.
func ShaderDenseBackwardDW ¶ added in v0.73.0
ShaderDenseBackwardDW calculates gradWeights = gradOutput^T * input dw = dy^T * x => dw[o, i] = sum_b dy[b, o] * x[b, i]
func ShaderDenseBackwardDX ¶ added in v0.73.0
ShaderDenseBackwardDX calculates gradInput = gradOutput * weights dx = dy * W^T => dx[b, i] = sum_o dy[b, o] * W[o, i]
func ShaderTiledDenseN ¶
func ShaderTiledDenseQ4 ¶
ShaderTiledDenseN generates a tiled dense (matmul) shader for the given tile size. The tile size is baked into the WGSL workgroup array and @workgroup_size. ShaderTiledDenseQ4 generates a tiled dense shader that dequantizes 4-bit weights on the fly. Block size is 32: 1 f32 scale + 16 bytes (32 nibbles).
func ShaderTiledMHAN ¶
ShaderTiledMHAN generates a tiled MHA shader for the given tile size and headDim. Both are baked in as WGSL compile-time constants.
func ShaderTiledSwiGLUN ¶
ShaderTiledSwiGLUN generates a tiled SwiGLU shader for the given tile size.
func ShaderTiledSwiGLUQ4 ¶
ShaderTiledSwiGLUQ4 generates a tiled SwiGLU shader with Q4_0 weights.
func SimulatePrecision ¶
SimulatePrecision handles the numerical simulation of low-bit and non-standard types. It is the universal "Metamorphosis" engine used across Dense, CNN, and RNN layers.
func SoftmaxBackward ¶
SoftmaxBackward is a helper for Softmax Jacobian
func SoftmaxEntmaxHelper ¶
SoftmaxEntmaxHelper implements entmax-1.5 approximation
func SoftmaxSparseHelper ¶
SoftmaxSparseHelper implements sparsemax
func SystolicApplyTargetProp ¶
func SystolicApplyTargetProp[T Numeric](n *VolumetricNetwork, s *SystolicState[T], globalTarget *Tensor[T], lr float32)
SystolicApplyTargetProp bridges the Systolic state with the Target Propagation machinery. It uses the core 'Gap-Bridging' logic to update weights across the volumetric mesh.
func SystolicForward ¶
func SystolicForward[T Numeric](n *VolumetricNetwork, s *SystolicState[T], captureHistory bool) time.Duration
SystolicForward executes one "Clock Cycle" across the entire 3D grid. Every layer processes its current input buffer and writes to the next buffer.
func TargetPropBackward ¶
func TargetPropBackward[T Numeric](n *VolumetricNetwork, s *TargetPropState[T], target *Tensor[T])
TargetPropBackward generates targets or gradients from the output back to the input.
func TargetPropBackwardChainRule ¶
func TargetPropBackwardChainRule[T Numeric](n *VolumetricNetwork, s *TargetPropState[T], target *Tensor[T])
TargetPropBackwardChainRule uses standard gradients to shift targets.
func TargetPropBackwardTargetProp ¶
func TargetPropBackwardTargetProp[T Numeric](n *VolumetricNetwork, s *TargetPropState[T], target *Tensor[T])
TargetPropBackwardTargetProp uses true Target Propagation (without derivatives).
Types ¶
type ActivationType ¶
type ActivationType int
ActivationType defines the activation function
const ( ActivationReLU ActivationType = 0 ActivationSilu ActivationType = 1 ActivationGELU ActivationType = 2 ActivationTanh ActivationType = 3 ActivationSigmoid ActivationType = 4 ActivationLinear ActivationType = -1 )
func ParseActivationType ¶
func ParseActivationType(s string) ActivationType
ParseActivationType converts a string to an ActivationType.
func (ActivationType) String ¶
func (a ActivationType) String() string
type AdaptationResult ¶
type AdaptationResult struct {
ModelName string `json:"model_name"`
ModeName string `json:"mode_name"`
TotalOutputs int `json:"total_outputs"`
AvgAccuracy float64 `json:"avg_accuracy"`
Windows []TimeWindow `json:"windows"`
TaskChanges []TaskChange `json:"task_changes"`
Duration time.Duration `json:"duration"`
}
type AdaptationTracker ¶
type AdaptationTracker struct {
// contains filtered or unexported fields
}
func NewAdaptationTracker ¶
func NewAdaptationTracker(winDur, totalDur time.Duration) *AdaptationTracker
func (*AdaptationTracker) Finalize ¶
func (at *AdaptationTracker) Finalize() *AdaptationResult
func (*AdaptationTracker) RecordOutput ¶
func (at *AdaptationTracker) RecordOutput(correct bool)
func (*AdaptationTracker) Start ¶
func (at *AdaptationTracker) Start(initialTask string, initialTaskID int)
type AggregatingObserver ¶
type AggregatingObserver struct {
WindowSize int
History []LayerStats
Events []PolyLayerEvent
// contains filtered or unexported fields
}
AggregatingObserver collects statistics over time windows.
func NewAggregatingObserver ¶
func NewAggregatingObserver(windowSize int) *AggregatingObserver
func (*AggregatingObserver) OnBackward ¶
func (o *AggregatingObserver) OnBackward(e PolyLayerEvent)
func (*AggregatingObserver) OnForward ¶
func (o *AggregatingObserver) OnForward(e PolyLayerEvent)
type ArchConfig ¶
type ArchConfig struct {
ID int `json:"id"`
Name string `json:"name"`
GridDepth int `json:"gridDepth"`
GridRows int `json:"gridRows"`
GridCols int `json:"gridCols"`
LayersPerCell int `json:"layersPerCell"`
DModel int `json:"dModel"`
NumHeads int `json:"numHeads"`
Activation ActivationType `json:"activation"`
DType DType `json:"dtype"`
InitScale float32 `json:"initScale"`
}
type BindGroupKey ¶
type BindGroupKey struct {
Pipeline *wgpu.ComputePipeline
Buffers []*wgpu.Buffer
}
BindGroupKey is used for the BindGroupCache
type ComparisonResult ¶
type ComparisonResult struct {
Name string `json:"name"`
NumLayers int `json:"num_layers"`
Methods map[string]TrainingMetrics `json:"methods"`
}
ComparisonResult holds results from comparing multiple training methods.
func NewComparisonResult ¶
func NewComparisonResult(name string, numLayers int) *ComparisonResult
NewComparisonResult initializes a ComparisonResult.
func (*ComparisonResult) DetermineBest ¶
func (cr *ComparisonResult) DetermineBest() string
DetermineBest returns the name of the best performing training method.
type ConsoleObserver ¶
type ConsoleObserver struct{}
ConsoleObserver prints events to stdout.
func (*ConsoleObserver) OnBackward ¶
func (o *ConsoleObserver) OnBackward(e PolyLayerEvent)
func (*ConsoleObserver) OnForward ¶
func (o *ConsoleObserver) OnForward(e PolyLayerEvent)
type DType ¶
type DType int
DType defines the numerical type stored in a Tensor or WeightStore
const ( DTypeFloat64 DType = 0 // 64-bit double DTypeFloat32 DType = 1 // Standard 32-bit float DTypeFloat16 DType = 2 // 16-bit float DTypeBFloat16 DType = 3 // 16-bit Brain Float DTypeFP8E4M3 DType = 4 // 8-bit FP8 (E4M3) DTypeFP8E5M2 DType = 5 // 8-bit FP8 (E5M2) DTypeInt64 DType = 6 // 64-bit integer DTypeInt32 DType = 7 // 32-bit integer DTypeInt16 DType = 8 // 16-bit integer DTypeInt8 DType = 9 // 8-bit integer DTypeUint64 DType = 10 // 64-bit unsigned DTypeUint32 DType = 11 // 32-bit unsigned DTypeUint16 DType = 12 // 16-bit unsigned DTypeUint8 DType = 13 // 8-bit unsigned DTypeInt4 DType = 14 // 4-bit integer DTypeUint4 DType = 15 // 4-bit unsigned DTypeFP4 DType = 16 // 4-bit E2M1 DTypeInt2 DType = 17 // 2-bit integer DTypeUint2 DType = 18 // 2-bit unsigned DTypeTernary DType = 19 // 2-bit (Ternary: -1, 0, 1) DTypeBinary DType = 20 // 1-bit (XNOR-Net) )
type DetectedTensor ¶
type DetectedTensor struct {
Name string
Shape []int
DType string
InSize int
OutSize int
CanLoad bool
}
DetectedTensor represents a tensor found in a model file.
type DeviationBucket ¶
type DeviationBucket struct {
RangeMin float64 `json:"range_min"`
RangeMax float64 `json:"range_max"`
Count int `json:"count"`
Samples []int `json:"samples"`
}
DeviationBucket represents a specific deviation percentage range.
type DeviationMetrics ¶
type DeviationMetrics struct {
Buckets map[string]*DeviationBucket `json:"buckets"`
Score float64 `json:"score"` // 0-100 quality score
TotalSamples int `json:"total_samples"`
Failures int `json:"failures"` // 100%+ deviations
Results []PredictionResult `json:"results"`
AverageDeviation float64 `json:"avg_deviation"`
CorrectCount int `json:"correct_count"`
Accuracy float64 `json:"accuracy"`
}
DeviationMetrics stores the model performance breakdown.
func EvaluateNetworkPolymorphic ¶
func EvaluateNetworkPolymorphic[T Numeric](n *VolumetricNetwork, inputs []*Tensor[T], expected []float64) (*DeviationMetrics, error)
EvaluateNetworkPolymorphic evaluates a VolumetricNetwork across multiple inputs.
func NewDeviationMetrics ¶
func NewDeviationMetrics() *DeviationMetrics
NewDeviationMetrics initializes empty metrics.
func (*DeviationMetrics) ComputeFinalMetrics ¶
func (dm *DeviationMetrics) ComputeFinalMetrics()
ComputeFinalMetrics completes the scoring.
func (*DeviationMetrics) PrintSummary ¶
func (dm *DeviationMetrics) PrintSummary()
func (*DeviationMetrics) UpdateMetrics ¶
func (dm *DeviationMetrics) UpdateMetrics(result PredictionResult)
UpdateMetrics adds one prediction to the metrics.
type EnsembleMatch ¶
type EnsembleMatch struct {
ModelA string
ModelB string
Coverage float64 // Combined coverage (0.0 - 1.0)
Overlap float64 // Percentage of samples both got right
}
EnsembleMatch represents a pair of models that complement each other.
func FindComplementaryMatches ¶
func FindComplementaryMatches(models []ModelPerformance, minCoverage float64) []EnsembleMatch
FindComplementaryMatches identifies pairs of models whose combined coverage is maximized.
type GenOptions ¶
type GenOptions struct {
MaxTokens int
Temperature float32
TopK int
Deterministic bool
UseKVCache bool
RepetitionPenalty float32
RepetitionWindow int
EOSTokens []int
}
GenOptions defines the generation parameters
type HTTPObserver ¶
type HTTPObserver struct {
URL string
// contains filtered or unexported fields
}
HTTPObserver sends events to an HTTP endpoint.
func NewHTTPObserver ¶
func NewHTTPObserver(url string) *HTTPObserver
func (*HTTPObserver) OnBackward ¶
func (o *HTTPObserver) OnBackward(e PolyLayerEvent)
func (*HTTPObserver) OnForward ¶
func (o *HTTPObserver) OnForward(e PolyLayerEvent)
type HardwareInfo ¶
type HardwareInfo struct {
L1DataCacheSize int // in bytes
L2CacheSize int // in bytes
L3CacheSize int // in bytes
NumCPU int
}
HardwareInfo stores metadata about the running system to optimize tiling.
func GetHardwareInfo ¶
func GetHardwareInfo() HardwareInfo
GetHardwareInfo attempts to detect cache sizes and CPU info.
type LayerArchetype ¶
type LayerArchetype struct {
Type LayerType
TypeName string
Indices map[string]int
GeomMetrics map[string]int
}
LayerArchetype represents a detected structural unit in the model.
func ProbeDeepGeometry ¶
func ProbeDeepGeometry(geoms []TensorMeta) ([]LayerArchetype, []int)
ProbeDeepGeometry identifies layer patterns within a set of tensors.
type LayerSignature ¶
type LayerSignature struct {
Z, Y, X, L int
Type LayerType
DType DType
Weights []float32 // Normalized, precision-simulated weights
}
LayerSignature represents the unique 3D topological "DNA" of a layer.
type LayerSpec ¶
type LayerSpec struct {
// Position
Z int `json:"z"`
Y int `json:"y"`
X int `json:"x"`
L int `json:"l"`
// Core Type
Type string `json:"type"`
Activation string `json:"activation"`
DType string `json:"dtype"`
// Dimensions & Config
InputHeight int `json:"input_height"`
InputWidth int `json:"input_width"`
InputDepth int `json:"input_depth"`
OutputHeight int `json:"output_height"`
OutputWidth int `json:"output_width"`
OutputDepth int `json:"output_depth"`
InputChannels int `json:"input_channels"`
Filters int `json:"filters"`
KernelSize int `json:"kernel_size"`
Stride int `json:"stride"`
Padding int `json:"padding"`
NumHeads int `json:"num_heads"`
NumKVHeads int `json:"num_kv_heads"`
DModel int `json:"d_model"`
SeqLength int `json:"seq_length"`
VocabSize int `json:"vocab_size"`
EmbeddingDim int `json:"embedding_dim"`
NumClusters int `json:"num_clusters"`
OutputMode string `json:"output_mode"`
// Recursive structures
ParallelBranches []LayerSpec `json:"parallel_branches,omitempty"`
CombineMode string `json:"combine_mode,omitempty"`
SequentialLayers []LayerSpec `json:"sequential_layers,omitempty"`
UseTiling bool `json:"use_tiling,omitempty"`
TileSize int `json:"tile_size,omitempty"`
}
LayerSpec represents the JSON structure for a single layer.
type LayerStats ¶
type LayerStats struct {
Avg float32 `json:"avg"`
Max float32 `json:"max"`
Min float32 `json:"min"`
Active int `json:"active"`
Total int `json:"total"`
}
LayerStats provides summary statistics for a tensor's activations or gradients.
func ComputeLayerStats ¶
func ComputeLayerStats[T Numeric](t *Tensor[T]) LayerStats
ComputeLayerStats calculates summary statistics for a tensor.
type LayerTelemetry ¶
type LayerTelemetry struct {
// Grid position
Z int `json:"z"`
Y int `json:"y"`
X int `json:"x"`
L int `json:"l"`
// Layer info
Type string `json:"type"`
Activation string `json:"activation,omitempty"`
Parameters int `json:"parameters"`
// Dimensions
InputShape []int `json:"input_shape,omitempty"`
OutputShape []int `json:"output_shape,omitempty"`
// For nested/parallel layers
Branches []LayerTelemetry `json:"branches,omitempty"`
CombineMode string `json:"combine_mode,omitempty"`
}
LayerTelemetry contains metadata about a specific layer
func ExtractLayerTelemetry ¶
func ExtractLayerTelemetry(l VolumetricLayer) LayerTelemetry
ExtractLayerTelemetry converts a VolumetricLayer to its telemetry representation.
type LayerType ¶
type LayerType int
LayerType defines the type of neural network layer
const ( LayerDense LayerType = 0 LayerMultiHeadAttention LayerType = 1 LayerSwiGLU LayerType = 2 LayerRMSNorm LayerType = 3 LayerCNN1 LayerType = 4 LayerCNN2 LayerType = 5 LayerCNN3 LayerType = 6 LayerRNN LayerType = 7 LayerLSTM LayerType = 8 LayerLayerNorm LayerType = 9 LayerConvTransposed1D LayerType = 10 LayerConvTransposed2D LayerType = 11 LayerConvTransposed3D LayerType = 12 LayerEmbedding LayerType = 13 LayerKMeans LayerType = 14 LayerSoftmax LayerType = 15 LayerParallel LayerType = 16 LayerSequential LayerType = 17 LayerResidual LayerType = 18 )
func ParseLayerType ¶
ParseLayerType converts a string to a LayerType.
type LogicShift ¶
LogicShift identifies if a specific architectural pattern has moved in space.
type MethodInfo ¶
type MethodInfo struct {
MethodName string `json:"method_name"`
Parameters []ParameterInfo `json:"parameters"`
Returns []string `json:"returns"`
}
MethodInfo represents metadata about a method.
type ModelPerformance ¶
type ModelPerformance struct {
ModelID string
// Mask[i] is true if the model correctly handled sample i.
Mask []bool
}
ModelPerformance holds the correctness mask for a specific model.
type ModelTelemetry ¶
type ModelTelemetry struct {
ID string `json:"id"`
TotalLayers int `json:"total_layers"`
TotalParams int `json:"total_parameters"`
Layers []LayerTelemetry `json:"layers"`
}
ModelTelemetry represents a single network's structure
func ExtractNetworkBlueprint ¶
func ExtractNetworkBlueprint(n *VolumetricNetwork, modelID string) ModelTelemetry
ExtractNetworkBlueprint extracts structural telemetry from a VolumetricNetwork.
type NEATConfig ¶ added in v0.74.0
type NEATConfig struct {
// Probabilities (0.0–1.0)
WeightPerturbRate float64 // Perturb each layer's weights with noise
WeightPerturbScale float32 // Noise magnitude (default 0.05)
NodeMutateRate float64 // Swap a layer's type (and reinitialize its weights)
ConnectionAddRate float64 // Add a remote link (spatial hop) between two layers
ConnectionDropRate float64 // Remove an existing remote link
ActivationMutRate float64 // Swap a layer's activation function
LayerToggleRate float64 // Enable/disable a dormant layer cell
// AllowedLayerTypes for node mutation (nil = use defaults)
AllowedLayerTypes []LayerType
// DModel used when reinitializing a mutated layer's weights
DModel int
// Defaults for layer types that need extra config when reinitializing
DefaultNumHeads int // MHA: number of attention heads (default 4)
DefaultInChannels int // CNN/ConvTransposed: input channels (default 1)
DefaultFilters int // CNN/ConvTransposed: output filters (default 8)
DefaultKernelSize int // CNN/ConvTransposed: kernel size (default 3)
DefaultVocabSize int // Embedding: vocabulary size (default 256)
DefaultNumClusters int // KMeans: number of clusters (default 8)
Seed int64
}
NEATConfig controls which mutations are enabled and their probabilities.
func DefaultNEATConfig ¶ added in v0.74.0
func DefaultNEATConfig(dModel int) NEATConfig
DefaultNEATConfig returns conservative mutation rates supporting all 19 layer types.
type NEATPopulation ¶ added in v0.74.0
type NEATPopulation struct {
Networks []*VolumetricNetwork
Fitnesses []float64
Config NEATConfig
// contains filtered or unexported fields
}
NEATPopulation manages a pool of networks evolving over generations.
func NewNEATPopulation ¶ added in v0.74.0
func NewNEATPopulation(seed *VolumetricNetwork, size int, cfg NEATConfig) *NEATPopulation
NewNEATPopulation creates an initial population by mutating a seed network. Each member starts as a NEATMutate of the seed, giving diversity from day 0.
func (*NEATPopulation) Best ¶ added in v0.74.0
func (p *NEATPopulation) Best() *VolumetricNetwork
Best returns the highest-fitness network from the last Evolve call.
func (*NEATPopulation) BestFitness ¶ added in v0.74.0
func (p *NEATPopulation) BestFitness() float64
BestFitness returns the fitness score of the top network.
func (*NEATPopulation) Evolve ¶ added in v0.74.0
func (p *NEATPopulation) Evolve(fitnessFn func(*VolumetricNetwork) float64)
Evolve runs one generation:
- Evaluate all networks with fitnessFn (higher = better)
- Sort by fitness descending
- Top 25% survive as elites
- Remaining slots filled with SpliceDNA(elite pair) + NEATMutate offspring
fitnessFn should return a positive float64 (e.g., accuracy, reward, 1/loss).
func (*NEATPopulation) Summary ¶ added in v0.74.0
func (p *NEATPopulation) Summary(generation int) string
Summary returns a one-line diagnostic string for the population.
type NetworkBlueprint ¶
type NetworkBlueprint struct {
Models []ModelTelemetry `json:"models"`
}
NetworkBlueprint contains the structural information of a network extracted after loading or building.
type NetworkComparisonResult ¶
type NetworkComparisonResult struct {
OverallOverlap float32
LayerOverlaps map[string]float32 // "z,y,x,l" -> score
LogicShifts []LogicShift
}
NetworkComparisonResult holds the hierarchical similarity metrics.
func CompareNetworks ¶
func CompareNetworks(dna1, dna2 NetworkDNA) NetworkComparisonResult
CompareNetworks performs the hierarchical spatial correlation between two blueprints.
type NetworkDNA ¶
type NetworkDNA []LayerSignature
NetworkDNA is the complete genetic blueprint of a VolumetricNetwork.
func ExtractDNA ¶
func ExtractDNA(n *VolumetricNetwork) NetworkDNA
ExtractDNA generates the topological signatures for all layers in a network. It uses SimulatePrecision to ensure that comparison reflects the actual numerical behavior.
All 19 layer types are handled:
- Weighted layers (Dense, RNN, LSTM, MHA, CNN*, ConvTransposed*, SwiGLU, RMSNorm, LayerNorm, Embedding, KMeans): signature derived from WeightStore.Master.
- Structural containers (Parallel, Sequential): weights are collected by recursing into ParallelBranches / SequentialLayers, then concatenated and normalized into a single flat signature vector.
- Weightless layers (Softmax, Residual): neutral signature []float32{1.0}.
type NetworkSpec ¶
type NetworkSpec struct {
ID string `json:"id"`
Depth int `json:"depth"`
Rows int `json:"rows"`
Cols int `json:"cols"`
LayersPerCell int `json:"layers_per_cell"`
Layers []LayerSpec `json:"layers"`
}
NetworkSpec represents the top-level JSON structure for a network.
type Numeric ¶
type Numeric interface {
~int | ~int8 | ~int16 | ~int32 | ~int64 |
~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 |
~float32 | ~float64
}
Numeric is a type constraint for all numeric types that Tensors can hold.
type PairWithIndex ¶
type ParameterInfo ¶
ParameterInfo represents metadata about a parameter.
type PersistenceLayerSpec ¶
type PersistenceLayerSpec struct {
Z int `json:"z"`
Y int `json:"y"`
X int `json:"x"`
L int `json:"l"`
Type string `json:"type"`
Activation string `json:"activation"`
DType string `json:"dtype"`
InputHeight int `json:"input_height,omitempty"`
InputWidth int `json:"input_width,omitempty"`
InputDepth int `json:"input_depth,omitempty"`
OutputHeight int `json:"output_height,omitempty"`
OutputWidth int `json:"output_width,omitempty"`
OutputDepth int `json:"output_depth,omitempty"`
InputChannels int `json:"input_channels,omitempty"`
Filters int `json:"filters,omitempty"`
KernelSize int `json:"kernel_size,omitempty"`
Stride int `json:"stride,omitempty"`
Padding int `json:"padding,omitempty"`
OutputPadding int `json:"output_padding,omitempty"`
NumHeads int `json:"num_heads,omitempty"`
NumKVHeads int `json:"num_kv_heads,omitempty"`
HeadDim int `json:"head_dim,omitempty"`
DModel int `json:"d_model,omitempty"`
SeqLength int `json:"seq_length,omitempty"`
RoPEFreqBase float64 `json:"rope_freq_base,omitempty"`
VocabSize int `json:"vocab_size,omitempty"`
EmbeddingDim int `json:"embedding_dim,omitempty"`
NumClusters int `json:"num_clusters,omitempty"`
KMeansTemperature float64 `json:"kmeans_temperature,omitempty"`
OutputMode string `json:"output_mode,omitempty"`
SoftmaxType string `json:"softmax_type,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
SoftmaxRows int `json:"softmax_rows,omitempty"`
SoftmaxCols int `json:"softmax_cols,omitempty"`
EntmaxAlpha float64 `json:"entmax_alpha,omitempty"`
GumbelNoise bool `json:"gumbel_noise,omitempty"`
// Weights
Weights string `json:"weights,omitempty"` // Base64 encoded weights
Native bool `json:"native,omitempty"` // True if weights are in target DType, False if Master FP32
Scale float32 `json:"scale,omitempty"`
// Recursion
ParallelBranches []PersistenceLayerSpec `json:"parallel_branches,omitempty"`
CombineMode string `json:"combine_mode,omitempty"`
SequentialLayers []PersistenceLayerSpec `json:"sequential_layers,omitempty"`
UseTiling bool `json:"use_tiling,omitempty"`
TileSize int `json:"tile_size,omitempty"`
}
PersistenceLayerSpec represents the serializable state of a VolumetricLayer.
type PersistenceNetworkSpec ¶
type PersistenceNetworkSpec struct {
ID string `json:"id"`
Depth int `json:"depth"`
Rows int `json:"rows"`
Cols int `json:"cols"`
LayersPerCell int `json:"layers_per_cell"`
Layers []PersistenceLayerSpec `json:"layers"`
}
PersistenceNetworkSpec represents the serializable state of a VolumetricNetwork.
type PolyGradientObserver ¶
type PolyGradientObserver interface {
OnGradient(event PolyLayerEvent)
}
PolyGradientObserver tracks gradient flow through layers.
type PolyLayerEvent ¶
type PolyLayerEvent struct {
Mode string `json:"mode"`
Type string `json:"type"` // "forward" or "backward"
Z int `json:"z"`
Y int `json:"y"`
X int `json:"x"`
L int `json:"l"`
LayerType LayerType `json:"layer_type"`
Stats LayerStats `json:"stats"`
StepCount uint64 `json:"step_count"`
ModelID string `json:"model_id"`
}
PolyLayerEvent captures state during a forward or backward pass.
type PolyObserver ¶
type PolyObserver interface {
OnForward(event PolyLayerEvent)
OnBackward(event PolyLayerEvent)
}
PolyObserver defines the interface for tracking neural activity in polymorphic layers.
type PreTokenizer ¶
PreTokenizer handles text splitting before byte-pair encoding (BPE) is applied.
func (*PreTokenizer) SplitWithSpecialTokens ¶
func (pt *PreTokenizer) SplitWithSpecialTokens(text string, specialTokens map[string]int) []string
type PredictionResult ¶
type PredictionResult struct {
SampleIndex int `json:"sample_index"`
ExpectedOutput float64 `json:"expected"`
ActualOutput float64 `json:"actual"`
Deviation float64 `json:"deviation"` // % error
Bucket string `json:"bucket"`
}
PredictionResult represents model performance on a single prediction.
func EvaluatePrediction ¶
func EvaluatePrediction(sampleIndex int, expected, actual float64) PredictionResult
EvaluatePrediction categorizes expected vs actual results.
type PrefixWeightMapper ¶
PrefixWeightMapper handles mapping tensors with potentially complex prefixes
func NewPrefixWeightMapper ¶
func NewPrefixWeightMapper() *PrefixWeightMapper
NewPrefixWeightMapper creates a default mapper for common LLM architectures
func (*PrefixWeightMapper) Find ¶
func (m *PrefixWeightMapper) Find(tensors map[string][]float32, role string) []float32
Find searches for a tensor based on the patterns registered for a role
func (*PrefixWeightMapper) MapWeights ¶
func (m *PrefixWeightMapper) MapWeights(tensors map[string][]float32) (embeddings, lmHead, finalNorm []float32, hasFinalNorm bool)
MapWeights finds weights for specific roles in the provided tensor map, handling generic prefixes
type Q4_0Block ¶
Q4_0Block represents a block of 32 quantized 4-bit weights. Total size: 4 bytes (f32 scale) + 16 bytes (32 nibbles) = 20 bytes. Bandwidth: 20 / 32 = 0.625 bytes per weight.
func QuantizeQ4_0 ¶
QuantizeQ4_0 converts a slice of f32 weights into Q4_0 blocks.
type SafetensorsHeader ¶
type SafetensorsHeader struct {
Tensors map[string]TensorInfo `json:"-"`
}
SafetensorsHeader contains metadata about tensors in the file
type SoftmaxType ¶
type SoftmaxType int
SoftmaxType defines the variant of softmax to use
const ( SoftmaxStandard SoftmaxType = 0 SoftmaxGrid SoftmaxType = 1 SoftmaxHierarchical SoftmaxType = 2 SoftmaxTemperature SoftmaxType = 3 SoftmaxGumbel SoftmaxType = 4 SoftmaxMasked SoftmaxType = 5 SoftmaxSparse SoftmaxType = 6 SoftmaxAdaptive SoftmaxType = 7 SoftmaxMixture SoftmaxType = 8 SoftmaxEntmax SoftmaxType = 9 )
func ParseSoftmaxType ¶
func ParseSoftmaxType(s string) SoftmaxType
ParseSoftmaxType converts string to SoftmaxType.
func (SoftmaxType) String ¶
func (s SoftmaxType) String() string
type SpliceConfig ¶ added in v0.74.0
type SpliceConfig struct {
// CrossoverMode: "uniform", "point", or "blend"
CrossoverMode string
// BlendAlpha: interpolation factor for "blend" mode (0=all A, 1=all B)
BlendAlpha float32
// SplitRatio: fraction of weights taken from parent A in "point" mode
SplitRatio float64
// FitnessA/B: optional fitness scores to bias crossover toward fitter parent
FitnessA float64
FitnessB float64
}
SpliceConfig controls how two parent networks are combined.
func DefaultSpliceConfig ¶ added in v0.74.0
func DefaultSpliceConfig() SpliceConfig
DefaultSpliceConfig returns a balanced blend configuration.
type SpliceResult ¶ added in v0.74.0
type SpliceResult struct {
Child *VolumetricNetwork
ParentADNA NetworkDNA
ParentBDNA NetworkDNA
ChildDNA NetworkDNA
Similarities map[string]float32 // "z,y,x,l" -> cosine similarity used
BlendedCount int // number of layers actually blended
}
SpliceResult holds the outcome of a DNA splice operation.
func SpliceDNAWithReport ¶ added in v0.74.0
func SpliceDNAWithReport(parentA, parentB *VolumetricNetwork, cfg SpliceConfig) SpliceResult
SpliceDNAWithReport performs a splice and returns a full diagnostic report. Use this when you want to inspect per-layer similarity scores or log blend stats.
type Streamer ¶
type Streamer struct {
Decode func(tokens []uint32) string
// contains filtered or unexported fields
}
Streamer handles real-time output of generated tokens
func NewStreamer ¶
func (*Streamer) HasNewUserTurn ¶
type SystolicState ¶
type SystolicState[T Numeric] struct { // LayerData holds the current output of every layer in the grid. // Indexing follows VolumetricNetwork.GetIndex(z, y, x, l) LayerData []*Tensor[T] // BackwardContext stores pre-activations and inputs for backpropagation. // These are indexed by [Step][LayerIndex] to allow BPTT across clock cycles. HistoryIn [][]*Tensor[T] HistoryPre [][]*Tensor[T] // Double buffering for simultaneous updates NextBuffer []*Tensor[T] // Grid Metadata StepCount uint64 // contains filtered or unexported fields }
SystolicState holds the temporal snapshot of the 3D grid.
func NewSystolicState ¶
func NewSystolicState[T Numeric](n *VolumetricNetwork) *SystolicState[T]
NewSystolicState initializes a state for a specific Volumetric Network.
func (*SystolicState[T]) SetInput ¶
func (s *SystolicState[T]) SetInput(input *Tensor[T])
SetInput injects data into the starting coordinate (0,0,0,0).
type TargetPropConfig ¶
type TargetPropConfig struct {
BatchSize int
UseChainRule bool // If true, targets = Act + Grad * Scale
GradientScale float32 // Scaling factor for chaining
DepthScaleFactor float32 // Gradient boosting for deeper layers
Momentum float32
LearningRate float32
// Clamping for stability
ActivationClamp float32
}
TargetPropConfig holds tunable parameters for Neural Target Propagation.
func DefaultTargetPropConfig ¶
func DefaultTargetPropConfig() *TargetPropConfig
DefaultTargetPropConfig returns standard settings for the TargetProp engine.
type TargetPropState ¶
type TargetPropState[T Numeric] struct { ForwardActs []*Tensor[T] PreActs []*Tensor[T] // Internal pre-activation states for weight-bearing layers BackwardTargets []*Tensor[T] // Chain Rule storage Gradients []*Tensor[float32] // Diagnostics LinkBudgets []float32 Gaps []float32 Config *TargetPropConfig TotalLayers int }
TargetPropState tracks the bidirectional signal flow.
func NewTargetPropState ¶
func NewTargetPropState[T Numeric](n *VolumetricNetwork, config *TargetPropConfig) *TargetPropState[T]
NewTargetPropState initializes a state for the given volumetric network.
func (*TargetPropState[T]) CalculateLinkBudgets ¶
func (s *TargetPropState[T]) CalculateLinkBudgets()
CalculateLinkBudgets is a diagnostic that measures how much information is preserved (cosine similarity).
type TaskChange ¶
type TaskChange struct {
AtTime time.Duration `json:"at_time"`
FromTask string `json:"from_task"`
ToTask string `json:"to_task"`
PreChangeWindow int `json:"pre_change_window"`
PostChangeWindow int `json:"post_change_window"`
PreAccuracy float64 `json:"pre_accuracy"`
PostAccuracy float64 `json:"post_accuracy"`
RecoveryTime time.Duration `json:"recovery_time"`
}
type Template ¶
type Template struct {
Name string
RolePrefixes map[string]string
RoleSuffixes map[string]string
GlobalPrefix string
GlobalSuffix string
}
Template defines the formatting markers for different chat styles
func (Template) BuildNextTurnSegment ¶
BuildNextTurnSegment returns only the text that is NEW compared to what the KV cache already holds.
type Tensor ¶
type Tensor[T Numeric] struct { Data []T DType DType Shape []int Nested []*Tensor[T] // For recursive activation caching in Parallel/Sequential layers }
Tensor wraps numerical data with metadata.
func BackwardPolymorphic ¶
func BackwardPolymorphic[T Numeric](n *VolumetricNetwork, gradOutput *Tensor[T], inputs, preActs []*Tensor[T]) (gradInput *Tensor[T], layerGradients [][2]*Tensor[T], layerTimes []time.Duration)
BackwardPolymorphic executes a full backward pass through the 3D grid. It propagates gradients from the output back to the input, accumulating weight gradients.
func CNN1BackwardPolymorphic ¶
func CNN1BackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
CNN1BackwardPolymorphic calculates gradients for a 1D convolutional layer.
func CNN1BackwardTiled ¶
func CNN1BackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
CNN1BackwardTiled implements a loop-blocked backward pass for CNN1.
func CNN1ForwardPolymorphic ¶
func CNN1ForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
CNN1ForwardPolymorphic performs a forward pass through a 1D convolutional layer.
func CNN1ForwardTiled ¶
func CNN1ForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
CNN1ForwardTiled implements a loop-blocked forward pass for CNN1.
func CNN2BackwardPolymorphic ¶
func CNN2BackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
CNN2BackwardPolymorphic calculates gradients for a 2D convolutional layer.
func CNN2BackwardTiled ¶
func CNN2BackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
CNN2BackwardTiled implements a loop-blocked backward pass for CNN2.
func CNN2ForwardPolymorphic ¶
func CNN2ForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
CNN2ForwardPolymorphic performs a forward pass through a 2D convolutional layer.
func CNN2ForwardTiled ¶
func CNN2ForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
CNN2ForwardTiled implements a loop-blocked forward pass for CNN2.
func CNN3BackwardPolymorphic ¶
func CNN3BackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
CNN3BackwardPolymorphic calculates gradients for a 3D convolutional layer.
func CNN3BackwardTiled ¶
func CNN3BackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
CNN3BackwardTiled implements a loop-blocked backward pass for CNN3.
func CNN3ForwardPolymorphic ¶
func CNN3ForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
CNN3ForwardPolymorphic performs a forward pass through a 3D convolutional layer.
func CNN3ForwardTiled ¶
func CNN3ForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
CNN3ForwardTiled implements a loop-blocked forward pass for CNN3.
func ComputeLossGradient ¶
ComputeLossGradient computes the gradient of the loss with respect to the output.
func ConvTransposed1DBackwardPolymorphic ¶
func ConvTransposed1DBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
func ConvTransposed1DForwardPolymorphic ¶
func ConvTransposed1DForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
ConvTransposed1DForwardPolymorphic performs a forward pass through a 1D transposed convolutional layer.
func ConvTransposed2DBackwardPolymorphic ¶
func ConvTransposed2DBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
func ConvTransposed2DForwardPolymorphic ¶
func ConvTransposed2DForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
ConvTransposed2DForwardPolymorphic performs a forward pass through a 2D transposed convolutional layer.
func ConvTransposed3DBackwardPolymorphic ¶
func ConvTransposed3DBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
func ConvTransposed3DForwardPolymorphic ¶
func ConvTransposed3DForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
ConvTransposed3DForwardPolymorphic performs a forward pass through a 3D transposed convolutional layer.
func ConvertTensor ¶
ConvertTensor converts a tensor from one numeric type to another.
func DenseBackwardPolymorphic ¶
func DenseBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
DenseBackwardPolymorphic calculates gradients for the dense layer.
func DenseForwardPolymorphic ¶
func DenseForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
DenseForwardPolymorphic performs a forward pass through a dense layer. It handles precision transitions (e.g., FP32 input to FP4 layer).
func DenseForwardTiled ¶
func DenseForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
DenseForwardTiled performs a tiled forward pass for the dense layer.
func DispatchLayer ¶
func DispatchLayer[T Numeric](layer *VolumetricLayer, input, skip *Tensor[T]) (preAct, postAct *Tensor[T])
DispatchLayer acts as the universal routing hub for all layer types. This is the "Jump Table" that handles numerical metamorphosis across 50+ layer types.
func DispatchLayerBackward ¶
func DispatchLayerBackward[T Numeric](layer *VolumetricLayer, gradOutput, input, skip, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
DispatchLayerBackward acts as the universal routing hub for gradients. This handles the backward pass metamorphosis for various layer types.
func EmbeddingBackwardPolymorphic ¶
func EmbeddingBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
EmbeddingBackwardPolymorphic computes gradients for embedding lookup.
func EmbeddingBackwardTiled ¶
func EmbeddingBackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
EmbeddingBackwardTiled implements a loop-blocked gradient calculation for embeddings.
func EmbeddingForwardPolymorphic ¶
func EmbeddingForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
EmbeddingForwardPolymorphic performs an embedding lookup across any numerical type.
func EmbeddingForwardTiled ¶
func EmbeddingForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
EmbeddingForwardTiled implements a loop-blocked embedding lookup for cache efficiency.
func ForwardPolymorphic ¶
func ForwardPolymorphic[T Numeric](n *VolumetricNetwork, input *Tensor[T]) (*Tensor[T], time.Duration, []time.Duration)
ForwardPolymorphic executes the network using a unified generic dispatcher. It iterates through the 3D grid and handles DType transitions between layers.
func KMeansBackwardPolymorphic ¶
func KMeansBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
KMeansBackwardPolymorphic computes gradients for cluster centers and propagates to input.
func KMeansForwardPolymorphic ¶
func KMeansForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
KMeansForwardPolymorphic performs a differentiable K-Means clustering forward pass.
func LSTMBackwardPolymorphic ¶
func LSTMBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
LSTMBackwardPolymorphic calculates gradients for the LSTM layer using BPTT.
func LSTMBackwardTiled ¶
func LSTMBackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
LSTMBackwardTiled implements a tiled (blocked) LSTM backward pass.
func LSTMForwardPolymorphic ¶
func LSTMForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
LSTMForwardPolymorphic performs a forward pass through a polymorphic LSTM layer. preAct stores [iSum, fSum, gSum, oSum, cCurr] (5 * hiddenSize)
func LSTMForwardTiled ¶
func LSTMForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
LSTMForwardTiled implements a tiled (blocked) LSTM forward pass for cache efficiency.
func LayerNormBackwardPolymorphic ¶
func LayerNormBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
LayerNormBackwardPolymorphic calculates gradients for LayerNorm.
func LayerNormForwardPolymorphic ¶
func LayerNormForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
LayerNormForwardPolymorphic performs layer normalization for any numeric type.
func MHABackwardPolymorphic ¶
func MHABackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
MHABackwardPolymorphic handles BPTT-style gradients for MHA.
func MHAForwardPolymorphic ¶
func MHAForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
MHAForwardPolymorphic performs Multi-Head Attention across any numerical type.
func MHAForwardTiled ¶
func MHAForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
MHAForwardTiled performs an optimized, tiled forward pass for MHA.
func NewTensorFromSlice ¶
NewTensorFromSlice creates a tensor from existing data.
func ParallelBackwardPolymorphic ¶
func ParallelBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
ParallelBackwardPolymorphic distributes gradients back to branches.
func ParallelForwardPolymorphic ¶
func ParallelForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
ParallelForwardPolymorphic executes multiple sub-layers in parallel and combines outputs.
func RMSNormBackwardPolymorphic ¶
func RMSNormBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
RMSNormBackwardPolymorphic calculates gradients for RMSNorm.
func RMSNormForwardPolymorphic ¶
func RMSNormForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
RMSNormForwardPolymorphic performs RMS normalization.
func RNNBackwardPolymorphic ¶
func RNNBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
RNNBackwardPolymorphic calculates gradients for the RNN layer using BPTT.
func RNNBackwardTiled ¶
func RNNBackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
RNNBackwardTiled performs a tiled backward pass for RNN using BPTT.
func RNNForwardPolymorphic ¶
func RNNForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
RNNForwardPolymorphic performs a forward pass through an RNN layer. It handles precision transitions and all 21 numerical types.
func RNNForwardTiled ¶
func RNNForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
RNNForwardTiled performs a tiled forward pass for RNN.
func ResidualBackwardPolymorphic ¶
func ResidualBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
ResidualBackwardPolymorphic computes gradients for Residual layer.
func ResidualBackwardTiled ¶
func ResidualBackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
ResidualBackwardTiled performs a tiled backward pass for Residual.
func ResidualForwardPolymorphic ¶
func ResidualForwardPolymorphic[T Numeric](layer *VolumetricLayer, input, skip *Tensor[T]) (preAct, postAct *Tensor[T])
ResidualForwardPolymorphic adds a residual connection: output = input + skip.
func ResidualForwardTiled ¶
func ResidualForwardTiled[T Numeric](layer *VolumetricLayer, input, skip *Tensor[T]) (preAct, postAct *Tensor[T])
ResidualForwardTiled performs a tiled forward pass for Residual.
func SequentialBackwardPolymorphic ¶
func SequentialBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
SequentialBackwardPolymorphic distributes gradients back through the sequence in reverse.
func SequentialForwardPolymorphic ¶
func SequentialForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
SequentialForwardPolymorphic executes multiple sub-layers in sequence.
func SoftmaxBackwardPolymorphic ¶
func SoftmaxBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, postAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
SoftmaxBackwardPolymorphic computes gradients for ALL Softmax variants.
func SoftmaxForwardPolymorphic ¶
func SoftmaxForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
SoftmaxForwardPolymorphic performs a differentiable Softmax forward pass with ALL variants.
func SwiGLUBackwardPolymorphic ¶
func SwiGLUBackwardPolymorphic[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
SwiGLUBackwardPolymorphic calculates gradients for SwiGLU.
func SwiGLUBackwardTiled ¶
func SwiGLUBackwardTiled[T Numeric](layer *VolumetricLayer, gradOutput, input, preAct *Tensor[T]) (gradInput, gradWeights *Tensor[T])
SwiGLUBackwardTiled calculates gradients for SwiGLU using a tiled approach.
func SwiGLUForwardPolymorphic ¶
func SwiGLUForwardPolymorphic[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
SwiGLUForwardPolymorphic performs SwiGLU gated activation: silu(gate) * up then down_proj.
func SwiGLUForwardTiled ¶
func SwiGLUForwardTiled[T Numeric](layer *VolumetricLayer, input *Tensor[T]) (preAct, postAct *Tensor[T])
SwiGLUForwardTiled performs an optimized, tiled forward pass for SwiGLU.
func SystolicBackward ¶
func SystolicBackward[T Numeric](n *VolumetricNetwork, s *SystolicState[T], gradOutput *Tensor[T]) (gradIn *Tensor[T], layerGradients [][2]*Tensor[T], err error)
SystolicBackward propagates gradients backward through the systolic history. It walks backward through clock cycles, accurately routing gradients to their source coordinates.
func TargetPropForward ¶
func TargetPropForward[T Numeric](n *VolumetricNetwork, s *TargetPropState[T], input *Tensor[T]) *Tensor[T]
TargetPropForward executes a standard forward pass but captures ALL activations.
type TensorInfo ¶
type TensorInfo struct {
DType string `json:"dtype"`
Shape []int `json:"shape"`
Offset []int `json:"data_offsets"`
}
TensorInfo describes a tensor's properties
type TensorMeta ¶
type TensorMeta struct {
Idx int
Shape []int
Data []float32
MeanAbs float32
Variance float32
Rank int
OriginalDType DType
}
TensorMeta holds geometric and statistical metadata for a tensor.
type TensorWithShape ¶
TensorWithShape holds tensor data along with its shape
type TimeWindow ¶
type TimeWindow struct {
WindowIndex int `json:"window_index"`
Duration time.Duration `json:"duration"`
Outputs int `json:"outputs"`
Correct int `json:"correct"`
Accuracy float64 `json:"accuracy"`
OutputsPerSec int `json:"outputs_per_sec"`
CurrentTask string `json:"current_task"`
TaskID int `json:"task_id"`
}
type Tokenizer ¶
type Tokenizer struct {
Vocab map[string]int // token -> id
ReverseVocab map[int]string // id -> token
Merges []MergePair // BPE merge rules
SpecialTokens map[string]int // special tokens
AddedTokens map[string]int // added tokens
PreTokenizer *PreTokenizer // pre-tokenization rules
ByteFallback bool // use byte fallback for unknown chars
}
Tokenizer represents a BPE tokenizer
func LoadTokenizer ¶
LoadTokenizer loads a tokenizer from a HuggingFace tokenizer.json file
type TokenizerJSON ¶
type TokenizerJSON struct {
Model struct {
Type string `json:"type"`
Vocab map[string]int `json:"vocab"`
Merges json.RawMessage `json:"merges"`
ByteFallback bool `json:"byte_fallback,omitempty"`
} `json:"model"`
AddedTokens []struct {
ID int `json:"id"`
Content string `json:"content"`
Special bool `json:"special"`
} `json:"added_tokens"`
PreTokenizer struct {
Type string `json:"type"`
Pretokenizers []struct {
Type string `json:"type"`
Pattern struct {
String string `json:"String"`
} `json:"pattern,omitempty"`
} `json:"pretokenizers,omitempty"`
} `json:"pre_tokenizer"`
}
TokenizerJSON represents the HuggingFace tokenizer.json format
type TrainingBatch ¶
TrainingBatch represents a single training batch for the Poly engine.
type TrainingConfig ¶
type TrainingConfig struct {
Epochs int
LearningRate float32
LossType string // "mse" or "cross_entropy"
GradientClip float32 // Max gradient norm (0 = no clipping)
Verbose bool
UseGPU bool
DeviceID int
TrackPerf bool
}
TrainingConfig holds configuration for training in the Volumetric Grid.
func DefaultTrainingConfig ¶
func DefaultTrainingConfig() *TrainingConfig
DefaultTrainingConfig returns sensible defaults for the Bedrock architecture.
type TrainingMetrics ¶
type TrainingMetrics struct {
Steps int `json:"steps"`
Accuracy float64 `json:"accuracy"`
Loss float64 `json:"loss"`
TimeTotal time.Duration `json:"time_total"`
TimeToTarget time.Duration `json:"time_to_target"`
MemoryPeakMB float64 `json:"memory_peak_mb"`
Milestones map[int]time.Duration `json:"milestones"`
}
TrainingMetrics captures performance metrics for a training run.
func NewTrainingMetrics ¶
func NewTrainingMetrics() TrainingMetrics
NewTrainingMetrics creates an initialized TrainingMetrics.
type TrainingResult ¶
type TrainingResult struct {
FinalLoss float64
TotalTime time.Duration
LossHistory []float64
EpochTimes []time.Duration
}
TrainingResult contains training statistics for the Poly engine.
func Train ¶
func Train[T Numeric](n *VolumetricNetwork, batches []TrainingBatch[T], config *TrainingConfig) (*TrainingResult, error)
Train executes the training loop on a VolumetricNetwork.
type Transformer ¶
type Transformer[T Numeric] struct { Network *VolumetricNetwork Embeddings []float32 LMHead []float32 FinalNorm []float32 HiddenSize int VocabSize int Template Template // contains filtered or unexported fields }
Transformer coordinates high-level generation logic using the underlying VolumetricNetwork
func NewTransformer ¶
func NewTransformer[T Numeric](network *VolumetricNetwork, embeddings, lmHead, finalNorm []float32, template Template) *Transformer[T]
NewTransformer creates a new polymorphic transformer
func (*Transformer[T]) EnableTiling ¶
func (t *Transformer[T]) EnableTiling(tileSize int)
EnableTiling enables cache-tiling optimization for all layers in the transformer. If tileSize is <= 0, it dynamically auto-detects the best size for the hardware.
func (*Transformer[T]) ForwardTokenIDsWGPU ¶
func (t *Transformer[T]) ForwardTokenIDsWGPU(tokens []uint32, input *Tensor[T], computeLogits bool, onlyLast bool) (*Tensor[T], error)
ForwardWGPU handles both prefill (multi-token) and decode (single-token) GPU forward passes. All layer dispatches are recorded into a single CommandEncoder (BeginFrame/FlushFrame), reducing GPU submission overhead from ~150+ submits/token to just 1 submit + 1 download. ForwardTokenIDsWGPU is the "true" GPU residency path. If tokens are provided, embedding lookup happens on GPU. If final norm/LM head are synced, they run on GPU too.
func (*Transformer[T]) ForwardWGPU ¶
func (t *Transformer[T]) ForwardWGPU(input *Tensor[T]) (*Tensor[T], error)
func (*Transformer[T]) Generate ¶
func (t *Transformer[T]) Generate( encode func(text string) []uint32, decode func(tokens []uint32) string, turns []Turn, systemPrompt, userMsg string, opts GenOptions, ) string
Generate implements the stateless generation logic
func (*Transformer[T]) Reset ¶
func (t *Transformer[T]) Reset()
Reset clears the KV cache for all layers
func (*Transformer[T]) SyncToGPU ¶
func (t *Transformer[T]) SyncToGPU() error
type VolumetricLayer ¶
type VolumetricLayer struct {
Network *VolumetricNetwork
Type LayerType
Activation ActivationType
DType DType
WeightStore *WeightStore
IsDisabled bool
// 3D Coordinates
Z int // Depth
Y int // Row
X int // Col
L int // Layer index within cell
// Config (Expanding from LayerConfig)
InputHeight int
InputWidth int
InputDepth int
OutputHeight int
OutputWidth int
OutputDepth int
InputChannels int
Filters int
KernelSize int
Stride int
Padding int
OutputPadding int
NumHeads int
NumKVHeads int
HeadDim int
DModel int
SeqLength int
RoPEFreqBase float64
VocabSize int
EmbeddingDim int
NumClusters int
KMeansTemperature float64
KMeansOutputMode string // "probabilities" or "features"
SoftmaxType SoftmaxType
Temperature float64
SoftmaxRows int
SoftmaxCols int
HierarchyLevels []int
EntmaxAlpha float64
Mask []bool
GumbelNoise bool
ParallelBranches []VolumetricLayer
CombineMode string // "concat", "add", "avg", "filter", "grid_scatter"
FilterGateConfig *VolumetricLayer
// Spatial Routing (Remote Links)
IsRemoteLink bool
TargetZ int
TargetY int
TargetX int
TargetL int
SequentialLayers []VolumetricLayer
// Tiling & GPU Config
UseTiling bool
TileSize int
UseGPU bool
IsGPUResident bool
IsKVCacheGPUResident bool
Observer PolyObserver
// KV Cache (for MHA)
KVCacheK *Tensor[float32]
KVCacheV *Tensor[float32]
KVOffset int
MaxSeqLen int
// Persistent GPU KV buffers
GPUKVCacheK any // *wgpu.Buffer
GPUKVCacheV any // *wgpu.Buffer
}
VolumetricLayer represents a processing unit in the 3D volumetric grid.
func CreateResidualGraft ¶
func CreateResidualGraft(main *VolumetricNetwork) *VolumetricLayer
CreateResidualGraft wraps a network in a residual block.
func GraftNetworksPolymorphic ¶
func GraftNetworksPolymorphic(networks []*VolumetricNetwork, combineMode string) (*VolumetricLayer, error)
GraftNetworksPolymorphic takes multiple heterogeneous VolumetricNetworks and grafts their specific layers into a single parallel layer within a new network.
func ReconstructCNNLayer ¶
func ReconstructCNNLayer(name string, tensors []DetectedTensor, ltype LayerType) (*VolumetricLayer, error)
ReconstructCNNLayer attempts to build a VolumetricLayer of type CNN from grouped tensors.
func ReconstructLayerNormLayer ¶
func ReconstructLayerNormLayer(name string, tensors []DetectedTensor, dModel int) (*VolumetricLayer, error)
ReconstructLayerNormLayer builds a LayerNorm layer.
func ReconstructMHALayer ¶
func ReconstructMHALayer(name string, tensors []DetectedTensor, dModel int, numHeads int) (*VolumetricLayer, error)
ReconstructMHALayer attempts to build a VolumetricLayer of type MultiHeadAttention from grouped tensors.
func ReconstructRMSNormLayer ¶
func ReconstructRMSNormLayer(name string, tensors []DetectedTensor, dModel int) (*VolumetricLayer, error)
ReconstructRMSNormLayer builds an RMSNorm layer.
func ReconstructSwiGLULayer ¶
func ReconstructSwiGLULayer(name string, tensors []DetectedTensor, dModel int) (*VolumetricLayer, error)
ReconstructSwiGLULayer builds a SwiGLU layer from gated MLP tensors.
func (*VolumetricLayer) SyncToCPU ¶
func (l *VolumetricLayer) SyncToCPU()
SyncToCPU releases GPU resources.
func (*VolumetricLayer) SyncToGPU ¶
func (l *VolumetricLayer) SyncToGPU() error
SyncToGPU mirrors active weights and KV caches to the GPU.
type VolumetricNetwork ¶
type VolumetricNetwork struct {
Depth int
Rows int
Cols int
LayersPerCell int
Layers []VolumetricLayer
// Global Tiling & GPU Switches
UseTiling bool
UseGPU bool
// GPU Acceleration context
GPUContext *WGPUContext
// Persistent GPU buffers to avoid allocations
GPUHiddenState []any // map[DType]wgpu.Buffer or similar, use any for now
GPULogits any // wgpu.Buffer
GPUEmbeddings any // *wgpu.Buffer
GPULMHead any // *wgpu.Buffer
}
VolumetricNetwork represents a 3D grid neural network.
func BuildCNN ¶
func BuildCNN(inputSize, numClasses int, dtype DType) *VolumetricNetwork
BuildCNN creates a simple convolutional network.
func BuildNetworkFromJSON ¶
func BuildNetworkFromJSON(jsonData []byte) (*VolumetricNetwork, error)
BuildNetworkFromJSON creates a VolumetricNetwork from JSON-encoded bytes.
func BuildRandomNetwork ¶
func BuildRandomNetwork(depth, rows, cols, lpc int, dModel int) *VolumetricNetwork
BuildRandomNetwork generates a diverse VolumetricNetwork.
func BuildSequentialNetwork ¶
func BuildSequentialNetwork(numLayers int, dModel int, act ActivationType, dtype DType) *VolumetricNetwork
func BuildTransformerNetwork ¶
func BuildTransformerNetwork(numBlocks int, dModel int, numHeads int, dtype DType) *VolumetricNetwork
BuildTransformerNetwork creates a stack of Transformer blocks.
func DeserializeNetwork ¶
func DeserializeNetwork(jsonData []byte) (*VolumetricNetwork, error)
DeserializeNetwork reconstructs a VolumetricNetwork from a JSON byte slice.
func LoadUniversal ¶
func LoadUniversal(path string) (*VolumetricNetwork, error)
LoadUniversal loads a model from a safetensors file and auto-detects its architecture.
func MountGeometrically ¶
func MountGeometrically(archs []LayerArchetype, geoms []TensorMeta) *VolumetricNetwork
MountGeometrically creates a VolumetricNetwork from archetypes and geometries.
func NEATMutate ¶ added in v0.74.0
func NEATMutate(n *VolumetricNetwork, cfg NEATConfig) *VolumetricNetwork
NEATMutate applies NEAT-style structural and weight mutations to a copy of n. The original network is never modified — a clone is returned.
Mutation sequence per layer:
- Weight perturbation — add small Gaussian noise to Master weights
- Activation mutation — randomly swap the activation function
- Node mutation — change layer type, reinitialize weights
- Layer toggle — flip IsDisabled (activate dormant / silence active)
Network-level mutations (applied once after per-layer pass):
- Connection add — insert a remote link (IsRemoteLink spatial hop)
- Connection drop — remove an existing remote link
func NewVolumetricNetwork ¶
func NewVolumetricNetwork(depth, rows, cols, layersPerCell int) *VolumetricNetwork
NewVolumetricNetwork initializes a 3D grid of layers.
func SpliceDNA ¶ added in v0.74.0
func SpliceDNA(parentA, parentB *VolumetricNetwork, cfg SpliceConfig) *VolumetricNetwork
SpliceDNA merges two trained parent networks into a child network.
parentA is the structural template: its grid dimensions and layer types are inherited by the child. parentB contributes weights to matching layers, weighted by DNA similarity.
For each layer at coordinate (z,y,x,l):
- If both parents have the layer and their types match, weights are blended.
- If parentB has no matching layer, the child keeps parentA's weights.
The three blend strategies:
"blend" — interpolate: child[i] = wA[i]*(1-α) + wB[i]*α
α is modulated by cosine similarity and relative fitness.
"point" — split at SplitRatio: first N weights from A, rest from B.
"uniform" — per-weight random pick from A or B, biased by fitness.
func (*VolumetricNetwork) CalculateTotalMemory ¶
func (n *VolumetricNetwork) CalculateTotalMemory() int
CalculateTotalMemory returns the total size of all layers in bytes.
func (*VolumetricNetwork) GetIndex ¶
func (n *VolumetricNetwork) GetIndex(z, y, x, l int) int
GetIndex calculates the flattened index for the grid coordinate (z, y, x) and the per-cell layer index l.
func (*VolumetricNetwork) GetLayer ¶
func (n *VolumetricNetwork) GetLayer(z, y, x, l int) *VolumetricLayer
GetLayer returns the layer at the grid coordinate (z, y, x) and the per-cell layer index l.
func (*VolumetricNetwork) GetMethodSignature ¶
func (n *VolumetricNetwork) GetMethodSignature(methodName string) (string, error)
GetMethodSignature returns the signature of a specific method.
func (*VolumetricNetwork) GetMethods ¶
func (n *VolumetricNetwork) GetMethods() ([]MethodInfo, error)
GetMethods retrieves all public methods of the VolumetricNetwork struct.
func (*VolumetricNetwork) GetMethodsJSON ¶
func (n *VolumetricNetwork) GetMethodsJSON() (string, error)
GetMethodsJSON returns a JSON string containing all methods attached to the VolumetricNetwork struct.
func (*VolumetricNetwork) HasMethod ¶
func (n *VolumetricNetwork) HasMethod(methodName string) bool
HasMethod checks if a method exists on the VolumetricNetwork.
func (*VolumetricNetwork) InitCNNCell ¶
func (*VolumetricNetwork) InitConvTransposedCell ¶
func (*VolumetricNetwork) InitDenseCell ¶
func (n *VolumetricNetwork) InitDenseCell(z, y, x, l int, dModel int, act ActivationType, scale float32)
func (*VolumetricNetwork) InitEmbeddingCell ¶
func (n *VolumetricNetwork) InitEmbeddingCell(z, y, x, l int, vocabSize, dModel int, dtype DType)
func (*VolumetricNetwork) InitKMeansCell ¶
func (n *VolumetricNetwork) InitKMeansCell(z, y, x, l int, numClusters, dModel int, dtype DType)
func (*VolumetricNetwork) InitLSTMCell ¶
func (n *VolumetricNetwork) InitLSTMCell(z, y, x, l int, dModel int, scale float32)
func (*VolumetricNetwork) InitLayerNormCell ¶
func (n *VolumetricNetwork) InitLayerNormCell(z, y, x, l int, size int, dtype DType)
func (*VolumetricNetwork) InitMHACell ¶
func (n *VolumetricNetwork) InitMHACell(z, y, x, l int, dModel, numHeads int, scale float32)
func (*VolumetricNetwork) InitRNNCell ¶
func (n *VolumetricNetwork) InitRNNCell(z, y, x, l int, dModel int, scale float32)
func (*VolumetricNetwork) InitWGPU ¶
func (n *VolumetricNetwork) InitWGPU() error
InitWGPU initializes the WebGPU context for the network.
func (*VolumetricNetwork) ListMethods ¶
func (n *VolumetricNetwork) ListMethods() []string
ListMethods returns a simple list of all public method names.
func (*VolumetricNetwork) SyncAllToGPU ¶
func (n *VolumetricNetwork) SyncAllToGPU() error
SyncAllToGPU mirrors the entire network state to VRAM.
func (*VolumetricNetwork) SyncToGPU ¶ added in v0.73.0
func (n *VolumetricNetwork) SyncToGPU() error
SyncToGPU mirrors all layers to the GPU.
type WGPUActivationParams ¶ added in v0.73.0
type WGPUApplyGradientsParams ¶ added in v0.73.0
type WGPUCNN1BackwardParams ¶ added in v0.73.0
type WGPUCNN1Params ¶
type WGPUCNN2BackwardParams ¶ added in v0.73.0
type WGPUCNN2Params ¶
type WGPUCNN3BackwardParams ¶ added in v0.73.0
type WGPUCNN3Params ¶
type WGPUContext ¶
type WGPUContext struct {
Instance *wgpu.Instance
Adapter *wgpu.Adapter
Device *wgpu.Device
Queue *wgpu.Queue
PipelineCache map[string]*wgpu.ComputePipeline
ActivationPool map[string]*wgpu.Buffer
// GPUTileSize is the auto-detected optimal tile size for this GPU.
// Can be overridden by the caller after init.
GPUTileSize int
// ActiveEncoder, when non-nil, is used by all Dispatch* calls instead of
// creating their own encoder. This lets the entire forward pass be recorded
// into a single command buffer and submitted once, reducing GPU overhead.
ActiveEncoder *wgpu.CommandEncoder
// PendingDestroys holds temporary uniform buffers that must not be destroyed
// until after FlushFrame() submits the active encoder. When not batching,
// buffers are destroyed immediately instead of queued here.
PendingDestroys []*wgpu.Buffer
// --- Performance Optimization Caches ---
LayoutCache map[string]*wgpu.BindGroupLayout
BindGroupCache map[uint64]*wgpu.BindGroup
// Uniform Pool
UniformPool []*wgpu.Buffer
UniformIdx int
// Negotiated limits
Limits wgpu.Limits
}
WGPUContext manages the GPU device and queue for acceleration.
func (*WGPUContext) BeginFrame ¶
func (c *WGPUContext) BeginFrame() error
BeginFrame creates a shared CommandEncoder that all subsequent Dispatch* calls will record into until FlushFrame is called.
func (*WGPUContext) CreateComputePipeline ¶
func (c *WGPUContext) CreateComputePipeline(shaderSource string) (*wgpu.ComputePipeline, error)
func (*WGPUContext) CreatePersistentBuffer ¶
CreatePersistentBuffer creates a storage buffer that stays in VRAM.
func (*WGPUContext) DispatchActivation ¶ added in v0.73.0
func (c *WGPUContext) DispatchActivation(size int, act ActivationType, inputBuf, outputBuf *wgpu.Buffer) error
func (*WGPUContext) DispatchActivationBackward ¶ added in v0.73.0
func (c *WGPUContext) DispatchActivationBackward(size int, act ActivationType, gradOutBuf, preActBuf, gradInBuf *wgpu.Buffer) error
func (*WGPUContext) DispatchApplyGradients ¶ added in v0.73.0
func (*WGPUContext) DispatchBackwardLayer ¶ added in v0.73.0
func (c *WGPUContext) DispatchBackwardLayer(l *VolumetricLayer, batchSize int, gradOutBuf, inputBuf, preActBuf, dxBuf, dwBuf *wgpu.Buffer) error
func (*WGPUContext) DispatchCNN1 ¶
func (c *WGPUContext) DispatchCNN1( batchSize, inC, inL, outC, outL, kSize, stride, padding int, inputBuf, weightBuf, outputBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchCNN1BackwardDW ¶ added in v0.73.0
func (c *WGPUContext) DispatchCNN1BackwardDW( batchSize, inC, inL, filters, outL, kSize, stride, padding int, activation ActivationType, gradOutputBuf, inputBuf, preActBuf, gradWeightBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchCNN1BackwardDX ¶ added in v0.73.0
func (c *WGPUContext) DispatchCNN1BackwardDX( batchSize, inC, inL, filters, outL, kSize, stride, padding int, activation ActivationType, gradOutputBuf, weightBuf, preActBuf, gradInputBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchCNN2 ¶
func (c *WGPUContext) DispatchCNN2( batchSize, inC, inH, inW, outC, outH, outW, kH, kW, strideH, strideW, padH, padW int, inputBuf, weightBuf, outputBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchCNN2BackwardDW ¶ added in v0.73.0
func (c *WGPUContext) DispatchCNN2BackwardDW( batchSize, inC, inH, inW, filters, outH, outW, kSize, stride, padding int, activation ActivationType, gradOutputBuf, inputBuf, preActBuf, gradWeightBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchCNN2BackwardDX ¶ added in v0.73.0
func (c *WGPUContext) DispatchCNN2BackwardDX( batchSize, inC, inH, inW, filters, outH, outW, kSize, stride, padding int, activation ActivationType, gradOutputBuf, weightBuf, preActBuf, gradInputBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchCNN3 ¶
func (c *WGPUContext) DispatchCNN3( batchSize, inC, inD, inH, inW, outC, outD, outH, outW, kD, kH, kW, sD, sH, sW, pD, pH, pW int, inputBuf, weightBuf, outputBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchCNN3BackwardDW ¶ added in v0.73.0
func (c *WGPUContext) DispatchCNN3BackwardDW( batchSize, inC, inD, inH, inW, filters, outD, outH, outW, kSize, stride, padding int, activation ActivationType, gradOutputBuf, inputBuf, preActBuf, gradWeightBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchCNN3BackwardDX ¶ added in v0.73.0
func (c *WGPUContext) DispatchCNN3BackwardDX( batchSize, inC, inD, inH, inW, filters, outD, outH, outW, kSize, stride, padding int, activation ActivationType, gradOutputBuf, weightBuf, preActBuf, gradInputBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchDense ¶
func (c *WGPUContext) DispatchDense( batchSize, inputSize, outputSize int, inputBuf, weightBuf, outputBuf *wgpu.Buffer, tileSize int, ) error
DispatchDense dispatches a tiled dense matrix-multiply kernel.
func (*WGPUContext) DispatchDenseBackwardDW ¶ added in v0.73.0
func (c *WGPUContext) DispatchDenseBackwardDW( batchSize, inputSize, outputSize int, gradOutputBuf, inputBuf, gradWeightBuf *wgpu.Buffer, tileSize int, ) error
DispatchDenseBackwardDW calculates gradWeights = gradOutput^T * input.
func (*WGPUContext) DispatchDenseBackwardDX ¶ added in v0.73.0
func (c *WGPUContext) DispatchDenseBackwardDX( batchSize, inputSize, outputSize int, gradOutputBuf, weightBuf, gradInputBuf *wgpu.Buffer, tileSize int, ) error
DispatchDenseBackwardDX calculates gradInput = gradOutput * weights.
func (*WGPUContext) DispatchDenseQ4 ¶
func (c *WGPUContext) DispatchDenseQ4( batchSize, inputSize, outputSize int, inputBuf, scaleBuf, weightBuf, outputBuf *wgpu.Buffer, tileSize int, ) error
DispatchDenseQ4 dispatches a tiled dense kernel that dequantizes Q4_0 weights.
func (*WGPUContext) DispatchEmbedding ¶
func (c *WGPUContext) DispatchEmbedding( vocabSize, hiddenSize, numTokens int, indicesBuf, weightsBuf, outputBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchEmbeddingBackward ¶ added in v0.73.0
func (c *WGPUContext) DispatchEmbeddingBackward( vocabSize, hiddenSize, numTokens int, indicesBuf, gradOutputBuf, gradWeightBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchForwardLayer ¶ added in v0.73.0
func (c *WGPUContext) DispatchForwardLayer(l *VolumetricLayer, batchSize int, inputBuf, outBuf *wgpu.Buffer) error
func (*WGPUContext) DispatchKVUpdate ¶
func (c *WGPUContext) DispatchKVUpdate( offset, headDim, maxSeqLen, numKVHeads, numTokens int, kCache, vCache, newK, newV *wgpu.Buffer, ) error
func (*WGPUContext) DispatchLSTMStep ¶
func (c *WGPUContext) DispatchLSTMStep( batchSize, inputSize, hiddenSize int, inputBuf, hPrevBuf, cPrevBuf, weightBuf, hCurrBuf, cCurrBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchMHA ¶
func (c *WGPUContext) DispatchMHA( numHeads, numKVHeads, headDim, seqLen, kvOffset, maxSeqLen int, qBuf, kBuf, vBuf, oBuf *wgpu.Buffer, tileSize int, ) error
DispatchMHA dispatches the tiled multi-head attention kernel.
func (*WGPUContext) DispatchMHABackward ¶ added in v0.73.0
func (*WGPUContext) DispatchMSEGradPartialLoss ¶ added in v0.73.0
func (c *WGPUContext) DispatchMSEGradPartialLoss( size int, outputBuf, targetBuf, gradBuf, partialsBuf *wgpu.Buffer, ) error
DispatchMSEGradPartialLoss computes MSE gradients on the GPU and writes partial loss sums to partialsBuf, one per workgroup (numWG = ceil(size/256)). The CPU then sums these partials to obtain the total loss.
func (*WGPUContext) DispatchRMSNorm ¶
func (c *WGPUContext) DispatchRMSNorm( batchSize, size int, epsilon float32, inputBuf, weightBuf, outputBuf *wgpu.Buffer, ) error
DispatchRMSNorm dispatches the RMSNorm kernel.
func (*WGPUContext) DispatchRMSNormBackward ¶ added in v0.73.0
func (*WGPUContext) DispatchRNNStep ¶
func (c *WGPUContext) DispatchRNNStep( batchSize, inputSize, hiddenSize int, inputBuf, hPrevBuf, wIHBuf, wHHBuf, biasBuf, hCurrBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchResidual ¶
func (c *WGPUContext) DispatchResidual( size int, inputBuf, residualBuf *wgpu.Buffer, ) error
DispatchResidual dispatches the element-wise addition kernel.
func (*WGPUContext) DispatchResidualBackward ¶ added in v0.73.0
func (c *WGPUContext) DispatchResidualBackward( size int, gradOutputBuf, gradInputBuf, gradResidualBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchRoPE ¶
func (*WGPUContext) DispatchSwiGLU ¶
func (c *WGPUContext) DispatchSwiGLU( batchSize, inputSize, outputSize int, inputBuf, gateBuf, upBuf, outputBuf *wgpu.Buffer, tileSize int, ) error
DispatchSwiGLU dispatches the tiled SwiGLU MLP kernel.
func (*WGPUContext) DispatchSwiGLUBackward ¶ added in v0.73.0
func (c *WGPUContext) DispatchSwiGLUBackward( batchSize, inputSize, outputSize int, gradOutputBuf, gateInBuf, upInBuf, gradGateBuf, gradUpBuf *wgpu.Buffer, ) error
func (*WGPUContext) DispatchSwiGLUQ4 ¶
func (c *WGPUContext) DispatchSwiGLUQ4( batchSize, inputSize, outputSize int, inputBuf, gateScaleBuf, gateWeightBuf, upScaleBuf, upWeightBuf, outputBuf *wgpu.Buffer, tileSize int, ) error
DispatchSwiGLUQ4 dispatches a tiled SwiGLU kernel with Q4_0 weights.
func (*WGPUContext) FlushFrame ¶
func (c *WGPUContext) FlushFrame()
FlushFrame finishes and submits the shared CommandEncoder, then destroys any temporary uniform buffers that were kept alive for the duration of recording.
func (*WGPUContext) GetActivationBuffer ¶
func (c *WGPUContext) GetActivationBuffer(name string, size uint64, usage wgpu.BufferUsage) *wgpu.Buffer
GetActivationBuffer retrieves or creates a persistent activation buffer.
func (*WGPUContext) GetBindGroup ¶
func (c *WGPUContext) GetBindGroup(pipeline *wgpu.ComputePipeline, buffers ...*wgpu.Buffer) (*wgpu.BindGroup, error)
GetBindGroup retrieves or creates a BindGroup for the given pipeline and buffers.
func (*WGPUContext) GetUniformBuffer ¶
func (c *WGPUContext) GetUniformBuffer(size uint64) *wgpu.Buffer
GetUniformBuffer provides a pre-allocated uniform buffer from the pool.
func (*WGPUContext) ReadBuffer ¶
func (c *WGPUContext) ReadBuffer(buf *wgpu.Buffer) ([]float32, error)
ReadBuffer reads data from a GPU buffer back to a float32 slice.
func (*WGPUContext) Release ¶
func (c *WGPUContext) Release()
Release releases all WebGPU resources.
func (*WGPUContext) ResetCache ¶
func (c *WGPUContext) ResetCache()
ResetCache clears all BindGroups and Pipelines. It should be called when the model architecture or precision changes.
type WGPUDenseParams ¶
WGPUDenseParams matches the WGSL struct
type WGPUEmbeddingParams ¶
type WGPUKVParams ¶
type WGPULSTMParams ¶
type WGPULossParams ¶ added in v0.73.0
type WGPULossParams struct {
Size uint32
// contains filtered or unexported fields
}
type WGPUMHABackwardParams ¶ added in v0.73.0
type WGPUMHAParams ¶
type WGPUMHAParams struct {
NumHeads uint32
NumKVHeads uint32
HeadDim uint32
SeqLen uint32
KVOffset uint32
MaxSeqLen uint32
TileSize uint32
Padding uint32
}
WGPUMHAParams matches the attention WGSL struct
type WGPURMSNormParams ¶
type WGPURMSNormParams struct {
Size uint32
Epsilon float32
// contains filtered or unexported fields
}
WGPURMSNormParams matches the WGSL struct
type WGPURNNParams ¶
type WGPURoPEParams ¶
type WeightStore ¶
type WeightStore struct {
Master []float32 // Master FP32 weights (Source of Truth)
Versions map[DType]any // Active versions (e.g., map[DTypeFP4][]byte)
GPUWeights map[DType]any // VRAM-resident versions (wgpu.Buffer)
GPUScales map[DType]*wgpu.Buffer // VRAM-resident scales for quantized types
Scale float32 // Quantization scale factor
}
WeightStore manages multiple numerical versions of the same weights. This is the core of "Polymorphic Layer-Morphing".
func NewWeightStore ¶
func NewWeightStore(size int) *WeightStore
NewWeightStore creates a new storage for weights.
func (*WeightStore) ApplyGradients ¶
func (ws *WeightStore) ApplyGradients(gradWeights *Tensor[float32], lr float32)
ApplyGradients performs a simple SGD update (weight = weight - lr * gradient). This is the "Learning" step that mutates the actual weights in the Master store.
func (*WeightStore) GetActive ¶
func (ws *WeightStore) GetActive(dtype DType) any
GetActive returns the data for the given DType if it exists.
func (*WeightStore) Morph ¶
func (ws *WeightStore) Morph(dtype DType)
Morph converts master weights into the target DType and caches the result.
func (*WeightStore) Randomize ¶
func (ws *WeightStore) Randomize(seed int64, scale float32)
Randomize fills the master weights with small random values to break symmetry.
func (*WeightStore) SetVersion ¶
func (ws *WeightStore) SetVersion(dtype DType, data any)
SetVersion stores a converted version of weights.
func (*WeightStore) SizeInBytes ¶
func (ws *WeightStore) SizeInBytes(dtype DType) int
SizeInBytes calculates the memory footprint of the currently active version.
func (*WeightStore) Unpack ¶
func (ws *WeightStore) Unpack(dtype DType)
Unpack reconstructs master weights from a bit-packed native version.
Source Files
¶
- architecture.go
- backward.go
- bpe.go
- clustering.go
- cnn1.go
- cnn2.go
- cnn3.go
- conv_transposed1.go
- conv_transposed2.go
- conv_transposed3.go
- dense.go
- dna.go
- embedding.go
- ensemble.go
- evaluation.go
- evolution.go
- forward.go
- grafting.go
- grouping.go
- hardware.go
- introspection.go
- kmeans.go
- lstm.go
- mha.go
- norm.go
- observer.go
- parallel.go
- persistence.go
- poly.go
- prefix_safetensor.go
- quantization.go
- residual.go
- rnn.go
- safetensors.go
- sampling.go
- sequential.go
- serialization.go
- softmax.go
- swiglu.go
- systolic.go
- target_prop.go
- telemetry.go
- templates.go
- training.go
- transformer.go
- universal_loader.go
- weights.go
- wgpu_backward_shaders.go
- wgpu_context.go
- wgpu_forward.go
- wgpu_kernels.go
- wgpu_shaders.go
- wgpu_usage.go