Documentation ¶
Index ¶
- func BuildCompoundKernel(typeArgs []acc.TypeArg, computeCapability [2]int) (string, string)
- func Flatten(lst ...interface{}) []int
- type BatchNormLayer
- func (n *BatchNormLayer) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor
- func (n *BatchNormLayer) Bsum() bool
- func (n *BatchNormLayer) C() int
- func (n *BatchNormLayer) Dim2() []int
- func (n *BatchNormLayer) DimI() []int
- func (n *BatchNormLayer) DimO() []int
- func (n *BatchNormLayer) DimO2() []int
- func (n *BatchNormLayer) Eps() float64
- func (n *BatchNormLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor
- func (n *BatchNormLayer) Init(self Layer, lib *CudaGenerator, dtype base.Dtype, params *BatchNormParams)
- func (n *BatchNormLayer) InitActivations(fpropOut backends.Tensor)
- func (n *BatchNormLayer) InitDeltas(shared []backends.Tensor)
- func (n *BatchNormLayer) InitWeights(loc float64, scale float64, shared backends.Tensor, zeros bool)
- func (n *BatchNormLayer) K() int
- func (n *BatchNormLayer) M() int
- func (n *BatchNormLayer) NOut() int
- func (n *BatchNormLayer) P() int
- func (n *BatchNormLayer) Q() int
- func (n *BatchNormLayer) RcpDepth() float64
- func (n *BatchNormLayer) Relu() bool
- func (n *BatchNormLayer) Rho() float64
- func (n *BatchNormLayer) String() string
- type BatchNormParams
- type BpropCuda
- type CompoundOps
- type ConvBpropKernels
- type ConvFpropKernels
- type ConvKernelBuilder
- type ConvLayer
- type ConvLayerBase
- func (n *ConvLayerBase) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor
- func (n *ConvLayerBase) C() int
- func (n *ConvLayerBase) DHW() []int
- func (n *ConvLayerBase) DimF() []int
- func (n *ConvLayerBase) DimF2() []int
- func (n *ConvLayerBase) DimF2t() []int
- func (n *ConvLayerBase) DimFb() []int
- func (n *ConvLayerBase) DimI() []int
- func (n *ConvLayerBase) DimI2() []int
- func (n *ConvLayerBase) DimO() []int
- func (n *ConvLayerBase) DimO2() []int
- func (n *ConvLayerBase) DimS() []int
- func (n *ConvLayerBase) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor
- func (n *ConvLayerBase) Init(self Layer, lib *CudaGenerator, dtype base.Dtype, params *ConvParams, ...)
- func (n *ConvLayerBase) InitActivations(fpropOut backends.Tensor)
- func (n *ConvLayerBase) K() int
- func (n *ConvLayerBase) M() int
- func (n *ConvLayerBase) MPQ() []int
- func (n *ConvLayerBase) NCK() []int
- func (n *ConvLayerBase) NOut() int
- func (n *ConvLayerBase) P() int
- func (n *ConvLayerBase) Padding() []int
- func (n *ConvLayerBase) Q() int
- func (n *ConvLayerBase) SetBsum(bsum bool)
- func (n *ConvLayerBase) SetRelu(relu bool)
- func (n *ConvLayerBase) Strides() []int
- func (n *ConvLayerBase) TRS() []int
- type ConvParams
- type ConvUpdateKernels
- type CudaDeviceAllocation
- type CudaGenerator
- func (b *CudaGenerator) Assign(out *acc.AccTensor, value backends.Value) backends.Value
- func (b *CudaGenerator) Binarize(ary backends.Tensor, out backends.Tensor, stochastic bool) backends.Tensor
- func (b *CudaGenerator) BpropConv(layer backends.ConvLayerBase, f backends.Tensor, e backends.Tensor, ...)
- func (b *CudaGenerator) BpropLrn(layer backends.LrnLayer, i backends.Tensor, o backends.Tensor, ...)
- func (b *CudaGenerator) BpropPool(layer backends.PoolLayer, i backends.Tensor, o backends.Tensor, ...)
- func (b *CudaGenerator) BpropRelu(layer backends.Layer, x backends.Tensor, errors backends.Tensor, ...) backends.Value
- func (b *CudaGenerator) BpropSkipNode(errors backends.Tensor, deltas backends.Tensor, alpha float64, beta float64)
- func (b *CudaGenerator) BpropTransform(nglayer backends.Layer, transform backends.Transform, outputs backends.Tensor, ...)
- func (b *CudaGenerator) BuildConvKernels(dtype base.Dtype, a *ConvParams) (ConvFpropKernels, ConvBpropKernels, ConvUpdateKernels)
- func (b *CudaGenerator) BuildProlog()
- func (b *CudaGenerator) CompoundBpropBn(deltaOut backends.Tensor, gradGamma backends.Tensor, gradBeta backends.Tensor, ...)
- func (b *CudaGenerator) CompoundDot(x backends.Tensor, y backends.Tensor, z backends.Tensor, alpha float64, ...) backends.Tensor
- func (b *CudaGenerator) CompoundDotCuda(x *acc.AccTensor, y *acc.AccTensor, z *acc.AccTensor, alpha float64, ...) *acc.AccTensor
- func (b *CudaGenerator) CompoundFpropBn(x backends.Tensor, xsum backends.Tensor, xvar backends.Tensor, ...)
- func (b *CudaGenerator) CompoundKernel(args []backends.Value) backends.Value
- func (b *CudaGenerator) ConfigureCodeOutput(buildMainCpp bool, filePrefix string, hostNamespace string, ...)
- func (b *CudaGenerator) FormatBufferRef(tensor backends.Tensor, paren bool) string
- func (b *CudaGenerator) FpropConv(layer backends.ConvLayerBase, i backends.Tensor, f backends.Tensor, ...)
- func (b *CudaGenerator) FpropLrn(layer backends.LrnLayer, i backends.Tensor, o backends.Tensor, ...)
- func (b *CudaGenerator) FpropPool(layer backends.PoolLayer, i backends.Tensor, o backends.Tensor, ...)
- func (b *CudaGenerator) FpropRelu(layer backends.Layer, x backends.Tensor, slope float64) backends.Value
- func (b *CudaGenerator) FpropSkipNode(x backends.Tensor, y backends.Tensor, beta float64)
- func (b *CudaGenerator) FpropSoftmax(x backends.Value, axis int) backends.Value
- func (b *CudaGenerator) FpropTransform(nglayer backends.Layer, transform backends.Transform, inputs backends.Tensor, ...)
- func (b *CudaGenerator) GetData(dest string, start string, stop string, x backends.Tensor) string
- func (b *CudaGenerator) GetFloat(src acc.DeviceAllocation, size int) string
- func (b *CudaGenerator) GetInt(src acc.DeviceAllocation, size int) string
- func (b *CudaGenerator) GetMetricSum(x backends.Tensor, start string, stop string) string
- func (b *CudaGenerator) GetSmCount() int
- func (b *CudaGenerator) Init(self generators.Generator, rngSeed int, defaultDtype base.Dtype, ...)
- func (b *CudaGenerator) LookupKernel(key string) *Kernel
- func (b *CudaGenerator) MakeBinaryMask(out backends.Tensor, keepThresh float64)
- func (b *CudaGenerator) MapStringToFunc(funcname string, dtype base.Dtype) *Kernel
- func (b *CudaGenerator) MemAlloc(nbytes int) acc.DeviceAllocation
- func (b *CudaGenerator) MemcpyDtodAsync(dest acc.DeviceAllocation, src acc.DeviceAllocation, size int)
- func (b *CudaGenerator) MemsetD16Async(dest acc.DeviceAllocation, data uint16, count int)
- func (b *CudaGenerator) MemsetD32Async(dest acc.DeviceAllocation, data uint32, count int)
- func (b *CudaGenerator) MemsetD8Async(dest acc.DeviceAllocation, data uint8, count int)
- func (b *CudaGenerator) NewBatchNormLayer(inShape []int) backends.BatchNormLayer
- func (b *CudaGenerator) NewConvLayer(dtype base.Dtype, params *backends.ConvParams) backends.ConvLayer
- func (b *CudaGenerator) NewDeconvLayer(dtype base.Dtype, params *backends.DeconvParams) backends.DeconvLayer
- func (b *CudaGenerator) NewLrnLayer(dtype base.Dtype, params *backends.LrnParams) backends.LrnLayer
- func (b *CudaGenerator) NewPoolLayer(dtype base.Dtype, params *backends.PoolParams) backends.PoolLayer
- func (b *CudaGenerator) NewReluLayer() backends.Layer
- func (b *CudaGenerator) OutputCode(outDir string) error
- func (b *CudaGenerator) RegisterKernel(key string, kernel *Kernel)
- func (b *CudaGenerator) RngNormal(out backends.Tensor, loc float64, scale float64, size []int)
- func (b *CudaGenerator) RngUniform(out backends.Tensor, low float64, high float64, size []int)
- func (b *CudaGenerator) ScratchBufferInit()
- func (b *CudaGenerator) ScratchBufferOffset(size int) acc.DeviceAllocation
- func (b *CudaGenerator) ScratchBufferReset()
- func (b *CudaGenerator) SetScratchSize(args ...int)
- func (b *CudaGenerator) SetSmCount(smCount int)
- func (b *CudaGenerator) UpdateConv(layer backends.ConvLayerBase, i backends.Tensor, e backends.Tensor, ...)
- type DataLayer
- func (n *DataLayer) C() int
- func (n *DataLayer) DHW() []int
- func (n *DataLayer) DimI() []int
- func (n *DataLayer) DimI2() []int
- func (n *DataLayer) DimO() []int
- func (n *DataLayer) DimO2() []int
- func (n *DataLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor
- func (n *DataLayer) Init(self Layer, lib *CudaGenerator, dtype base.Dtype, params *DataParams)
- func (n *DataLayer) InitDataUniform(low float64, high float64)
- func (n *DataLayer) InitDataZero()
- func (n *DataLayer) InitDeltas(shared []backends.Tensor)
- func (n *DataLayer) InitWeights(loc float64, scale float64, shared backends.Tensor, zeros bool)
- func (n *DataLayer) K() int
- func (n *DataLayer) M() int
- func (n *DataLayer) P() int
- func (n *DataLayer) Q() int
- func (n *DataLayer) String() string
- type DataParams
- type DeconvLayer
- type FilterDimShuffle
- type FpropCuda
- type FullLayer
- func (n *FullLayer) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor
- func (n *FullLayer) DimF() []int
- func (n *FullLayer) DimF2() []int
- func (n *FullLayer) DimI() []int
- func (n *FullLayer) DimI2() []int
- func (n *FullLayer) DimO() []int
- func (n *FullLayer) DimO2() []int
- func (n *FullLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor
- func (n *FullLayer) Init(self Layer, lib *CudaGenerator, dtype base.Dtype, params *FullParams)
- func (n *FullLayer) NIn() int
- func (n *FullLayer) NOut() int
- func (n *FullLayer) Relu() bool
- func (n *FullLayer) String() string
- type FullParams
- type InceptionLayer
- func (n *InceptionLayer) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor
- func (n *InceptionLayer) C() int
- func (n *InceptionLayer) DHW() []int
- func (n *InceptionLayer) DimF() []int
- func (n *InceptionLayer) DimI() []int
- func (n *InceptionLayer) DimI2() []int
- func (n *InceptionLayer) DimO() []int
- func (n *InceptionLayer) DimO2() []int
- func (n *InceptionLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor
- func (n *InceptionLayer) Init(self Layer, lib *CudaGenerator, dtype base.Dtype, partitions [][]Layer, ...)
- func (n *InceptionLayer) InitActivations(fpropOut backends.Tensor)
- func (n *InceptionLayer) InitDeltas(shared []backends.Tensor)
- func (n *InceptionLayer) InitWeights(loc float64, scale float64, shared backends.Tensor, zeros bool)
- func (n *InceptionLayer) K() int
- func (n *InceptionLayer) M() int
- func (n *InceptionLayer) MPQ() []int
- func (n *InceptionLayer) NCK() []int
- func (n *InceptionLayer) NOut() int
- func (n *InceptionLayer) P() int
- func (n *InceptionLayer) Partitions() [][]Layer
- func (n *InceptionLayer) Q() int
- func (n *InceptionLayer) String() string
- type InceptionParams
- type Kernel
- type KernelGroup
- type Layer
- type LayerBase
- func (n *LayerBase) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor
- func (n *LayerBase) BpropRelu(bpropIn backends.Tensor) backends.Tensor
- func (n *LayerBase) C() int
- func (n *LayerBase) DimF() []int
- func (n *LayerBase) DimF2() []int
- func (n *LayerBase) DimI() []int
- func (n *LayerBase) DimI2() []int
- func (n *LayerBase) DimO() []int
- func (n *LayerBase) DimO2() []int
- func (n *LayerBase) Dtype() base.Dtype
- func (n *LayerBase) Flops() float64
- func (n *LayerBase) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor
- func (n *LayerBase) GradDescent()
- func (n *LayerBase) Init(self Layer, lib *CudaGenerator, dtype base.Dtype, N int, dtypeU base.Dtype)
- func (n *LayerBase) InitActivations(fpropOut backends.Tensor)
- func (n *LayerBase) InitDataUniform(low float64, high float64)
- func (n *LayerBase) InitDataZero()
- func (n *LayerBase) InitDeltas(shared []backends.Tensor)
- func (n *LayerBase) InitWeights(loc float64, scale float64, shared backends.Tensor, zeros bool)
- func (n *LayerBase) K() int
- func (n *LayerBase) M() int
- func (n *LayerBase) N() int
- func (n *LayerBase) NOut() int
- func (n *LayerBase) P() int
- func (n *LayerBase) Q() int
- func (n *LayerBase) ScaleWeights(scale float64)
- func (n *LayerBase) SetBpropOut(bpropOut backends.Tensor)
- func (n *LayerBase) SetDeltaStats(deltaStats backends.Tensor)
- func (n *LayerBase) SizeF() int
- func (n *LayerBase) SizeI() int
- func (n *LayerBase) SizeO() int
- type LrnLayer
- type PoolKernel
- type PoolLayer
- func (n *PoolLayer) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor
- func (n *PoolLayer) C() int
- func (n *PoolLayer) DHW() []int
- func (n *PoolLayer) DimF2() []int
- func (n *PoolLayer) DimI() []int
- func (n *PoolLayer) DimI2() []int
- func (n *PoolLayer) DimO() []int
- func (n *PoolLayer) DimO2() []int
- func (n *PoolLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor
- func (n *PoolLayer) Init(self Layer, lib *CudaGenerator, dtype base.Dtype, params *backends.PoolParams)
- func (n *PoolLayer) InitActivations(fpropOut backends.Tensor)
- func (n *PoolLayer) JTRS() []int
- func (n *PoolLayer) K() int
- func (n *PoolLayer) M() int
- func (n *PoolLayer) MPQ() []int
- func (n *PoolLayer) NOut() int
- func (n *PoolLayer) Op() backends.PoolOp
- func (n *PoolLayer) P() int
- func (n *PoolLayer) Padding() []int
- func (n *PoolLayer) Q() int
- func (n *PoolLayer) Strides() []int
- func (n *PoolLayer) String() string
- type UpdateConvReduce
- type UpdateCuda
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func BuildCompoundKernel ¶
Types ¶
type BatchNormLayer ¶
type BatchNormLayer struct { LayerBase // contains filtered or unexported fields }
func NewBatchNormLayer ¶
func NewBatchNormLayer( lib *CudaGenerator, dtype base.Dtype, params *BatchNormParams) *BatchNormLayer
func (*BatchNormLayer) Bsum ¶
func (n *BatchNormLayer) Bsum() bool
func (*BatchNormLayer) C ¶
func (n *BatchNormLayer) C() int
func (*BatchNormLayer) Dim2 ¶
func (n *BatchNormLayer) Dim2() []int
func (*BatchNormLayer) DimI ¶
func (n *BatchNormLayer) DimI() []int
func (*BatchNormLayer) DimO ¶
func (n *BatchNormLayer) DimO() []int
func (*BatchNormLayer) DimO2 ¶
func (n *BatchNormLayer) DimO2() []int
func (*BatchNormLayer) Eps ¶
func (n *BatchNormLayer) Eps() float64
func (*BatchNormLayer) Init ¶
func (n *BatchNormLayer) Init( self Layer, lib *CudaGenerator, dtype base.Dtype, params *BatchNormParams)
func (*BatchNormLayer) InitActivations ¶
func (n *BatchNormLayer) InitActivations(fpropOut backends.Tensor)
func (*BatchNormLayer) InitDeltas ¶
func (n *BatchNormLayer) InitDeltas(shared []backends.Tensor)
func (*BatchNormLayer) InitWeights ¶
func (*BatchNormLayer) K ¶
func (n *BatchNormLayer) K() int
func (*BatchNormLayer) M ¶
func (n *BatchNormLayer) M() int
func (*BatchNormLayer) NOut ¶
func (n *BatchNormLayer) NOut() int
func (*BatchNormLayer) P ¶
func (n *BatchNormLayer) P() int
func (*BatchNormLayer) Q ¶
func (n *BatchNormLayer) Q() int
func (*BatchNormLayer) RcpDepth ¶
func (n *BatchNormLayer) RcpDepth() float64
func (*BatchNormLayer) Relu ¶
func (n *BatchNormLayer) Relu() bool
func (*BatchNormLayer) Rho ¶
func (n *BatchNormLayer) Rho() float64
func (*BatchNormLayer) String ¶
func (n *BatchNormLayer) String() string
type BatchNormParams ¶
type BatchNormParams struct { N int C int D int H int W int NIn int Rho float64 Eps float64 Relu bool Bsum bool }
func (*BatchNormParams) Init ¶
func (a *BatchNormParams) Init()
func (*BatchNormParams) Resolve ¶
func (a *BatchNormParams) Resolve()
type BpropCuda ¶
type BpropCuda struct { KernelGroup // contains filtered or unexported fields }
func NewBpropCuda ¶
func NewBpropCuda( lib *CudaGenerator, dtype base.Dtype, params *ConvParams) *BpropCuda
func (*BpropCuda) BindParams ¶
func (*BpropCuda) Init ¶
func (g *BpropCuda) Init( lib *CudaGenerator, dtype base.Dtype, params *ConvParams)
type CompoundOps ¶
type CompoundOps struct {
// contains filtered or unexported fields
}
For kernels that can't compound these ops internally, use an external kernel.
func NewCompoundOps ¶
func NewCompoundOps(lib *CudaGenerator, dtype base.Dtype, k int, n int) *CompoundOps
func (*CompoundOps) BindParams ¶
func (*CompoundOps) Execute ¶
func (z *CompoundOps) Execute()
func (*CompoundOps) Init ¶
func (z *CompoundOps) Init(lib *CudaGenerator, dtype base.Dtype, k int, n int)
func (*CompoundOps) Unbind ¶
func (z *CompoundOps) Unbind()
ACHTUNG: Is this method necessary in the generator scenario?
type ConvBpropKernels ¶
type ConvFpropKernels ¶
type ConvKernelBuilder ¶
type ConvKernelBuilder interface { BuildConvKernels(dtype base.Dtype, a *ConvParams) ( ConvFpropKernels, ConvBpropKernels, ConvUpdateKernels) }
type ConvLayer ¶
type ConvLayer struct {
ConvLayerBase
}
func NewConvLayer ¶
func NewConvLayer( lib *CudaGenerator, dtype base.Dtype, params *backends.ConvParams, kernelBuilder ConvKernelBuilder) *ConvLayer
func (*ConvLayer) Init ¶
func (n *ConvLayer) Init( self Layer, lib *CudaGenerator, dtype base.Dtype, params *backends.ConvParams, kernelBuilder ConvKernelBuilder)
type ConvLayerBase ¶
type ConvLayerBase struct { LayerBase // contains filtered or unexported fields }
func (*ConvLayerBase) C ¶
func (n *ConvLayerBase) C() int
func (*ConvLayerBase) DHW ¶
func (n *ConvLayerBase) DHW() []int
func (*ConvLayerBase) DimF ¶
func (n *ConvLayerBase) DimF() []int
func (*ConvLayerBase) DimF2 ¶
func (n *ConvLayerBase) DimF2() []int
func (*ConvLayerBase) DimF2t ¶
func (n *ConvLayerBase) DimF2t() []int
func (*ConvLayerBase) DimFb ¶
func (n *ConvLayerBase) DimFb() []int
func (*ConvLayerBase) DimI ¶
func (n *ConvLayerBase) DimI() []int
func (*ConvLayerBase) DimI2 ¶
func (n *ConvLayerBase) DimI2() []int
func (*ConvLayerBase) DimO ¶
func (n *ConvLayerBase) DimO() []int
func (*ConvLayerBase) DimO2 ¶
func (n *ConvLayerBase) DimO2() []int
func (*ConvLayerBase) DimS ¶
func (n *ConvLayerBase) DimS() []int
func (*ConvLayerBase) Init ¶
func (n *ConvLayerBase) Init( self Layer, lib *CudaGenerator, dtype base.Dtype, params *ConvParams, kernelBuilder ConvKernelBuilder)
func (*ConvLayerBase) InitActivations ¶
func (n *ConvLayerBase) InitActivations(fpropOut backends.Tensor)
func (*ConvLayerBase) K ¶
func (n *ConvLayerBase) K() int
func (*ConvLayerBase) M ¶
func (n *ConvLayerBase) M() int
func (*ConvLayerBase) MPQ ¶
func (n *ConvLayerBase) MPQ() []int
func (*ConvLayerBase) NCK ¶
func (n *ConvLayerBase) NCK() []int
func (*ConvLayerBase) NOut ¶
func (n *ConvLayerBase) NOut() int
func (*ConvLayerBase) P ¶
func (n *ConvLayerBase) P() int
func (*ConvLayerBase) Padding ¶
func (n *ConvLayerBase) Padding() []int
func (*ConvLayerBase) Q ¶
func (n *ConvLayerBase) Q() int
func (*ConvLayerBase) SetBsum ¶
func (n *ConvLayerBase) SetBsum(bsum bool)
func (*ConvLayerBase) SetRelu ¶
func (n *ConvLayerBase) SetRelu(relu bool)
func (*ConvLayerBase) Strides ¶
func (n *ConvLayerBase) Strides() []int
func (*ConvLayerBase) TRS ¶
func (n *ConvLayerBase) TRS() []int
type ConvParams ¶
type ConvParams struct { N int C int K int D int H int W int T int R int S int M int P int Q int PadD int PadH int PadW int StrD int StrH int StrW int DilD int DilH int DilW int }
func (*ConvParams) InitConv ¶
func (a *ConvParams) InitConv(params *backends.ConvParams)
func (*ConvParams) InitDeconv ¶
func (a *ConvParams) InitDeconv(params *backends.DeconvParams)
type ConvUpdateKernels ¶
type CudaDeviceAllocation ¶
type CudaDeviceAllocation struct {
// contains filtered or unexported fields
}
func NewCudaDeviceAllocation ¶
func NewCudaDeviceAllocation(index int, nbytes int, offset int) *CudaDeviceAllocation
func (*CudaDeviceAllocation) Add ¶
func (a *CudaDeviceAllocation) Add(offset int) acc.DeviceAllocation
type CudaGenerator ¶
type CudaGenerator struct { acc.AccGeneratorBase // contains filtered or unexported fields }
func NewCudaGenerator ¶
func (*CudaGenerator) BpropConv ¶
func (b *CudaGenerator) BpropConv( layer backends.ConvLayerBase, f backends.Tensor, e backends.Tensor, gradI backends.Tensor, x backends.Tensor, bias backends.Tensor, bsum backends.Tensor, alpha float64, beta float64, relu bool, brelu bool, slope float64)
SKIPPED: repeat (gpu only), layerOp
func (*CudaGenerator) BpropLrn ¶
func (b *CudaGenerator) BpropLrn( layer backends.LrnLayer, i backends.Tensor, o backends.Tensor, e backends.Tensor, delta backends.Tensor, denom backends.Tensor, alpha float64, beta float64, ascale float64, bpower float64)
SKIPPED: Support for 'repeat'
func (*CudaGenerator) BpropPool ¶
func (b *CudaGenerator) BpropPool( layer backends.PoolLayer, i backends.Tensor, o backends.Tensor, argmax backends.Tensor, alpha float64, beta float64)
SKIPPED: repeat (gpu only)
func (*CudaGenerator) BpropSkipNode ¶
func (*CudaGenerator) BpropTransform ¶
func (*CudaGenerator) BuildConvKernels ¶
func (b *CudaGenerator) BuildConvKernels(dtype base.Dtype, a *ConvParams) ( ConvFpropKernels, ConvBpropKernels, ConvUpdateKernels)
func (*CudaGenerator) BuildProlog ¶
func (b *CudaGenerator) BuildProlog()
func (*CudaGenerator) CompoundBpropBn ¶
func (b *CudaGenerator) CompoundBpropBn( deltaOut backends.Tensor, gradGamma backends.Tensor, gradBeta backends.Tensor, deltaIn backends.Tensor, x backends.Tensor, xsum backends.Tensor, xvar backends.Tensor, gamma backends.Tensor, eps float64, binary bool, layer backends.BatchNormLayer)
SKIPPED: threads, repeat (both gpu only)
func (*CudaGenerator) CompoundDot ¶
func (*CudaGenerator) CompoundDotCuda ¶
func (*CudaGenerator) CompoundFpropBn ¶
func (b *CudaGenerator) CompoundFpropBn( x backends.Tensor, xsum backends.Tensor, xvar backends.Tensor, gmean backends.Tensor, gvar backends.Tensor, gamma backends.Tensor, beta backends.Tensor, y backends.Tensor, eps float64, rho float64, computeBatchSum bool, accumbeta float64, relu bool, binary bool, inference bool, outputs backends.Tensor, layer backends.BatchNormLayer)
SKIPPED: threads, repeat (both gpu only)
func (*CudaGenerator) CompoundKernel ¶
func (b *CudaGenerator) CompoundKernel(args []backends.Value) backends.Value
func (*CudaGenerator) ConfigureCodeOutput ¶
func (b *CudaGenerator) ConfigureCodeOutput( buildMainCpp bool, filePrefix string, hostNamespace string, kernelPrefix string)
func (*CudaGenerator) FormatBufferRef ¶
func (b *CudaGenerator) FormatBufferRef(tensor backends.Tensor, paren bool) string
func (*CudaGenerator) FpropConv ¶
func (b *CudaGenerator) FpropConv( layer backends.ConvLayerBase, i backends.Tensor, f backends.Tensor, o backends.Tensor, x backends.Tensor, bias backends.Tensor, bsum backends.Tensor, alpha float64, beta float64, relu bool, brelu bool, slope float64)
SKIPPED: repeat (gpu only), layerOp
func (*CudaGenerator) FpropLrn ¶
func (b *CudaGenerator) FpropLrn( layer backends.LrnLayer, i backends.Tensor, o backends.Tensor, denom backends.Tensor, alpha float64, beta float64, ascale float64, bpower float64)
SKIPPED: Support for 'repeat'
func (*CudaGenerator) FpropPool ¶
func (b *CudaGenerator) FpropPool( layer backends.PoolLayer, i backends.Tensor, o backends.Tensor, argmax backends.Tensor, alpha float64, beta float64)
SKIPPED: Support for 'repeat'
func (*CudaGenerator) FpropSkipNode ¶
func (*CudaGenerator) FpropSoftmax ¶
func (*CudaGenerator) FpropTransform ¶
func (*CudaGenerator) GetFloat ¶
func (b *CudaGenerator) GetFloat(src acc.DeviceAllocation, size int) string
func (*CudaGenerator) GetInt ¶
func (b *CudaGenerator) GetInt(src acc.DeviceAllocation, size int) string
func (*CudaGenerator) GetMetricSum ¶
func (*CudaGenerator) GetSmCount ¶
func (b *CudaGenerator) GetSmCount() int
func (*CudaGenerator) Init ¶
func (b *CudaGenerator) Init( self generators.Generator, rngSeed int, defaultDtype base.Dtype, stochasticRound int, deviceId int, computeCapability [2]int, bench bool, scratchSize int, histBins int, histOffset int, compatMode backends.CompatMode)
func (*CudaGenerator) LookupKernel ¶
func (b *CudaGenerator) LookupKernel(key string) *Kernel
func (*CudaGenerator) MakeBinaryMask ¶
func (b *CudaGenerator) MakeBinaryMask(out backends.Tensor, keepThresh float64)
func (*CudaGenerator) MapStringToFunc ¶
func (b *CudaGenerator) MapStringToFunc(funcname string, dtype base.Dtype) *Kernel
func (*CudaGenerator) MemAlloc ¶
func (b *CudaGenerator) MemAlloc(nbytes int) acc.DeviceAllocation
func (*CudaGenerator) MemcpyDtodAsync ¶
func (b *CudaGenerator) MemcpyDtodAsync( dest acc.DeviceAllocation, src acc.DeviceAllocation, size int)
func (*CudaGenerator) MemsetD16Async ¶
func (b *CudaGenerator) MemsetD16Async(dest acc.DeviceAllocation, data uint16, count int)
func (*CudaGenerator) MemsetD32Async ¶
func (b *CudaGenerator) MemsetD32Async(dest acc.DeviceAllocation, data uint32, count int)
func (*CudaGenerator) MemsetD8Async ¶
func (b *CudaGenerator) MemsetD8Async(dest acc.DeviceAllocation, data uint8, count int)
func (*CudaGenerator) NewBatchNormLayer ¶
func (b *CudaGenerator) NewBatchNormLayer(inShape []int) backends.BatchNormLayer
func (*CudaGenerator) NewConvLayer ¶
func (b *CudaGenerator) NewConvLayer( dtype base.Dtype, params *backends.ConvParams) backends.ConvLayer
func (*CudaGenerator) NewDeconvLayer ¶
func (b *CudaGenerator) NewDeconvLayer( dtype base.Dtype, params *backends.DeconvParams) backends.DeconvLayer
func (*CudaGenerator) NewLrnLayer ¶
func (*CudaGenerator) NewPoolLayer ¶
func (b *CudaGenerator) NewPoolLayer( dtype base.Dtype, params *backends.PoolParams) backends.PoolLayer
func (*CudaGenerator) NewReluLayer ¶
func (b *CudaGenerator) NewReluLayer() backends.Layer
func (*CudaGenerator) OutputCode ¶
func (b *CudaGenerator) OutputCode(outDir string) error
func (*CudaGenerator) RegisterKernel ¶
func (b *CudaGenerator) RegisterKernel(key string, kernel *Kernel)
func (*CudaGenerator) RngUniform ¶
func (*CudaGenerator) ScratchBufferInit ¶
func (b *CudaGenerator) ScratchBufferInit()
func (*CudaGenerator) ScratchBufferOffset ¶
func (b *CudaGenerator) ScratchBufferOffset(size int) acc.DeviceAllocation
func (*CudaGenerator) ScratchBufferReset ¶
func (b *CudaGenerator) ScratchBufferReset()
func (*CudaGenerator) SetScratchSize ¶
func (b *CudaGenerator) SetScratchSize(args ...int)
func (*CudaGenerator) SetSmCount ¶
func (b *CudaGenerator) SetSmCount(smCount int)
func (*CudaGenerator) UpdateConv ¶
func (b *CudaGenerator) UpdateConv( layer backends.ConvLayerBase, i backends.Tensor, e backends.Tensor, gradF backends.Tensor, alpha float64, beta float64, gradBias backends.Tensor)
SKIPPED: repeat (gpu only), layerOp
type DataLayer ¶
type DataLayer struct { LayerBase // contains filtered or unexported fields }
func NewDataLayer ¶
func NewDataLayer( lib *CudaGenerator, dtype base.Dtype, params *DataParams) *DataLayer
func (*DataLayer) Init ¶
func (n *DataLayer) Init( self Layer, lib *CudaGenerator, dtype base.Dtype, params *DataParams)
func (*DataLayer) InitDataUniform ¶
func (*DataLayer) InitDataZero ¶
func (n *DataLayer) InitDataZero()
func (*DataLayer) InitDeltas ¶
func (*DataLayer) InitWeights ¶
type DataParams ¶
func (*DataParams) Init ¶
func (a *DataParams) Init()
func (*DataParams) Resolve ¶
func (a *DataParams) Resolve()
type DeconvLayer ¶
type DeconvLayer struct {
ConvLayerBase
}
func NewDeconvLayer ¶
func NewDeconvLayer( lib *CudaGenerator, dtype base.Dtype, params *backends.DeconvParams, kernelBuilder ConvKernelBuilder) *DeconvLayer
func (*DeconvLayer) Init ¶
func (n *DeconvLayer) Init( self Layer, lib *CudaGenerator, dtype base.Dtype, params *backends.DeconvParams, kernelBuilder ConvKernelBuilder)
func (*DeconvLayer) String ¶
func (n *DeconvLayer) String() string
type FilterDimShuffle ¶
type FilterDimShuffle struct {
// contains filtered or unexported fields
}
func NewFilterDimShuffle ¶
func NewFilterDimShuffle( lib *CudaGenerator, dtype base.Dtype, c int, t int, r int, s int, k int) *FilterDimShuffle
func (*FilterDimShuffle) BindParams ¶
func (z *FilterDimShuffle) BindParams(f backends.Tensor) acc.DeviceAllocation
func (*FilterDimShuffle) Execute ¶
func (z *FilterDimShuffle) Execute()
func (*FilterDimShuffle) Init ¶
func (z *FilterDimShuffle) Init( lib *CudaGenerator, dtype base.Dtype, c int, t int, r int, s int, k int)
func (*FilterDimShuffle) Unbind ¶
func (z *FilterDimShuffle) Unbind()
ACHTUNG: Is this method necessary in the generator scenario?
type FpropCuda ¶
type FpropCuda struct { KernelGroup // contains filtered or unexported fields }
func NewFpropCuda ¶
func NewFpropCuda( lib *CudaGenerator, dtype base.Dtype, params *ConvParams) *FpropCuda
func (*FpropCuda) BindParams ¶
func (*FpropCuda) Init ¶
func (g *FpropCuda) Init( lib *CudaGenerator, dtype base.Dtype, params *ConvParams)
type FullLayer ¶
type FullLayer struct { LayerBase // contains filtered or unexported fields }
func NewFullLayer ¶
func NewFullLayer( lib *CudaGenerator, dtype base.Dtype, params *FullParams) *FullLayer
func (*FullLayer) Init ¶
func (n *FullLayer) Init( self Layer, lib *CudaGenerator, dtype base.Dtype, params *FullParams)
type FullParams ¶
func (*FullParams) Init ¶
func (a *FullParams) Init()
func (*FullParams) Resolve ¶
func (a *FullParams) Resolve()
type InceptionLayer ¶
type InceptionLayer struct { LayerBase // contains filtered or unexported fields }
func NewInceptionLayer ¶
func NewInceptionLayer( lib *CudaGenerator, dtype base.Dtype, partitions [][]Layer, params *InceptionParams) *InceptionLayer
func (*InceptionLayer) C ¶
func (n *InceptionLayer) C() int
func (*InceptionLayer) DHW ¶
func (n *InceptionLayer) DHW() []int
func (*InceptionLayer) DimF ¶
func (n *InceptionLayer) DimF() []int
func (*InceptionLayer) DimI ¶
func (n *InceptionLayer) DimI() []int
func (*InceptionLayer) DimI2 ¶
func (n *InceptionLayer) DimI2() []int
func (*InceptionLayer) DimO ¶
func (n *InceptionLayer) DimO() []int
func (*InceptionLayer) DimO2 ¶
func (n *InceptionLayer) DimO2() []int
func (*InceptionLayer) Init ¶
func (n *InceptionLayer) Init( self Layer, lib *CudaGenerator, dtype base.Dtype, partitions [][]Layer, params *InceptionParams)
func (*InceptionLayer) InitActivations ¶
func (n *InceptionLayer) InitActivations(fpropOut backends.Tensor)
func (*InceptionLayer) InitDeltas ¶
func (n *InceptionLayer) InitDeltas(shared []backends.Tensor)
func (*InceptionLayer) InitWeights ¶
func (*InceptionLayer) K ¶
func (n *InceptionLayer) K() int
func (*InceptionLayer) M ¶
func (n *InceptionLayer) M() int
func (*InceptionLayer) MPQ ¶
func (n *InceptionLayer) MPQ() []int
func (*InceptionLayer) NCK ¶
func (n *InceptionLayer) NCK() []int
func (*InceptionLayer) NOut ¶
func (n *InceptionLayer) NOut() int
func (*InceptionLayer) P ¶
func (n *InceptionLayer) P() int
func (*InceptionLayer) Partitions ¶
func (n *InceptionLayer) Partitions() [][]Layer
func (*InceptionLayer) Q ¶
func (n *InceptionLayer) Q() int
func (*InceptionLayer) String ¶
func (n *InceptionLayer) String() string
type InceptionParams ¶
func (*InceptionParams) Init ¶
func (a *InceptionParams) Init()
func (*InceptionParams) Resolve ¶
func (a *InceptionParams) Resolve()
type KernelGroup ¶
type KernelGroup struct {
// contains filtered or unexported fields
}
func (*KernelGroup) Init ¶
func (g *KernelGroup) Init( lib *CudaGenerator, dtype base.Dtype, params *ConvParams)
type Layer ¶
type Layer interface { Dtype() base.Dtype N() int Flops() float64 SizeI() int SizeF() int SizeO() int // optional properties C() int K() int M() int P() int Q() int DimI() []int DimF() []int DimO() []int DimI2() []int DimF2() []int DimO2() []int NOut() int // setters SetBpropOut(bpropOut backends.Tensor) SetDeltaStats(deltaStats backends.Tensor) // operational methods InitActivations(fpropOut backends.Tensor) InitDeltas(shared []backends.Tensor) InitWeights(loc float64, scale float64, shared backends.Tensor, zeros bool) InitDataZero() InitDataUniform(low float64, high float64) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor String() string }
type LayerBase ¶
type LayerBase struct {
// contains filtered or unexported fields
}
func (*LayerBase) GradDescent ¶
func (n *LayerBase) GradDescent()
func (*LayerBase) InitActivations ¶
func (*LayerBase) InitDataUniform ¶
func (*LayerBase) InitDataZero ¶
func (n *LayerBase) InitDataZero()
func (*LayerBase) InitDeltas ¶
func (*LayerBase) InitWeights ¶
func (*LayerBase) ScaleWeights ¶
func (*LayerBase) SetBpropOut ¶
func (*LayerBase) SetDeltaStats ¶
type PoolKernel ¶
type PoolKernel struct {
// contains filtered or unexported fields
}
func NewPoolKernel ¶
func NewPoolKernel( name string, grid []int, block []int, args ...interface{}) *PoolKernel
type PoolLayer ¶
type PoolLayer struct { LayerBase // contains filtered or unexported fields }
func NewPoolLayer ¶
func NewPoolLayer( lib *CudaGenerator, dtype base.Dtype, params *backends.PoolParams) *PoolLayer
func (*PoolLayer) Init ¶
func (n *PoolLayer) Init( self Layer, lib *CudaGenerator, dtype base.Dtype, params *backends.PoolParams)
func (*PoolLayer) InitActivations ¶
type UpdateConvReduce ¶
type UpdateConvReduce struct {
// contains filtered or unexported fields
}
Fast axis=0 reduction kernel used for deterministic update.
func NewUpdateConvReduce ¶
func NewUpdateConvReduce(lib *CudaGenerator, gridMpq int, crstk int) *UpdateConvReduce
func (*UpdateConvReduce) BindParams ¶
func (z *UpdateConvReduce) BindParams( u backends.Tensor, alpha float64, beta float64, noOp bool) acc.DeviceAllocation
func (*UpdateConvReduce) Execute ¶
func (z *UpdateConvReduce) Execute()
func (*UpdateConvReduce) Init ¶
func (z *UpdateConvReduce) Init(lib *CudaGenerator, gridMpq int, crstk int)
func (*UpdateConvReduce) Unbind ¶
func (z *UpdateConvReduce) Unbind()
ACHTUNG: Is this method necessary in the generator scenario?
type UpdateCuda ¶
type UpdateCuda struct { KernelGroup // contains filtered or unexported fields }
func NewUpdateCuda ¶
func NewUpdateCuda( lib *CudaGenerator, dtype base.Dtype, params *ConvParams) *UpdateCuda
func (*UpdateCuda) BindParams ¶
func (*UpdateCuda) Execute ¶
func (g *UpdateCuda) Execute()
SKIPPED: Arguments 'repeat' and 'unbind'
func (*UpdateCuda) Init ¶
func (g *UpdateCuda) Init( lib *CudaGenerator, dtype base.Dtype, params *ConvParams)