cuda

package
v0.0.0-...-2fe1034

Published: Apr 23, 2022 License: Apache-2.0 Imports: 11 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func BuildCompoundKernel

func BuildCompoundKernel(typeArgs []acc.TypeArg, computeCapability [2]int) (string, string)

func Flatten

func Flatten(lst ...interface{}) []int

Flatten flattens a nested list of lists or values. Only one level of nesting and int values are supported here; this should be sufficient.
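For illustration, a minimal sketch of how Flatten might be called, based on the description above (imports and package path omitted, since they are not shown in this listing; the output is an assumption inferred from the doc comment):

	// Flatten accepts plain ints and one level of []int nesting.
	vals := cuda.Flatten(1, []int{2, 3}, 4)
	// vals is expected to be []int{1, 2, 3, 4}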

Types

type BatchNormLayer

type BatchNormLayer struct {
	LayerBase
	// contains filtered or unexported fields
}

func NewBatchNormLayer

func NewBatchNormLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *BatchNormParams) *BatchNormLayer

func (*BatchNormLayer) Bprop

func (n *BatchNormLayer) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor

func (*BatchNormLayer) Bsum

func (n *BatchNormLayer) Bsum() bool

func (*BatchNormLayer) C

func (n *BatchNormLayer) C() int

func (*BatchNormLayer) Dim2

func (n *BatchNormLayer) Dim2() []int

func (*BatchNormLayer) DimI

func (n *BatchNormLayer) DimI() []int

func (*BatchNormLayer) DimO

func (n *BatchNormLayer) DimO() []int

func (*BatchNormLayer) DimO2

func (n *BatchNormLayer) DimO2() []int

func (*BatchNormLayer) Eps

func (n *BatchNormLayer) Eps() float64

func (*BatchNormLayer) Fprop

func (n *BatchNormLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*BatchNormLayer) Init

func (n *BatchNormLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *BatchNormParams)

func (*BatchNormLayer) InitActivations

func (n *BatchNormLayer) InitActivations(fpropOut backends.Tensor)

func (*BatchNormLayer) InitDeltas

func (n *BatchNormLayer) InitDeltas(shared []backends.Tensor)

func (*BatchNormLayer) InitWeights

func (n *BatchNormLayer) InitWeights(
	loc float64, scale float64, shared backends.Tensor, zeros bool)

func (*BatchNormLayer) K

func (n *BatchNormLayer) K() int

func (*BatchNormLayer) M

func (n *BatchNormLayer) M() int

func (*BatchNormLayer) NOut

func (n *BatchNormLayer) NOut() int

func (*BatchNormLayer) P

func (n *BatchNormLayer) P() int

func (*BatchNormLayer) Q

func (n *BatchNormLayer) Q() int

func (*BatchNormLayer) RcpDepth

func (n *BatchNormLayer) RcpDepth() float64

func (*BatchNormLayer) Relu

func (n *BatchNormLayer) Relu() bool

func (*BatchNormLayer) Rho

func (n *BatchNormLayer) Rho() float64

func (*BatchNormLayer) String

func (n *BatchNormLayer) String() string

type BatchNormParams

type BatchNormParams struct {
	N    int
	C    int
	D    int
	H    int
	W    int
	NIn  int
	Rho  float64
	Eps  float64
	Relu bool
	Bsum bool
}

func (*BatchNormParams) Init

func (a *BatchNormParams) Init()

func (*BatchNormParams) Resolve

func (a *BatchNormParams) Resolve()
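A minimal sketch of building a batch-norm layer from these parameters, assuming an existing *CudaGenerator named lib and a hypothetical base.Float32 dtype constant (neither is defined in this listing); whether Init and Resolve must be called before use is an assumption based on the method names.

	params := &cuda.BatchNormParams{
		N: 32, C: 64, D: 1, H: 28, W: 28, // shape fields, per the struct above
		Rho: 0.9, Eps: 1e-5,              // running-average decay and epsilon (assumed semantics)
		Relu: true, Bsum: false,
	}
	params.Init()
	params.Resolve()
	bn := cuda.NewBatchNormLayer(lib, base.Float32, params) // lib and base.Float32 are assumed
	_ = bn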

type BpropCuda

type BpropCuda struct {
	KernelGroup
	// contains filtered or unexported fields
}

func NewBpropCuda

func NewBpropCuda(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams) *BpropCuda

func (*BpropCuda) BindParams

func (g *BpropCuda) BindParams(
	i backends.Tensor,
	f backends.Tensor,
	o backends.Tensor,
	x backends.Tensor,
	bias backends.Tensor,
	bsum backends.Tensor,
	alpha float64,
	beta float64,
	relu bool,
	brelu bool,
	slope float64)

func (*BpropCuda) Execute

func (g *BpropCuda) Execute()

SKIPPED: Arguments 'repeat' and 'unbind'

func (*BpropCuda) Init

func (g *BpropCuda) Init(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams)

type CompoundOps

type CompoundOps struct {
	// contains filtered or unexported fields
}

For kernels that cannot compound these ops internally, CompoundOps provides an external kernel.

func NewCompoundOps

func NewCompoundOps(lib *CudaGenerator, dtype base.Dtype, k int, n int) *CompoundOps

func (*CompoundOps) BindParams

func (z *CompoundOps) BindParams(
	o backends.Tensor,
	x backends.Tensor,
	bias backends.Tensor,
	bsum backends.Tensor,
	alpha float64,
	beta float64,
	relu bool,
	brelu bool) acc.DeviceAllocation

func (*CompoundOps) Execute

func (z *CompoundOps) Execute()

func (*CompoundOps) Init

func (z *CompoundOps) Init(lib *CudaGenerator, dtype base.Dtype, k int, n int)

func (*CompoundOps) Unbind

func (z *CompoundOps) Unbind()

ATTENTION: Is this method necessary in the generator scenario?

type ConvBpropKernels

type ConvBpropKernels interface {
	BindParams(
		i backends.Tensor,
		f backends.Tensor,
		o backends.Tensor,
		x backends.Tensor,
		bias backends.Tensor,
		bsum backends.Tensor,
		alpha float64,
		beta float64,
		relu bool,
		brelu bool,
		slope float64)
	Execute()
}

type ConvFpropKernels

type ConvFpropKernels interface {
	BindParams(
		i backends.Tensor,
		f backends.Tensor,
		o backends.Tensor,
		x backends.Tensor,
		bias backends.Tensor,
		bsum backends.Tensor,
		alpha float64,
		beta float64,
		relu bool,
		brelu bool,
		slope float64)
	Execute()
}

type ConvKernelBuilder

type ConvKernelBuilder interface {
	BuildConvKernels(dtype base.Dtype, a *ConvParams) (
		ConvFpropKernels, ConvBpropKernels, ConvUpdateKernels)
}
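CudaGenerator exposes its own BuildConvKernels method (see below), but as a sketch of how this interface fits together, a builder could also be assembled from the FpropCuda, BpropCuda, and UpdateCuda kernel groups documented later in this package. The builder below is an illustration, not part of the package.

	// cudaKernelBuilder is a hypothetical ConvKernelBuilder wiring together the
	// concrete CUDA kernel groups; their BindParams/Execute signatures match the
	// three interfaces above.
	type cudaKernelBuilder struct {
		lib *cuda.CudaGenerator
	}

	func (b *cudaKernelBuilder) BuildConvKernels(dtype base.Dtype, a *cuda.ConvParams) (
		cuda.ConvFpropKernels, cuda.ConvBpropKernels, cuda.ConvUpdateKernels) {
		return cuda.NewFpropCuda(b.lib, dtype, a),
			cuda.NewBpropCuda(b.lib, dtype, a),
			cuda.NewUpdateCuda(b.lib, dtype, a)
	}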

type ConvLayer

type ConvLayer struct {
	ConvLayerBase
}

func NewConvLayer

func NewConvLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.ConvParams,
	kernelBuilder ConvKernelBuilder) *ConvLayer

func (*ConvLayer) Init

func (n *ConvLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.ConvParams,
	kernelBuilder ConvKernelBuilder)

func (*ConvLayer) String

func (n *ConvLayer) String() string

type ConvLayerBase

type ConvLayerBase struct {
	LayerBase
	// contains filtered or unexported fields
}

func (*ConvLayerBase) Bprop

func (n *ConvLayerBase) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor

func (*ConvLayerBase) C

func (n *ConvLayerBase) C() int

func (*ConvLayerBase) DHW

func (n *ConvLayerBase) DHW() []int

func (*ConvLayerBase) DimF

func (n *ConvLayerBase) DimF() []int

func (*ConvLayerBase) DimF2

func (n *ConvLayerBase) DimF2() []int

func (*ConvLayerBase) DimF2t

func (n *ConvLayerBase) DimF2t() []int

func (*ConvLayerBase) DimFb

func (n *ConvLayerBase) DimFb() []int

func (*ConvLayerBase) DimI

func (n *ConvLayerBase) DimI() []int

func (*ConvLayerBase) DimI2

func (n *ConvLayerBase) DimI2() []int

func (*ConvLayerBase) DimO

func (n *ConvLayerBase) DimO() []int

func (*ConvLayerBase) DimO2

func (n *ConvLayerBase) DimO2() []int

func (*ConvLayerBase) DimS

func (n *ConvLayerBase) DimS() []int

func (*ConvLayerBase) Fprop

func (n *ConvLayerBase) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*ConvLayerBase) Init

func (n *ConvLayerBase) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams,
	kernelBuilder ConvKernelBuilder)

func (*ConvLayerBase) InitActivations

func (n *ConvLayerBase) InitActivations(fpropOut backends.Tensor)

func (*ConvLayerBase) K

func (n *ConvLayerBase) K() int

func (*ConvLayerBase) M

func (n *ConvLayerBase) M() int

func (*ConvLayerBase) MPQ

func (n *ConvLayerBase) MPQ() []int

func (*ConvLayerBase) NCK

func (n *ConvLayerBase) NCK() []int

func (*ConvLayerBase) NOut

func (n *ConvLayerBase) NOut() int

func (*ConvLayerBase) P

func (n *ConvLayerBase) P() int

func (*ConvLayerBase) Padding

func (n *ConvLayerBase) Padding() []int

func (*ConvLayerBase) Q

func (n *ConvLayerBase) Q() int

func (*ConvLayerBase) SetBsum

func (n *ConvLayerBase) SetBsum(bsum bool)

func (*ConvLayerBase) SetRelu

func (n *ConvLayerBase) SetRelu(relu bool)

func (*ConvLayerBase) Strides

func (n *ConvLayerBase) Strides() []int

func (*ConvLayerBase) TRS

func (n *ConvLayerBase) TRS() []int

type ConvParams

type ConvParams struct {
	N    int
	C    int
	K    int
	D    int
	H    int
	W    int
	T    int
	R    int
	S    int
	M    int
	P    int
	Q    int
	PadD int
	PadH int
	PadW int
	StrD int
	StrH int
	StrW int
	DilD int
	DilH int
	DilW int
}

func (*ConvParams) InitConv

func (a *ConvParams) InitConv(params *backends.ConvParams)

func (*ConvParams) InitDeconv

func (a *ConvParams) InitDeconv(params *backends.DeconvParams)
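InitConv and InitDeconv populate these fields from the backend-level parameter structs. As a sketch, a 3x3 2-D convolution could also be described directly with a literal; the field semantics and the assumption that the output dims M, P, Q are precomputed follow the usual N/C/K/D/H/W/T/R/S naming and are not stated in this listing.

	conv := &cuda.ConvParams{
		N: 32, C: 64, K: 128, // batch, input channels, output channels (assumed)
		D: 1, H: 56, W: 56,   // input depth/height/width
		T: 1, R: 3, S: 3,     // filter depth/height/width
		M: 1, P: 56, Q: 56,   // output depth/height/width (assumed precomputed)
		PadD: 0, PadH: 1, PadW: 1,
		StrD: 1, StrH: 1, StrW: 1,
		DilD: 1, DilH: 1, DilW: 1,
	}
	fprop := cuda.NewFpropCuda(lib, base.Float32, conv) // lib and base.Float32 are assumed
	_ = fprop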

type ConvUpdateKernels

type ConvUpdateKernels interface {
	BindParams(
		i backends.Tensor,
		e backends.Tensor,
		o backends.Tensor,
		alpha float64,
		beta float64,
		noOp bool)
	Execute()
}

type CudaDeviceAllocation

type CudaDeviceAllocation struct {
	// contains filtered or unexported fields
}

func NewCudaDeviceAllocation

func NewCudaDeviceAllocation(index int, nbytes int, offset int) *CudaDeviceAllocation

func (*CudaDeviceAllocation) Add

type CudaGenerator

type CudaGenerator struct {
	acc.AccGeneratorBase
	// contains filtered or unexported fields
}

func NewCudaGenerator

func NewCudaGenerator(
	rngSeed int,
	defaultDtype base.Dtype,
	stochasticRound int,
	deviceId int,
	computeCapability [2]int,
	bench bool,
	scratchSize int,
	histBins int,
	histOffset int,
	compatMode backends.CompatMode) *CudaGenerator
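A sketch of constructing a generator; the argument values are placeholders, and base.Float32 plus an integer-like zero value for backends.CompatMode are assumptions not defined in this listing.

	lib := cuda.NewCudaGenerator(
		0,            // rngSeed
		base.Float32, // defaultDtype (assumed identifier)
		0,            // stochasticRound
		0,            // deviceId
		[2]int{7, 0}, // computeCapability, e.g. sm_70
		false,        // bench
		0,            // scratchSize
		0,            // histBins
		0,            // histOffset
		0,            // compatMode (zero value of backends.CompatMode, assumed valid)
	)
	_ = lib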

func (*CudaGenerator) Assign

func (b *CudaGenerator) Assign(out *acc.AccTensor, value backends.Value) backends.Value

func (*CudaGenerator) Binarize

func (b *CudaGenerator) Binarize(
	ary backends.Tensor, out backends.Tensor, stochastic bool) backends.Tensor

func (*CudaGenerator) BpropConv

func (b *CudaGenerator) BpropConv(
	layer backends.ConvLayerBase,
	f backends.Tensor,
	e backends.Tensor,
	gradI backends.Tensor,
	x backends.Tensor,
	bias backends.Tensor,
	bsum backends.Tensor,
	alpha float64,
	beta float64,
	relu bool,
	brelu bool,
	slope float64)

SKIPPED: repeat (gpu only), layerOp

func (*CudaGenerator) BpropLrn

func (b *CudaGenerator) BpropLrn(
	layer backends.LrnLayer,
	i backends.Tensor,
	o backends.Tensor,
	e backends.Tensor,
	delta backends.Tensor,
	denom backends.Tensor,
	alpha float64,
	beta float64,
	ascale float64,
	bpower float64)

SKIPPED: Support for 'repeat'

func (*CudaGenerator) BpropPool

func (b *CudaGenerator) BpropPool(
	layer backends.PoolLayer,
	i backends.Tensor,
	o backends.Tensor,
	argmax backends.Tensor,
	alpha float64,
	beta float64)

SKIPPED: repeat (gpu only)

func (*CudaGenerator) BpropRelu

func (b *CudaGenerator) BpropRelu(
	layer backends.Layer,
	x backends.Tensor,
	errors backends.Tensor,
	deltas backends.Tensor,
	slope float64) backends.Value

func (*CudaGenerator) BpropSkipNode

func (b *CudaGenerator) BpropSkipNode(
	errors backends.Tensor, deltas backends.Tensor, alpha float64, beta float64)

func (*CudaGenerator) BpropTransform

func (b *CudaGenerator) BpropTransform(
	nglayer backends.Layer,
	transform backends.Transform,
	outputs backends.Tensor,
	errors backends.Tensor,
	deltas backends.Tensor,
	relu bool)

func (*CudaGenerator) BuildConvKernels

func (*CudaGenerator) BuildProlog

func (b *CudaGenerator) BuildProlog()

func (*CudaGenerator) CompoundBpropBn

func (b *CudaGenerator) CompoundBpropBn(
	deltaOut backends.Tensor,
	gradGamma backends.Tensor,
	gradBeta backends.Tensor,
	deltaIn backends.Tensor,
	x backends.Tensor,
	xsum backends.Tensor,
	xvar backends.Tensor,
	gamma backends.Tensor,
	eps float64,
	binary bool,
	layer backends.BatchNormLayer)

SKIPPED: threads, repeat (both gpu only)

func (*CudaGenerator) CompoundDot

func (b *CudaGenerator) CompoundDot(
	x backends.Tensor,
	y backends.Tensor,
	z backends.Tensor,
	alpha float64,
	beta float64,
	relu bool,
	bsum backends.Tensor) backends.Tensor
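For example, a fused GEMM with ReLU could be issued along these lines. The helper below is hypothetical, the tensors are assumed to have been allocated elsewhere, and the reading of alpha/beta as BLAS-style scale factors and of bsum as a batch-sum output is inferred from the names.

	// fusedGemm emits z = relu(1.0*x·y + 0.0*z) and writes batch sums to bsum (sketch).
	func fusedGemm(lib *cuda.CudaGenerator, x, y, z, bsum backends.Tensor) backends.Tensor {
		return lib.CompoundDot(x, y, z, 1.0, 0.0, true, bsum)
	}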

func (*CudaGenerator) CompoundDotCuda

func (b *CudaGenerator) CompoundDotCuda(
	x *acc.AccTensor,
	y *acc.AccTensor,
	z *acc.AccTensor,
	alpha float64,
	beta float64,
	relu bool,
	bsum *acc.AccTensor,
	repeat int,
	size []int) *acc.AccTensor

func (*CudaGenerator) CompoundFpropBn

func (b *CudaGenerator) CompoundFpropBn(
	x backends.Tensor,
	xsum backends.Tensor,
	xvar backends.Tensor,
	gmean backends.Tensor,
	gvar backends.Tensor,
	gamma backends.Tensor,
	beta backends.Tensor,
	y backends.Tensor,
	eps float64,
	rho float64,
	computeBatchSum bool,
	accumbeta float64,
	relu bool,
	binary bool,
	inference bool,
	outputs backends.Tensor,
	layer backends.BatchNormLayer)

SKIPPED: threads, repeat (both gpu only)

func (*CudaGenerator) CompoundKernel

func (b *CudaGenerator) CompoundKernel(args []backends.Value) backends.Value

func (*CudaGenerator) ConfigureCodeOutput

func (b *CudaGenerator) ConfigureCodeOutput(
	buildMainCpp bool,
	filePrefix string,
	hostNamespace string,
	kernelPrefix string)

func (*CudaGenerator) FormatBufferRef

func (b *CudaGenerator) FormatBufferRef(tensor backends.Tensor, paren bool) string

func (*CudaGenerator) FpropConv

func (b *CudaGenerator) FpropConv(
	layer backends.ConvLayerBase,
	i backends.Tensor,
	f backends.Tensor,
	o backends.Tensor,
	x backends.Tensor,
	bias backends.Tensor,
	bsum backends.Tensor,
	alpha float64,
	beta float64,
	relu bool,
	brelu bool,
	slope float64)

SKIPPED: repeat (gpu only), layerOp

func (*CudaGenerator) FpropLrn

func (b *CudaGenerator) FpropLrn(
	layer backends.LrnLayer,
	i backends.Tensor,
	o backends.Tensor,
	denom backends.Tensor,
	alpha float64,
	beta float64,
	ascale float64,
	bpower float64)

SKIPPED: Support for 'repeat'

func (*CudaGenerator) FpropPool

func (b *CudaGenerator) FpropPool(
	layer backends.PoolLayer,
	i backends.Tensor,
	o backends.Tensor,
	argmax backends.Tensor,
	alpha float64,
	beta float64)

SKIPPED: Support for 'repeat'

func (*CudaGenerator) FpropRelu

func (b *CudaGenerator) FpropRelu(
	layer backends.Layer,
	x backends.Tensor,
	slope float64) backends.Value

func (*CudaGenerator) FpropSkipNode

func (b *CudaGenerator) FpropSkipNode(x backends.Tensor, y backends.Tensor, beta float64)

func (*CudaGenerator) FpropSoftmax

func (b *CudaGenerator) FpropSoftmax(x backends.Value, axis int) backends.Value

func (*CudaGenerator) FpropTransform

func (b *CudaGenerator) FpropTransform(
	nglayer backends.Layer,
	transform backends.Transform,
	inputs backends.Tensor,
	outputs backends.Tensor,
	relu bool)

func (*CudaGenerator) GetData

func (b *CudaGenerator) GetData(
	dest string, start string, stop string, x backends.Tensor) string

func (*CudaGenerator) GetFloat

func (b *CudaGenerator) GetFloat(src acc.DeviceAllocation, size int) string

func (*CudaGenerator) GetInt

func (b *CudaGenerator) GetInt(src acc.DeviceAllocation, size int) string

func (*CudaGenerator) GetMetricSum

func (b *CudaGenerator) GetMetricSum(x backends.Tensor, start string, stop string) string

func (*CudaGenerator) GetSmCount

func (b *CudaGenerator) GetSmCount() int

func (*CudaGenerator) Init

func (b *CudaGenerator) Init(
	self generators.Generator,
	rngSeed int,
	defaultDtype base.Dtype,
	stochasticRound int,
	deviceId int,
	computeCapability [2]int,
	bench bool,
	scratchSize int,
	histBins int,
	histOffset int,
	compatMode backends.CompatMode)

func (*CudaGenerator) LookupKernel

func (b *CudaGenerator) LookupKernel(key string) *Kernel

func (*CudaGenerator) MakeBinaryMask

func (b *CudaGenerator) MakeBinaryMask(out backends.Tensor, keepThresh float64)

func (*CudaGenerator) MapStringToFunc

func (b *CudaGenerator) MapStringToFunc(funcname string, dtype base.Dtype) *Kernel

func (*CudaGenerator) MemAlloc

func (b *CudaGenerator) MemAlloc(nbytes int) acc.DeviceAllocation

func (*CudaGenerator) MemcpyDtodAsync

func (b *CudaGenerator) MemcpyDtodAsync(
	dest acc.DeviceAllocation, src acc.DeviceAllocation, size int)

func (*CudaGenerator) MemsetD16Async

func (b *CudaGenerator) MemsetD16Async(dest acc.DeviceAllocation, data uint16, count int)

func (*CudaGenerator) MemsetD32Async

func (b *CudaGenerator) MemsetD32Async(dest acc.DeviceAllocation, data uint32, count int)

func (*CudaGenerator) MemsetD8Async

func (b *CudaGenerator) MemsetD8Async(dest acc.DeviceAllocation, data uint8, count int)

func (*CudaGenerator) NewBatchNormLayer

func (b *CudaGenerator) NewBatchNormLayer(inShape []int) backends.BatchNormLayer

func (*CudaGenerator) NewConvLayer

func (b *CudaGenerator) NewConvLayer(
	dtype base.Dtype, params *backends.ConvParams) backends.ConvLayer

func (*CudaGenerator) NewDeconvLayer

func (b *CudaGenerator) NewDeconvLayer(
	dtype base.Dtype, params *backends.DeconvParams) backends.DeconvLayer

func (*CudaGenerator) NewLrnLayer

func (b *CudaGenerator) NewLrnLayer(
	dtype base.Dtype, params *backends.LrnParams) backends.LrnLayer

func (*CudaGenerator) NewPoolLayer

func (b *CudaGenerator) NewPoolLayer(
	dtype base.Dtype, params *backends.PoolParams) backends.PoolLayer

func (*CudaGenerator) NewReluLayer

func (b *CudaGenerator) NewReluLayer() backends.Layer

func (*CudaGenerator) OutputCode

func (b *CudaGenerator) OutputCode(outDir string) error
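Taken together with ConfigureCodeOutput and BuildProlog above, a plausible end-to-end code-emission flow might look like the following sketch; the call order and the argument meanings are assumptions based on the parameter names.

	lib.ConfigureCodeOutput(
		true,     // buildMainCpp
		"mynet",  // filePrefix
		"mynet",  // hostNamespace
		"mynet_", // kernelPrefix
	)
	lib.BuildProlog()
	// ... describe layers and emit operations here ...
	if err := lib.OutputCode("./generated"); err != nil {
		panic(err) // handle the error appropriately in real code
	}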

func (*CudaGenerator) RegisterKernel

func (b *CudaGenerator) RegisterKernel(key string, kernel *Kernel)

func (*CudaGenerator) RngNormal

func (b *CudaGenerator) RngNormal(out backends.Tensor, loc float64, scale float64, size []int)

func (*CudaGenerator) RngUniform

func (b *CudaGenerator) RngUniform(out backends.Tensor, low float64, high float64, size []int)

func (*CudaGenerator) ScratchBufferInit

func (b *CudaGenerator) ScratchBufferInit()

func (*CudaGenerator) ScratchBufferOffset

func (b *CudaGenerator) ScratchBufferOffset(size int) acc.DeviceAllocation

func (*CudaGenerator) ScratchBufferReset

func (b *CudaGenerator) ScratchBufferReset()

func (*CudaGenerator) SetScratchSize

func (b *CudaGenerator) SetScratchSize(args ...int)
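The scratch-buffer methods above suggest the following usage pattern; the ordering and the interpretation of the size arguments are assumptions based on the method names.

	lib.SetScratchSize(1 << 20)             // reserve at least 1 MiB of scratch (assumed meaning)
	lib.ScratchBufferInit()
	buf := lib.ScratchBufferOffset(1 << 20) // carve out a region as an acc.DeviceAllocation
	lib.MemsetD32Async(buf, 0, (1<<20)/4)   // zero it as 32-bit words
	lib.ScratchBufferReset()                // release the offsets for the next operation (assumed)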

func (*CudaGenerator) SetSmCount

func (b *CudaGenerator) SetSmCount(smCount int)

func (*CudaGenerator) UpdateConv

func (b *CudaGenerator) UpdateConv(
	layer backends.ConvLayerBase,
	i backends.Tensor,
	e backends.Tensor,
	gradF backends.Tensor,
	alpha float64,
	beta float64,
	gradBias backends.Tensor)

SKIPPED: repeat (gpu only), layerOp

type DataLayer

type DataLayer struct {
	LayerBase
	// contains filtered or unexported fields
}

func NewDataLayer

func NewDataLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *DataParams) *DataLayer

func (*DataLayer) C

func (n *DataLayer) C() int

func (*DataLayer) DHW

func (n *DataLayer) DHW() []int

func (*DataLayer) DimI

func (n *DataLayer) DimI() []int

func (*DataLayer) DimI2

func (n *DataLayer) DimI2() []int

func (*DataLayer) DimO

func (n *DataLayer) DimO() []int

func (*DataLayer) DimO2

func (n *DataLayer) DimO2() []int

func (*DataLayer) Fprop

func (n *DataLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*DataLayer) Init

func (n *DataLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *DataParams)

func (*DataLayer) InitDataUniform

func (n *DataLayer) InitDataUniform(low float64, high float64)

func (*DataLayer) InitDataZero

func (n *DataLayer) InitDataZero()

func (*DataLayer) InitDeltas

func (n *DataLayer) InitDeltas(shared []backends.Tensor)

func (*DataLayer) InitWeights

func (n *DataLayer) InitWeights(loc float64, scale float64, shared backends.Tensor, zeros bool)

func (*DataLayer) K

func (n *DataLayer) K() int

func (*DataLayer) M

func (n *DataLayer) M() int

func (*DataLayer) P

func (n *DataLayer) P() int

func (*DataLayer) Q

func (n *DataLayer) Q() int

func (*DataLayer) String

func (n *DataLayer) String() string

type DataParams

type DataParams struct {
	N int
	C int
	D int
	H int
	W int
}

func (*DataParams) Init

func (a *DataParams) Init()

func (*DataParams) Resolve

func (a *DataParams) Resolve()
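A sketch of building an input layer and filling it with uniform random data; the field semantics (N = batch, C = channels, D/H/W = spatial) are assumed from the conventions used elsewhere in this package, as is the need to call Init and Resolve first. lib and base.Float32 are assumed to exist.

	dp := &cuda.DataParams{N: 32, C: 3, D: 1, H: 224, W: 224}
	dp.Init()
	dp.Resolve()
	data := cuda.NewDataLayer(lib, base.Float32, dp)
	data.InitDataUniform(-1.0, 1.0)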

type DeconvLayer

type DeconvLayer struct {
	ConvLayerBase
}

func NewDeconvLayer

func NewDeconvLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.DeconvParams,
	kernelBuilder ConvKernelBuilder) *DeconvLayer

func (*DeconvLayer) Init

func (n *DeconvLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.DeconvParams,
	kernelBuilder ConvKernelBuilder)

func (*DeconvLayer) String

func (n *DeconvLayer) String() string

type FilterDimShuffle

type FilterDimShuffle struct {
	// contains filtered or unexported fields
}

func NewFilterDimShuffle

func NewFilterDimShuffle(
	lib *CudaGenerator,
	dtype base.Dtype,
	c int,
	t int,
	r int,
	s int,
	k int) *FilterDimShuffle

func (*FilterDimShuffle) BindParams

func (*FilterDimShuffle) Execute

func (z *FilterDimShuffle) Execute()

func (*FilterDimShuffle) Init

func (z *FilterDimShuffle) Init(
	lib *CudaGenerator,
	dtype base.Dtype,
	c int,
	t int,
	r int,
	s int,
	k int)

func (*FilterDimShuffle) Unbind

func (z *FilterDimShuffle) Unbind()

ATTENTION: Is this method necessary in the generator scenario?

type FpropCuda

type FpropCuda struct {
	KernelGroup
	// contains filtered or unexported fields
}

func NewFpropCuda

func NewFpropCuda(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams) *FpropCuda

func (*FpropCuda) BindParams

func (g *FpropCuda) BindParams(
	i backends.Tensor,
	f backends.Tensor,
	o backends.Tensor,
	x backends.Tensor,
	bias backends.Tensor,
	bsum backends.Tensor,
	alpha float64,
	beta float64,
	relu bool,
	brelu bool,
	slope float64)

func (*FpropCuda) Execute

func (g *FpropCuda) Execute()

SKIPPED: Arguments 'repeat' and 'unbind'

func (*FpropCuda) Init

func (g *FpropCuda) Init(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams)

type FullLayer

type FullLayer struct {
	LayerBase
	// contains filtered or unexported fields
}

func NewFullLayer

func NewFullLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *FullParams) *FullLayer

func (*FullLayer) Bprop

func (n *FullLayer) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor

func (*FullLayer) DimF

func (n *FullLayer) DimF() []int

func (*FullLayer) DimF2

func (n *FullLayer) DimF2() []int

func (*FullLayer) DimI

func (n *FullLayer) DimI() []int

func (*FullLayer) DimI2

func (n *FullLayer) DimI2() []int

func (*FullLayer) DimO

func (n *FullLayer) DimO() []int

func (*FullLayer) DimO2

func (n *FullLayer) DimO2() []int

func (*FullLayer) Fprop

func (n *FullLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*FullLayer) Init

func (n *FullLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *FullParams)

func (*FullLayer) NIn

func (n *FullLayer) NIn() int

func (*FullLayer) NOut

func (n *FullLayer) NOut() int

func (*FullLayer) Relu

func (n *FullLayer) Relu() bool

func (*FullLayer) String

func (n *FullLayer) String() string

type FullParams

type FullParams struct {
	N    int
	NIn  int
	NOut int
	Relu bool
}

func (*FullParams) Init

func (a *FullParams) Init()

func (*FullParams) Resolve

func (a *FullParams) Resolve()

type InceptionLayer

type InceptionLayer struct {
	LayerBase
	// contains filtered or unexported fields
}

func NewInceptionLayer

func NewInceptionLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	partitions [][]Layer,
	params *InceptionParams) *InceptionLayer

func (*InceptionLayer) Bprop

func (n *InceptionLayer) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor

func (*InceptionLayer) C

func (n *InceptionLayer) C() int

func (*InceptionLayer) DHW

func (n *InceptionLayer) DHW() []int

func (*InceptionLayer) DimF

func (n *InceptionLayer) DimF() []int

func (*InceptionLayer) DimI

func (n *InceptionLayer) DimI() []int

func (*InceptionLayer) DimI2

func (n *InceptionLayer) DimI2() []int

func (*InceptionLayer) DimO

func (n *InceptionLayer) DimO() []int

func (*InceptionLayer) DimO2

func (n *InceptionLayer) DimO2() []int

func (*InceptionLayer) Fprop

func (n *InceptionLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*InceptionLayer) Init

func (n *InceptionLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	partitions [][]Layer,
	params *InceptionParams)

func (*InceptionLayer) InitActivations

func (n *InceptionLayer) InitActivations(fpropOut backends.Tensor)

func (*InceptionLayer) InitDeltas

func (n *InceptionLayer) InitDeltas(shared []backends.Tensor)

func (*InceptionLayer) InitWeights

func (n *InceptionLayer) InitWeights(
	loc float64, scale float64, shared backends.Tensor, zeros bool)

func (*InceptionLayer) K

func (n *InceptionLayer) K() int

func (*InceptionLayer) M

func (n *InceptionLayer) M() int

func (*InceptionLayer) MPQ

func (n *InceptionLayer) MPQ() []int

func (*InceptionLayer) NCK

func (n *InceptionLayer) NCK() []int

func (*InceptionLayer) NOut

func (n *InceptionLayer) NOut() int

func (*InceptionLayer) P

func (n *InceptionLayer) P() int

func (*InceptionLayer) Partitions

func (n *InceptionLayer) Partitions() [][]Layer

func (*InceptionLayer) Q

func (n *InceptionLayer) Q() int

func (*InceptionLayer) String

func (n *InceptionLayer) String() string

type InceptionParams

type InceptionParams struct {
	N int
	C int
	K int
	D int
	H int
	W int
	M int
	P int
	Q int
}

func (*InceptionParams) Init

func (a *InceptionParams) Init()

func (*InceptionParams) Resolve

func (a *InceptionParams) Resolve()

type Kernel

type Kernel struct {
	// contains filtered or unexported fields
}

func NewKernel

func NewKernel(lib *CudaGenerator, name string, code string) *Kernel

func (*Kernel) Launch

func (k *Kernel) Launch(
	grid []int,
	block []int,
	shared int,
	launchArgs []acc.KernelArgument,
	staticArgs []int)
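A sketch of registering and launching a hand-written kernel; kernelSrc is a placeholder CUDA source string, and passing nil launch and static arguments is an assumption, since the construction of acc.KernelArgument values is not covered by this listing.

	k := cuda.NewKernel(lib, "my_kernel", kernelSrc)
	lib.RegisterKernel("my_kernel", k)

	if cached := lib.LookupKernel("my_kernel"); cached != nil {
		// 64 blocks of 128 threads, no dynamic shared memory.
		cached.Launch([]int{64, 1, 1}, []int{128, 1, 1}, 0, nil, nil)
	}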

type KernelGroup

type KernelGroup struct {
	// contains filtered or unexported fields
}

func (*KernelGroup) GetKernel

func (g *KernelGroup) GetKernel(
	dtype base.Dtype,
	filterSize int,
	bsum bool,
	operation string) *Kernel

func (*KernelGroup) Init

func (g *KernelGroup) Init(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams)

type Layer

type Layer interface {
	Dtype() base.Dtype
	N() int
	Flops() float64
	SizeI() int
	SizeF() int
	SizeO() int
	// optional properties
	C() int
	K() int
	M() int
	P() int
	Q() int
	DimI() []int
	DimF() []int
	DimO() []int
	DimI2() []int
	DimF2() []int
	DimO2() []int
	NOut() int
	// setters
	SetBpropOut(bpropOut backends.Tensor)
	SetDeltaStats(deltaStats backends.Tensor)
	// operational methods
	InitActivations(fpropOut backends.Tensor)
	InitDeltas(shared []backends.Tensor)
	InitWeights(loc float64, scale float64, shared backends.Tensor, zeros bool)
	InitDataZero()
	InitDataUniform(low float64, high float64)
	Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor
	Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor
	String() string
}
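As a sketch of how the interface composes, a driver might run a forward pass through a stack of layers and then a backward pass in reverse; the helper below is hypothetical, the scale and beta values are placeholders, and seeding the backward pass from the forward output stands in for a real loss gradient.

	// runLayers is a hypothetical helper, not part of this package.
	func runLayers(layers []cuda.Layer, input backends.Tensor) backends.Tensor {
		out := input
		for _, l := range layers {
			out = l.Fprop(out, 1.0) // scaleWeights = 1.0 (placeholder)
		}
		grad := out // placeholder for a loss gradient
		for i := len(layers) - 1; i >= 0; i-- {
			grad = layers[i].Bprop(grad, 0.0) // beta = 0.0 (placeholder)
		}
		return grad
	}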

type LayerBase

type LayerBase struct {
	// contains filtered or unexported fields
}

func (*LayerBase) Bprop

func (n *LayerBase) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor

func (*LayerBase) BpropRelu

func (n *LayerBase) BpropRelu(bpropIn backends.Tensor) backends.Tensor

Fprop relu happens inside the conv and gemm kernels.

func (*LayerBase) C

func (n *LayerBase) C() int

func (*LayerBase) DimF

func (n *LayerBase) DimF() []int

func (*LayerBase) DimF2

func (n *LayerBase) DimF2() []int

func (*LayerBase) DimI

func (n *LayerBase) DimI() []int

func (*LayerBase) DimI2

func (n *LayerBase) DimI2() []int

func (*LayerBase) DimO

func (n *LayerBase) DimO() []int

func (*LayerBase) DimO2

func (n *LayerBase) DimO2() []int

func (*LayerBase) Dtype

func (n *LayerBase) Dtype() base.Dtype

func (*LayerBase) Flops

func (n *LayerBase) Flops() float64

func (*LayerBase) Fprop

func (n *LayerBase) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*LayerBase) GradDescent

func (n *LayerBase) GradDescent()

func (*LayerBase) Init

func (n *LayerBase) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	N int,
	dtypeU base.Dtype)

func (*LayerBase) InitActivations

func (n *LayerBase) InitActivations(fpropOut backends.Tensor)

func (*LayerBase) InitDataUniform

func (n *LayerBase) InitDataUniform(low float64, high float64)

func (*LayerBase) InitDataZero

func (n *LayerBase) InitDataZero()

func (*LayerBase) InitDeltas

func (n *LayerBase) InitDeltas(shared []backends.Tensor)

func (*LayerBase) InitWeights

func (n *LayerBase) InitWeights(loc float64, scale float64, shared backends.Tensor, zeros bool)

func (*LayerBase) K

func (n *LayerBase) K() int

func (*LayerBase) M

func (n *LayerBase) M() int

func (*LayerBase) N

func (n *LayerBase) N() int

func (*LayerBase) NOut

func (n *LayerBase) NOut() int

func (*LayerBase) P

func (n *LayerBase) P() int

func (*LayerBase) Q

func (n *LayerBase) Q() int

func (*LayerBase) ScaleWeights

func (n *LayerBase) ScaleWeights(scale float64)

func (*LayerBase) SetBpropOut

func (n *LayerBase) SetBpropOut(bpropOut backends.Tensor)

func (*LayerBase) SetDeltaStats

func (n *LayerBase) SetDeltaStats(deltaStats backends.Tensor)

func (*LayerBase) SizeF

func (n *LayerBase) SizeF() int

func (*LayerBase) SizeI

func (n *LayerBase) SizeI() int

func (*LayerBase) SizeO

func (n *LayerBase) SizeO() int

type LrnLayer

type LrnLayer struct {
	PoolLayer
}

func NewLrnLayer

func NewLrnLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.LrnParams) *LrnLayer

func (*LrnLayer) Init

func (n *LrnLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.LrnParams)

type PoolKernel

type PoolKernel struct {
	// contains filtered or unexported fields
}

func NewPoolKernel

func NewPoolKernel(
	name string,
	grid []int,
	block []int,
	args ...interface{}) *PoolKernel

type PoolLayer

type PoolLayer struct {
	LayerBase
	// contains filtered or unexported fields
}

func NewPoolLayer

func NewPoolLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.PoolParams) *PoolLayer

func (*PoolLayer) Bprop

func (n *PoolLayer) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor

func (*PoolLayer) C

func (n *PoolLayer) C() int

func (*PoolLayer) DHW

func (n *PoolLayer) DHW() []int

func (*PoolLayer) DimF2

func (n *PoolLayer) DimF2() []int

func (*PoolLayer) DimI

func (n *PoolLayer) DimI() []int

func (*PoolLayer) DimI2

func (n *PoolLayer) DimI2() []int

func (*PoolLayer) DimO

func (n *PoolLayer) DimO() []int

func (*PoolLayer) DimO2

func (n *PoolLayer) DimO2() []int

func (*PoolLayer) Fprop

func (n *PoolLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*PoolLayer) Init

func (n *PoolLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.PoolParams)

func (*PoolLayer) InitActivations

func (n *PoolLayer) InitActivations(fpropOut backends.Tensor)

func (*PoolLayer) JTRS

func (n *PoolLayer) JTRS() []int

func (*PoolLayer) K

func (n *PoolLayer) K() int

func (*PoolLayer) M

func (n *PoolLayer) M() int

func (*PoolLayer) MPQ

func (n *PoolLayer) MPQ() []int

func (*PoolLayer) NOut

func (n *PoolLayer) NOut() int

func (*PoolLayer) Op

func (n *PoolLayer) Op() backends.PoolOp

func (*PoolLayer) P

func (n *PoolLayer) P() int

func (*PoolLayer) Padding

func (n *PoolLayer) Padding() []int

func (*PoolLayer) Q

func (n *PoolLayer) Q() int

func (*PoolLayer) Strides

func (n *PoolLayer) Strides() []int

func (*PoolLayer) String

func (n *PoolLayer) String() string

type UpdateConvReduce

type UpdateConvReduce struct {
	// contains filtered or unexported fields
}

UpdateConvReduce is a fast axis=0 reduction kernel used for the deterministic update.

func NewUpdateConvReduce

func NewUpdateConvReduce(lib *CudaGenerator, gridMpq int, crstk int) *UpdateConvReduce

func (*UpdateConvReduce) BindParams

func (z *UpdateConvReduce) BindParams(
	u backends.Tensor,
	alpha float64,
	beta float64,
	noOp bool) acc.DeviceAllocation

func (*UpdateConvReduce) Execute

func (z *UpdateConvReduce) Execute()

func (*UpdateConvReduce) Init

func (z *UpdateConvReduce) Init(lib *CudaGenerator, gridMpq int, crstk int)

func (*UpdateConvReduce) Unbind

func (z *UpdateConvReduce) Unbind()

ATTENTION: Is this method necessary in the generator scenario?

type UpdateCuda

type UpdateCuda struct {
	KernelGroup
	// contains filtered or unexported fields
}

func NewUpdateCuda

func NewUpdateCuda(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams) *UpdateCuda

func (*UpdateCuda) BindParams

func (g *UpdateCuda) BindParams(
	i backends.Tensor,
	e backends.Tensor,
	o backends.Tensor,
	alpha float64,
	beta float64,
	noOp bool)

func (*UpdateCuda) Execute

func (g *UpdateCuda) Execute()

SKIPPED: Arguments 'repeat' and 'unbind'

func (*UpdateCuda) Init

func (g *UpdateCuda) Init(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams)
