cuda

package
v0.0.0-...-2fe1034

Published: Apr 23, 2022 License: Apache-2.0 Imports: 11 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func BuildCompoundKernel

func BuildCompoundKernel(typeArgs []acc.TypeArg, computeCapability [2]int) (string, string)

func Flatten

func Flatten(lst ...interface{}) []int

Flatten flattens a nested list of lists or values. Only one level of nesting and int values are supported here; this should be sufficient.
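For illustration, a minimal sketch of how Flatten might be called, based on the description above (imports and package path omitted, since they are not shown in this listing; the output is an assumption inferred from the doc comment):

	// Flatten accepts plain ints and one level of []int nesting.
	vals := cuda.Flatten(1, []int{2, 3}, 4)
	// vals is expected to be []int{1, 2, 3, 4}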

Types

type BatchNormLayer

type BatchNormLayer struct {
	LayerBase
	// contains filtered or unexported fields
}

func NewBatchNormLayer

func NewBatchNormLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *BatchNormParams) *BatchNormLayer

func (*BatchNormLayer) Bprop

func (n *BatchNormLayer) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor

func (*BatchNormLayer) Bsum

func (n *BatchNormLayer) Bsum() bool

func (*BatchNormLayer) C

func (n *BatchNormLayer) C() int

func (*BatchNormLayer) Dim2

func (n *BatchNormLayer) Dim2() []int

func (*BatchNormLayer) DimI

func (n *BatchNormLayer) DimI() []int

func (*BatchNormLayer) DimO

func (n *BatchNormLayer) DimO() []int

func (*BatchNormLayer) DimO2

func (n *BatchNormLayer) DimO2() []int

func (*BatchNormLayer) Eps

func (n *BatchNormLayer) Eps() float64

func (*BatchNormLayer) Fprop

func (n *BatchNormLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*BatchNormLayer) Init

func (n *BatchNormLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *BatchNormParams)

func (*BatchNormLayer) InitActivations

func (n *BatchNormLayer) InitActivations(fpropOut backends.Tensor)

func (*BatchNormLayer) InitDeltas

func (n *BatchNormLayer) InitDeltas(shared []backends.Tensor)

func (*BatchNormLayer) InitWeights

func (n *BatchNormLayer) InitWeights(
	loc float64, scale float64, shared backends.Tensor, zeros bool)

func (*BatchNormLayer) K

func (n *BatchNormLayer) K() int

func (*BatchNormLayer) M

func (n *BatchNormLayer) M() int

func (*BatchNormLayer) NOut

func (n *BatchNormLayer) NOut() int

func (*BatchNormLayer) P

func (n *BatchNormLayer) P() int

func (*BatchNormLayer) Q

func (n *BatchNormLayer) Q() int

func (*BatchNormLayer) RcpDepth

func (n *BatchNormLayer) RcpDepth() float64

func (*BatchNormLayer) Relu

func (n *BatchNormLayer) Relu() bool

func (*BatchNormLayer) Rho

func (n *BatchNormLayer) Rho() float64

func (*BatchNormLayer) String

func (n *BatchNormLayer) String() string

type BatchNormParams

type BatchNormParams struct {
	N    int
	C    int
	D    int
	H    int
	W    int
	NIn  int
	Rho  float64
	Eps  float64
	Relu bool
	Bsum bool
}

func (*BatchNormParams) Init

func (a *BatchNormParams) Init()

func (*BatchNormParams) Resolve

func (a *BatchNormParams) Resolve()
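A minimal sketch of building a batch-norm layer from these parameters, assuming an existing *CudaGenerator named lib and a hypothetical base.Float32 dtype constant (neither is defined in this listing); whether Init and Resolve must be called before use is an assumption based on the method names.

	params := &cuda.BatchNormParams{
		N: 32, C: 64, D: 1, H: 28, W: 28, // shape fields, per the struct above
		Rho: 0.9, Eps: 1e-5,              // running-average decay and epsilon (assumed semantics)
		Relu: true, Bsum: false,
	}
	params.Init()
	params.Resolve()
	bn := cuda.NewBatchNormLayer(lib, base.Float32, params) // lib and base.Float32 are assumed
	_ = bn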

type BpropCuda

type BpropCuda struct {
	KernelGroup
	// contains filtered or unexported fields
}

func NewBpropCuda

func NewBpropCuda(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams) *BpropCuda

func (*BpropCuda) BindParams

func (g *BpropCuda) BindParams(
	i backends.Tensor,
	f backends.Tensor,
	o backends.Tensor,
	x backends.Tensor,
	bias backends.Tensor,
	bsum backends.Tensor,
	alpha float64,
	beta float64,
	relu bool,
	brelu bool,
	slope float64)

func (*BpropCuda) Execute

func (g *BpropCuda) Execute()

SKIPPED: Arguments 'repeat' and 'unbind'

func (*BpropCuda) Init

func (g *BpropCuda) Init(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams)

type CompoundOps

type CompoundOps struct {
	// contains filtered or unexported fields
}

For kernels that cannot compound these ops internally, CompoundOps provides an external kernel.

func NewCompoundOps

func NewCompoundOps(lib *CudaGenerator, dtype base.Dtype, k int, n int) *CompoundOps

func (*CompoundOps) BindParams

func (z *CompoundOps) BindParams(
	o backends.Tensor,
	x backends.Tensor,
	bias backends.Tensor,
	bsum backends.Tensor,
	alpha float64,
	beta float64,
	relu bool,
	brelu bool) acc.DeviceAllocation

func (*CompoundOps) Execute

func (z *CompoundOps) Execute()

func (*CompoundOps) Init

func (z *CompoundOps) Init(lib *CudaGenerator, dtype base.Dtype, k int, n int)

func (*CompoundOps) Unbind

func (z *CompoundOps) Unbind()

ATTENTION: Is this method necessary in the generator scenario?

type ConvBpropKernels

type ConvBpropKernels interface {
	BindParams(
		i backends.Tensor,
		f backends.Tensor,
		o backends.Tensor,
		x backends.Tensor,
		bias backends.Tensor,
		bsum backends.Tensor,
		alpha float64,
		beta float64,
		relu bool,
		brelu bool,
		slope float64)
	Execute()
}

type ConvFpropKernels

type ConvFpropKernels interface {
	BindParams(
		i backends.Tensor,
		f backends.Tensor,
		o backends.Tensor,
		x backends.Tensor,
		bias backends.Tensor,
		bsum backends.Tensor,
		alpha float64,
		beta float64,
		relu bool,
		brelu bool,
		slope float64)
	Execute()
}

type ConvKernelBuilder

type ConvKernelBuilder interface {
	BuildConvKernels(dtype base.Dtype, a *ConvParams) (
		ConvFpropKernels, ConvBpropKernels, ConvUpdateKernels)
}
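CudaGenerator exposes its own BuildConvKernels method (see below), but as a sketch of how this interface fits together, a builder could also be assembled from the FpropCuda, BpropCuda, and UpdateCuda kernel groups documented later in this package. The builder below is an illustration, not part of the package.

	// cudaKernelBuilder is a hypothetical ConvKernelBuilder wiring together the
	// concrete CUDA kernel groups; their BindParams/Execute signatures match the
	// three interfaces above.
	type cudaKernelBuilder struct {
		lib *cuda.CudaGenerator
	}

	func (b *cudaKernelBuilder) BuildConvKernels(dtype base.Dtype, a *cuda.ConvParams) (
		cuda.ConvFpropKernels, cuda.ConvBpropKernels, cuda.ConvUpdateKernels) {
		return cuda.NewFpropCuda(b.lib, dtype, a),
			cuda.NewBpropCuda(b.lib, dtype, a),
			cuda.NewUpdateCuda(b.lib, dtype, a)
	}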

type ConvLayer

type ConvLayer struct {
	ConvLayerBase
}

func NewConvLayer

func NewConvLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.ConvParams,
	kernelBuilder ConvKernelBuilder) *ConvLayer

func (*ConvLayer) Init

func (n *ConvLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.ConvParams,
	kernelBuilder ConvKernelBuilder)

func (*ConvLayer) String

func (n *ConvLayer) String() string

type ConvLayerBase

type ConvLayerBase struct {
	LayerBase
	// contains filtered or unexported fields
}

func (*ConvLayerBase) Bprop

func (n *ConvLayerBase) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor

func (*ConvLayerBase) C

func (n *ConvLayerBase) C() int

func (*ConvLayerBase) DHW

func (n *ConvLayerBase) DHW() []int

func (*ConvLayerBase) DimF

func (n *ConvLayerBase) DimF() []int

func (*ConvLayerBase) DimF2

func (n *ConvLayerBase) DimF2() []int

func (*ConvLayerBase) DimF2t

func (n *ConvLayerBase) DimF2t() []int

func (*ConvLayerBase) DimFb

func (n *ConvLayerBase) DimFb() []int

func (*ConvLayerBase) DimI

func (n *ConvLayerBase) DimI() []int

func (*ConvLayerBase) DimI2

func (n *ConvLayerBase) DimI2() []int

func (*ConvLayerBase) DimO

func (n *ConvLayerBase) DimO() []int

func (*ConvLayerBase) DimO2

func (n *ConvLayerBase) DimO2() []int

func (*ConvLayerBase) DimS

func (n *ConvLayerBase) DimS() []int

func (*ConvLayerBase) Fprop

func (n *ConvLayerBase) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*ConvLayerBase) Init

func (n *ConvLayerBase) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams,
	kernelBuilder ConvKernelBuilder)

func (*ConvLayerBase) InitActivations

func (n *ConvLayerBase) InitActivations(fpropOut backends.Tensor)

func (*ConvLayerBase) K

func (n *ConvLayerBase) K() int

func (*ConvLayerBase) M

func (n *ConvLayerBase) M() int

func (*ConvLayerBase) MPQ

func (n *ConvLayerBase) MPQ() []int

func (*ConvLayerBase) NCK

func (n *ConvLayerBase) NCK() []int

func (*ConvLayerBase) NOut

func (n *ConvLayerBase) NOut() int

func (*ConvLayerBase) P

func (n *ConvLayerBase) P() int

func (*ConvLayerBase) Padding

func (n *ConvLayerBase) Padding() []int

func (*ConvLayerBase) Q

func (n *ConvLayerBase) Q() int

func (*ConvLayerBase) SetBsum

func (n *ConvLayerBase) SetBsum(bsum bool)

func (*ConvLayerBase) SetRelu

func (n *ConvLayerBase) SetRelu(relu bool)

func (*ConvLayerBase) Strides

func (n *ConvLayerBase) Strides() []int

func (*ConvLayerBase) TRS

func (n *ConvLayerBase) TRS() []int

type ConvParams

type ConvParams struct {
	N    int
	C    int
	K    int
	D    int
	H    int
	W    int
	T    int
	R    int
	S    int
	M    int
	P    int
	Q    int
	PadD int
	PadH int
	PadW int
	StrD int
	StrH int
	StrW int
	DilD int
	DilH int
	DilW int
}

func (*ConvParams) InitConv

func (a *ConvParams) InitConv(params *backends.ConvParams)

func (*ConvParams) InitDeconv

func (a *ConvParams) InitDeconv(params *backends.DeconvParams)
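InitConv and InitDeconv populate these fields from the backend-level parameter structs. As a sketch, a 3x3 2-D convolution could also be described directly with a literal; the field semantics and the assumption that the output dims M, P, Q are precomputed follow the usual N/C/K/D/H/W/T/R/S naming and are not stated in this listing.

	conv := &cuda.ConvParams{
		N: 32, C: 64, K: 128, // batch, input channels, output channels (assumed)
		D: 1, H: 56, W: 56,   // input depth/height/width
		T: 1, R: 3, S: 3,     // filter depth/height/width
		M: 1, P: 56, Q: 56,   // output depth/height/width (assumed precomputed)
		PadD: 0, PadH: 1, PadW: 1,
		StrD: 1, StrH: 1, StrW: 1,
		DilD: 1, DilH: 1, DilW: 1,
	}
	fprop := cuda.NewFpropCuda(lib, base.Float32, conv) // lib and base.Float32 are assumed
	_ = fprop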

type ConvUpdateKernels

type ConvUpdateKernels interface {
	BindParams(
		i backends.Tensor,
		e backends.Tensor,
		o backends.Tensor,
		alpha float64,
		beta float64,
		noOp bool)
	Execute()
}

type CudaDeviceAllocation

type CudaDeviceAllocation struct {
	// contains filtered or unexported fields
}

func NewCudaDeviceAllocation

func NewCudaDeviceAllocation(index int, nbytes int, offset int) *CudaDeviceAllocation

func (*CudaDeviceAllocation) Add

type CudaGenerator

type CudaGenerator struct {
	acc.AccGeneratorBase
	// contains filtered or unexported fields
}

func NewCudaGenerator

func NewCudaGenerator(
	rngSeed int,
	defaultDtype base.Dtype,
	stochasticRound int,
	deviceId int,
	computeCapability [2]int,
	bench bool,
	scratchSize int,
	histBins int,
	histOffset int,
	compatMode backends.CompatMode) *CudaGenerator
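A sketch of constructing a generator; the argument values are placeholders, and base.Float32 plus an integer-like zero value for backends.CompatMode are assumptions not defined in this listing.

	lib := cuda.NewCudaGenerator(
		0,            // rngSeed
		base.Float32, // defaultDtype (assumed identifier)
		0,            // stochasticRound
		0,            // deviceId
		[2]int{7, 0}, // computeCapability, e.g. sm_70
		false,        // bench
		0,            // scratchSize
		0,            // histBins
		0,            // histOffset
		0,            // compatMode (zero value of backends.CompatMode, assumed valid)
	)
	_ = lib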

func (*CudaGenerator) Assign

func (b *CudaGenerator) Assign(out *acc.AccTensor, value backends.Value) backends.Value

func (*CudaGenerator) Binarize

func (b *CudaGenerator) Binarize(
	ary backends.Tensor, out backends.Tensor, stochastic bool) backends.Tensor

func (*CudaGenerator) BpropConv

func (b *CudaGenerator) BpropConv(
	layer backends.ConvLayerBase,
	f backends.Tensor,
	e backends.Tensor,
	gradI backends.Tensor,
	x backends.Tensor,
	bias backends.Tensor,
	bsum backends.Tensor,
	alpha float64,
	beta float64,
	relu bool,
	brelu bool,
	slope float64)

SKIPPED: repeat (gpu only), layerOp

func (*CudaGenerator) BpropLrn

func (b *CudaGenerator) BpropLrn(
	layer backends.LrnLayer,
	i backends.Tensor,
	o backends.Tensor,
	e backends.Tensor,
	delta backends.Tensor,
	denom backends.Tensor,
	alpha float64,
	beta float64,
	ascale float64,
	bpower float64)

SKIPPED: Support for 'repeat'

func (*CudaGenerator) BpropPool

func (b *CudaGenerator) BpropPool(
	layer backends.PoolLayer,
	i backends.Tensor,
	o backends.Tensor,
	argmax backends.Tensor,
	alpha float64,
	beta float64)

SKIPPED: repeat (gpu only)

func (*CudaGenerator) BpropRelu

func (b *CudaGenerator) BpropRelu(
	layer backends.Layer,
	x backends.Tensor,
	errors backends.Tensor,
	deltas backends.Tensor,
	slope float64) backends.Value

func (*CudaGenerator) BpropSkipNode

func (b *CudaGenerator) BpropSkipNode(
	errors backends.Tensor, deltas backends.Tensor, alpha float64, beta float64)

func (*CudaGenerator) BpropTransform

func (b *CudaGenerator) BpropTransform(
	nglayer backends.Layer,
	transform backends.Transform,
	outputs backends.Tensor,
	errors backends.Tensor,
	deltas backends.Tensor,
	relu bool)

func (*CudaGenerator) BuildConvKernels

func (*CudaGenerator) BuildProlog

func (b *CudaGenerator) BuildProlog()

func (*CudaGenerator) CompoundBpropBn

func (b *CudaGenerator) CompoundBpropBn(
	deltaOut backends.Tensor,
	gradGamma backends.Tensor,
	gradBeta backends.Tensor,
	deltaIn backends.Tensor,
	x backends.Tensor,
	xsum backends.Tensor,
	xvar backends.Tensor,
	gamma backends.Tensor,
	eps float64,
	binary bool,
	layer backends.BatchNormLayer)

SKIPPED: threads, repeat (both gpu only)

func (*CudaGenerator) CompoundDot

func (b *CudaGenerator) CompoundDot(
	x backends.Tensor,
	y backends.Tensor,
	z backends.Tensor,
	alpha float64,
	beta float64,
	relu bool,
	bsum backends.Tensor) backends.Tensor
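For example, a fused GEMM with ReLU could be issued along these lines. The helper below is hypothetical, the tensors are assumed to have been allocated elsewhere, and the reading of alpha/beta as BLAS-style scale factors and of bsum as a batch-sum output is inferred from the names.

	// fusedGemm emits z = relu(1.0*x·y + 0.0*z) and writes batch sums to bsum (sketch).
	func fusedGemm(lib *cuda.CudaGenerator, x, y, z, bsum backends.Tensor) backends.Tensor {
		return lib.CompoundDot(x, y, z, 1.0, 0.0, true, bsum)
	}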

func (*CudaGenerator) CompoundDotCuda

func (b *CudaGenerator) CompoundDotCuda(
	x *acc.AccTensor,
	y *acc.AccTensor,
	z *acc.AccTensor,
	alpha float64,
	beta float64,
	relu bool,
	bsum *acc.AccTensor,
	repeat int,
	size []int) *acc.AccTensor

func (*CudaGenerator) CompoundFpropBn

func (b *CudaGenerator) CompoundFpropBn(
	x backends.Tensor,
	xsum backends.Tensor,
	xvar backends.Tensor,
	gmean backends.Tensor,
	gvar backends.Tensor,
	gamma backends.Tensor,
	beta backends.Tensor,
	y backends.Tensor,
	eps float64,
	rho float64,
	computeBatchSum bool,
	accumbeta float64,
	relu bool,
	binary bool,
	inference bool,
	outputs backends.Tensor,
	layer backends.BatchNormLayer)

SKIPPED: threads, repeat (both gpu only)

func (*CudaGenerator) CompoundKernel

func (b *CudaGenerator) CompoundKernel(args []backends.Value) backends.Value

func (*CudaGenerator) ConfigureCodeOutput

func (b *CudaGenerator) ConfigureCodeOutput(
	buildMainCpp bool,
	filePrefix string,
	hostNamespace string,
	kernelPrefix string)

func (*CudaGenerator) FormatBufferRef

func (b *CudaGenerator) FormatBufferRef(tensor backends.Tensor, paren bool) string

func (*CudaGenerator) FpropConv

func (b *CudaGenerator) FpropConv(
	layer backends.ConvLayerBase,
	i backends.Tensor,
	f backends.Tensor,
	o backends.Tensor,
	x backends.Tensor,
	bias backends.Tensor,
	bsum backends.Tensor,
	alpha float64,
	beta float64,
	relu bool,
	brelu bool,
	slope float64)

SKIPPED: repeat (gpu only), layerOp

func (*CudaGenerator) FpropLrn

func (b *CudaGenerator) FpropLrn(
	layer backends.LrnLayer,
	i backends.Tensor,
	o backends.Tensor,
	denom backends.Tensor,
	alpha float64,
	beta float64,
	ascale float64,
	bpower float64)

SKIPPED: Support for 'repeat'

func (*CudaGenerator) FpropPool

func (b *CudaGenerator) FpropPool(
	layer backends.PoolLayer,
	i backends.Tensor,
	o backends.Tensor,
	argmax backends.Tensor,
	alpha float64,
	beta float64)

SKIPPED: Support for 'repeat'

func (*CudaGenerator) FpropRelu

func (b *CudaGenerator) FpropRelu(
	layer backends.Layer,
	x backends.Tensor,
	slope float64) backends.Value

func (*CudaGenerator) FpropSkipNode

func (b *CudaGenerator) FpropSkipNode(x backends.Tensor, y backends.Tensor, beta float64)

func (*CudaGenerator) FpropSoftmax

func (b *CudaGenerator) FpropSoftmax(x backends.Value, axis int) backends.Value

func (*CudaGenerator) FpropTransform

func (b *CudaGenerator) FpropTransform(
	nglayer backends.Layer,
	transform backends.Transform,
	inputs backends.Tensor,
	outputs backends.Tensor,
	relu bool)

func (*CudaGenerator) GetData

func (b *CudaGenerator) GetData(
	dest string, start string, stop string, x backends.Tensor) string

func (*CudaGenerator) GetFloat

func (b *CudaGenerator) GetFloat(src acc.DeviceAllocation, size int) string

func (*CudaGenerator) GetInt

func (b *CudaGenerator) GetInt(src acc.DeviceAllocation, size int) string

func (*CudaGenerator) GetMetricSum

func (b *CudaGenerator) GetMetricSum(x backends.Tensor, start string, stop string) string

func (*CudaGenerator) GetSmCount

func (b *CudaGenerator) GetSmCount() int

func (*CudaGenerator) Init

func (b *CudaGenerator) Init(
	self generators.Generator,
	rngSeed int,
	defaultDtype base.Dtype,
	stochasticRound int,
	deviceId int,
	computeCapability [2]int,
	bench bool,
	scratchSize int,
	histBins int,
	histOffset int,
	compatMode backends.CompatMode)

func (*CudaGenerator) LookupKernel

func (b *CudaGenerator) LookupKernel(key string) *Kernel

func (*CudaGenerator) MakeBinaryMask

func (b *CudaGenerator) MakeBinaryMask(out backends.Tensor, keepThresh float64)

func (*CudaGenerator) MapStringToFunc

func (b *CudaGenerator) MapStringToFunc(funcname string, dtype base.Dtype) *Kernel

func (*CudaGenerator) MemAlloc

func (b *CudaGenerator) MemAlloc(nbytes int) acc.DeviceAllocation

func (*CudaGenerator) MemcpyDtodAsync

func (b *CudaGenerator) MemcpyDtodAsync(
	dest acc.DeviceAllocation, src acc.DeviceAllocation, size int)

func (*CudaGenerator) MemsetD16Async

func (b *CudaGenerator) MemsetD16Async(dest acc.DeviceAllocation, data uint16, count int)

func (*CudaGenerator) MemsetD32Async

func (b *CudaGenerator) MemsetD32Async(dest acc.DeviceAllocation, data uint32, count int)

func (*CudaGenerator) MemsetD8Async

func (b *CudaGenerator) MemsetD8Async(dest acc.DeviceAllocation, data uint8, count int)

func (*CudaGenerator) NewBatchNormLayer

func (b *CudaGenerator) NewBatchNormLayer(inShape []int) backends.BatchNormLayer

func (*CudaGenerator) NewConvLayer

func (b *CudaGenerator) NewConvLayer(
	dtype base.Dtype, params *backends.ConvParams) backends.ConvLayer

func (*CudaGenerator) NewDeconvLayer

func (b *CudaGenerator) NewDeconvLayer(
	dtype base.Dtype, params *backends.DeconvParams) backends.DeconvLayer

func (*CudaGenerator) NewLrnLayer

func (b *CudaGenerator) NewLrnLayer(
	dtype base.Dtype, params *backends.LrnParams) backends.LrnLayer

func (*CudaGenerator) NewPoolLayer

func (b *CudaGenerator) NewPoolLayer(
	dtype base.Dtype, params *backends.PoolParams) backends.PoolLayer

func (*CudaGenerator) NewReluLayer

func (b *CudaGenerator) NewReluLayer() backends.Layer

func (*CudaGenerator) OutputCode

func (b *CudaGenerator) OutputCode(outDir string) error
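Taken together with ConfigureCodeOutput and BuildProlog above, a plausible end-to-end code-emission flow might look like the following sketch; the call order and the argument meanings are assumptions based on the parameter names.

	lib.ConfigureCodeOutput(
		true,     // buildMainCpp
		"mynet",  // filePrefix
		"mynet",  // hostNamespace
		"mynet_", // kernelPrefix
	)
	lib.BuildProlog()
	// ... describe layers and emit operations here ...
	if err := lib.OutputCode("./generated"); err != nil {
		panic(err) // handle the error appropriately in real code
	}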

func (*CudaGenerator) RegisterKernel

func (b *CudaGenerator) RegisterKernel(key string, kernel *Kernel)

func (*CudaGenerator) RngNormal

func (b *CudaGenerator) RngNormal(out backends.Tensor, loc float64, scale float64, size []int)

func (*CudaGenerator) RngUniform

func (b *CudaGenerator) RngUniform(out backends.Tensor, low float64, high float64, size []int)

func (*CudaGenerator) ScratchBufferInit

func (b *CudaGenerator) ScratchBufferInit()

func (*CudaGenerator) ScratchBufferOffset

func (b *CudaGenerator) ScratchBufferOffset(size int) acc.DeviceAllocation

func (*CudaGenerator) ScratchBufferReset

func (b *CudaGenerator) ScratchBufferReset()

func (*CudaGenerator) SetScratchSize

func (b *CudaGenerator) SetScratchSize(args ...int)
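The scratch-buffer methods above suggest the following usage pattern; the ordering and the interpretation of the size arguments are assumptions based on the method names.

	lib.SetScratchSize(1 << 20)             // reserve at least 1 MiB of scratch (assumed meaning)
	lib.ScratchBufferInit()
	buf := lib.ScratchBufferOffset(1 << 20) // carve out a region as an acc.DeviceAllocation
	lib.MemsetD32Async(buf, 0, (1<<20)/4)   // zero it as 32-bit words
	lib.ScratchBufferReset()                // release the offsets for the next operation (assumed)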

func (*CudaGenerator) SetSmCount

func (b *CudaGenerator) SetSmCount(smCount int)

func (*CudaGenerator) UpdateConv

func (b *CudaGenerator) UpdateConv(
	layer backends.ConvLayerBase,
	i backends.Tensor,
	e backends.Tensor,
	gradF backends.Tensor,
	alpha float64,
	beta float64,
	gradBias backends.Tensor)

SKIPPED: repeat (gpu only), layerOp

type DataLayer

type DataLayer struct {
	LayerBase
	// contains filtered or unexported fields
}

func NewDataLayer

func NewDataLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *DataParams) *DataLayer

func (*DataLayer) C

func (n *DataLayer) C() int

func (*DataLayer) DHW

func (n *DataLayer) DHW() []int

func (*DataLayer) DimI

func (n *DataLayer) DimI() []int

func (*DataLayer) DimI2

func (n *DataLayer) DimI2() []int

func (*DataLayer) DimO

func (n *DataLayer) DimO() []int

func (*DataLayer) DimO2

func (n *DataLayer) DimO2() []int

func (*DataLayer) Fprop

func (n *DataLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*DataLayer) Init

func (n *DataLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *DataParams)

func (*DataLayer) InitDataUniform

func (n *DataLayer) InitDataUniform(low float64, high float64)

func (*DataLayer) InitDataZero

func (n *DataLayer) InitDataZero()

func (*DataLayer) InitDeltas

func (n *DataLayer) InitDeltas(shared []backends.Tensor)

func (*DataLayer) InitWeights

func (n *DataLayer) InitWeights(loc float64, scale float64, shared backends.Tensor, zeros bool)

func (*DataLayer) K

func (n *DataLayer) K() int

func (*DataLayer) M

func (n *DataLayer) M() int

func (*DataLayer) P

func (n *DataLayer) P() int

func (*DataLayer) Q

func (n *DataLayer) Q() int

func (*DataLayer) String

func (n *DataLayer) String() string

type DataParams

type DataParams struct {
	N int
	C int
	D int
	H int
	W int
}

func (*DataParams) Init

func (a *DataParams) Init()

func (*DataParams) Resolve

func (a *DataParams) Resolve()
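A sketch of building an input layer and filling it with uniform random data; the field semantics (N = batch, C = channels, D/H/W = spatial) are assumed from the conventions used elsewhere in this package, as is the need to call Init and Resolve first. lib and base.Float32 are assumed to exist.

	dp := &cuda.DataParams{N: 32, C: 3, D: 1, H: 224, W: 224}
	dp.Init()
	dp.Resolve()
	data := cuda.NewDataLayer(lib, base.Float32, dp)
	data.InitDataUniform(-1.0, 1.0)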

type DeconvLayer

type DeconvLayer struct {
	ConvLayerBase
}

func NewDeconvLayer

func NewDeconvLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.DeconvParams,
	kernelBuilder ConvKernelBuilder) *DeconvLayer

func (*DeconvLayer) Init

func (n *DeconvLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.DeconvParams,
	kernelBuilder ConvKernelBuilder)

func (*DeconvLayer) String

func (n *DeconvLayer) String() string

type FilterDimShuffle

type FilterDimShuffle struct {
	// contains filtered or unexported fields
}

func NewFilterDimShuffle

func NewFilterDimShuffle(
	lib *CudaGenerator,
	dtype base.Dtype,
	c int,
	t int,
	r int,
	s int,
	k int) *FilterDimShuffle

func (*FilterDimShuffle) BindParams

func (*FilterDimShuffle) Execute

func (z *FilterDimShuffle) Execute()

func (*FilterDimShuffle) Init

func (z *FilterDimShuffle) Init(
	lib *CudaGenerator,
	dtype base.Dtype,
	c int,
	t int,
	r int,
	s int,
	k int)

func (*FilterDimShuffle) Unbind

func (z *FilterDimShuffle) Unbind()

ATTENTION: Is this method necessary in the generator scenario?

type FpropCuda

type FpropCuda struct {
	KernelGroup
	// contains filtered or unexported fields
}

func NewFpropCuda

func NewFpropCuda(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams) *FpropCuda

func (*FpropCuda) BindParams

func (g *FpropCuda) BindParams(
	i backends.Tensor,
	f backends.Tensor,
	o backends.Tensor,
	x backends.Tensor,
	bias backends.Tensor,
	bsum backends.Tensor,
	alpha float64,
	beta float64,
	relu bool,
	brelu bool,
	slope float64)

func (*FpropCuda) Execute

func (g *FpropCuda) Execute()

SKIPPED: Arguments 'repeat' and 'unbind'

func (*FpropCuda) Init

func (g *FpropCuda) Init(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams)

type FullLayer

type FullLayer struct {
	LayerBase
	// contains filtered or unexported fields
}

func NewFullLayer

func NewFullLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *FullParams) *FullLayer

func (*FullLayer) Bprop

func (n *FullLayer) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor

func (*FullLayer) DimF

func (n *FullLayer) DimF() []int

func (*FullLayer) DimF2

func (n *FullLayer) DimF2() []int

func (*FullLayer) DimI

func (n *FullLayer) DimI() []int

func (*FullLayer) DimI2

func (n *FullLayer) DimI2() []int

func (*FullLayer) DimO

func (n *FullLayer) DimO() []int

func (*FullLayer) DimO2

func (n *FullLayer) DimO2() []int

func (*FullLayer) Fprop

func (n *FullLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*FullLayer) Init

func (n *FullLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *FullParams)

func (*FullLayer) NIn

func (n *FullLayer) NIn() int

func (*FullLayer) NOut

func (n *FullLayer) NOut() int

func (*FullLayer) Relu

func (n *FullLayer) Relu() bool

func (*FullLayer) String

func (n *FullLayer) String() string

type FullParams

type FullParams struct {
	N    int
	NIn  int
	NOut int
	Relu bool
}

func (*FullParams) Init

func (a *FullParams) Init()

func (*FullParams) Resolve

func (a *FullParams) Resolve()

type InceptionLayer

type InceptionLayer struct {
	LayerBase
	// contains filtered or unexported fields
}

func NewInceptionLayer

func NewInceptionLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	partitions [][]Layer,
	params *InceptionParams) *InceptionLayer

func (*InceptionLayer) Bprop

func (n *InceptionLayer) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor

func (*InceptionLayer) C

func (n *InceptionLayer) C() int

func (*InceptionLayer) DHW

func (n *InceptionLayer) DHW() []int

func (*InceptionLayer) DimF

func (n *InceptionLayer) DimF() []int

func (*InceptionLayer) DimI

func (n *InceptionLayer) DimI() []int

func (*InceptionLayer) DimI2

func (n *InceptionLayer) DimI2() []int

func (*InceptionLayer) DimO

func (n *InceptionLayer) DimO() []int

func (*InceptionLayer) DimO2

func (n *InceptionLayer) DimO2() []int

func (*InceptionLayer) Fprop

func (n *InceptionLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*InceptionLayer) Init

func (n *InceptionLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	partitions [][]Layer,
	params *InceptionParams)

func (*InceptionLayer) InitActivations

func (n *InceptionLayer) InitActivations(fpropOut backends.Tensor)

func (*InceptionLayer) InitDeltas

func (n *InceptionLayer) InitDeltas(shared []backends.Tensor)

func (*InceptionLayer) InitWeights

func (n *InceptionLayer) InitWeights(
	loc float64, scale float64, shared backends.Tensor, zeros bool)

func (*InceptionLayer) K

func (n *InceptionLayer) K() int

func (*InceptionLayer) M

func (n *InceptionLayer) M() int

func (*InceptionLayer) MPQ

func (n *InceptionLayer) MPQ() []int

func (*InceptionLayer) NCK

func (n *InceptionLayer) NCK() []int

func (*InceptionLayer) NOut

func (n *InceptionLayer) NOut() int

func (*InceptionLayer) P

func (n *InceptionLayer) P() int

func (*InceptionLayer) Partitions

func (n *InceptionLayer) Partitions() [][]Layer

func (*InceptionLayer) Q

func (n *InceptionLayer) Q() int

func (*InceptionLayer) String

func (n *InceptionLayer) String() string

type InceptionParams

type InceptionParams struct {
	N int
	C int
	K int
	D int
	H int
	W int
	M int
	P int
	Q int
}

func (*InceptionParams) Init

func (a *InceptionParams) Init()

func (*InceptionParams) Resolve

func (a *InceptionParams) Resolve()

type Kernel

type Kernel struct {
	// contains filtered or unexported fields
}

func NewKernel

func NewKernel(lib *CudaGenerator, name string, code string) *Kernel

func (*Kernel) Launch

func (k *Kernel) Launch(
	grid []int,
	block []int,
	shared int,
	launchArgs []acc.KernelArgument,
	staticArgs []int)
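A sketch of registering and launching a hand-written kernel; kernelSrc is a placeholder CUDA source string, and passing nil launch and static arguments is an assumption, since the construction of acc.KernelArgument values is not covered by this listing.

	k := cuda.NewKernel(lib, "my_kernel", kernelSrc)
	lib.RegisterKernel("my_kernel", k)

	if cached := lib.LookupKernel("my_kernel"); cached != nil {
		// 64 blocks of 128 threads, no dynamic shared memory.
		cached.Launch([]int{64, 1, 1}, []int{128, 1, 1}, 0, nil, nil)
	}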

type KernelGroup

type KernelGroup struct {
	// contains filtered or unexported fields
}

func (*KernelGroup) GetKernel

func (g *KernelGroup) GetKernel(
	dtype base.Dtype,
	filterSize int,
	bsum bool,
	operation string) *Kernel

func (*KernelGroup) Init

func (g *KernelGroup) Init(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams)

type Layer

type Layer interface {
	Dtype() base.Dtype
	N() int
	Flops() float64
	SizeI() int
	SizeF() int
	SizeO() int
	// optional properties
	C() int
	K() int
	M() int
	P() int
	Q() int
	DimI() []int
	DimF() []int
	DimO() []int
	DimI2() []int
	DimF2() []int
	DimO2() []int
	NOut() int
	// setters
	SetBpropOut(bpropOut backends.Tensor)
	SetDeltaStats(deltaStats backends.Tensor)
	// operational methods
	InitActivations(fpropOut backends.Tensor)
	InitDeltas(shared []backends.Tensor)
	InitWeights(loc float64, scale float64, shared backends.Tensor, zeros bool)
	InitDataZero()
	InitDataUniform(low float64, high float64)
	Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor
	Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor
	String() string
}
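As a sketch of how the interface composes, a driver might run a forward pass through a stack of layers and then a backward pass in reverse; the helper below is hypothetical, the scale and beta values are placeholders, and seeding the backward pass from the forward output stands in for a real loss gradient.

	// runLayers is a hypothetical helper, not part of this package.
	func runLayers(layers []cuda.Layer, input backends.Tensor) backends.Tensor {
		out := input
		for _, l := range layers {
			out = l.Fprop(out, 1.0) // scaleWeights = 1.0 (placeholder)
		}
		grad := out // placeholder for a loss gradient
		for i := len(layers) - 1; i >= 0; i-- {
			grad = layers[i].Bprop(grad, 0.0) // beta = 0.0 (placeholder)
		}
		return grad
	}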

type LayerBase

type LayerBase struct {
	// contains filtered or unexported fields
}

func (*LayerBase) Bprop

func (n *LayerBase) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor

func (*LayerBase) BpropRelu

func (n *LayerBase) BpropRelu(bpropIn backends.Tensor) backends.Tensor

Fprop relu happens inside the conv and gemm kernels.

func (*LayerBase) C

func (n *LayerBase) C() int

func (*LayerBase) DimF

func (n *LayerBase) DimF() []int

func (*LayerBase) DimF2

func (n *LayerBase) DimF2() []int

func (*LayerBase) DimI

func (n *LayerBase) DimI() []int

func (*LayerBase) DimI2

func (n *LayerBase) DimI2() []int

func (*LayerBase) DimO

func (n *LayerBase) DimO() []int

func (*LayerBase) DimO2

func (n *LayerBase) DimO2() []int

func (*LayerBase) Dtype

func (n *LayerBase) Dtype() base.Dtype

func (*LayerBase) Flops

func (n *LayerBase) Flops() float64

func (*LayerBase) Fprop

func (n *LayerBase) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*LayerBase) GradDescent

func (n *LayerBase) GradDescent()

func (*LayerBase) Init

func (n *LayerBase) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	N int,
	dtypeU base.Dtype)

func (*LayerBase) InitActivations

func (n *LayerBase) InitActivations(fpropOut backends.Tensor)

func (*LayerBase) InitDataUniform

func (n *LayerBase) InitDataUniform(low float64, high float64)

func (*LayerBase) InitDataZero

func (n *LayerBase) InitDataZero()

func (*LayerBase) InitDeltas

func (n *LayerBase) InitDeltas(shared []backends.Tensor)

func (*LayerBase) InitWeights

func (n *LayerBase) InitWeights(loc float64, scale float64, shared backends.Tensor, zeros bool)

func (*LayerBase) K

func (n *LayerBase) K() int

func (*LayerBase) M

func (n *LayerBase) M() int

func (*LayerBase) N

func (n *LayerBase) N() int

func (*LayerBase) NOut

func (n *LayerBase) NOut() int

func (*LayerBase) P

func (n *LayerBase) P() int

func (*LayerBase) Q

func (n *LayerBase) Q() int

func (*LayerBase) ScaleWeights

func (n *LayerBase) ScaleWeights(scale float64)

func (*LayerBase) SetBpropOut

func (n *LayerBase) SetBpropOut(bpropOut backends.Tensor)

func (*LayerBase) SetDeltaStats

func (n *LayerBase) SetDeltaStats(deltaStats backends.Tensor)

func (*LayerBase) SizeF

func (n *LayerBase) SizeF() int

func (*LayerBase) SizeI

func (n *LayerBase) SizeI() int

func (*LayerBase) SizeO

func (n *LayerBase) SizeO() int

type LrnLayer

type LrnLayer struct {
	PoolLayer
}

func NewLrnLayer

func NewLrnLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.LrnParams) *LrnLayer

func (*LrnLayer) Init

func (n *LrnLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.LrnParams)

type PoolKernel

type PoolKernel struct {
	// contains filtered or unexported fields
}

func NewPoolKernel

func NewPoolKernel(
	name string,
	grid []int,
	block []int,
	args ...interface{}) *PoolKernel

type PoolLayer

type PoolLayer struct {
	LayerBase
	// contains filtered or unexported fields
}

func NewPoolLayer

func NewPoolLayer(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.PoolParams) *PoolLayer

func (*PoolLayer) Bprop

func (n *PoolLayer) Bprop(bpropIn backends.Tensor, beta float64) backends.Tensor

func (*PoolLayer) C

func (n *PoolLayer) C() int

func (*PoolLayer) DHW

func (n *PoolLayer) DHW() []int

func (*PoolLayer) DimF2

func (n *PoolLayer) DimF2() []int

func (*PoolLayer) DimI

func (n *PoolLayer) DimI() []int

func (*PoolLayer) DimI2

func (n *PoolLayer) DimI2() []int

func (*PoolLayer) DimO

func (n *PoolLayer) DimO() []int

func (*PoolLayer) DimO2

func (n *PoolLayer) DimO2() []int

func (*PoolLayer) Fprop

func (n *PoolLayer) Fprop(fpropIn backends.Tensor, scaleWeights float64) backends.Tensor

func (*PoolLayer) Init

func (n *PoolLayer) Init(
	self Layer,
	lib *CudaGenerator,
	dtype base.Dtype,
	params *backends.PoolParams)

func (*PoolLayer) InitActivations

func (n *PoolLayer) InitActivations(fpropOut backends.Tensor)

func (*PoolLayer) JTRS

func (n *PoolLayer) JTRS() []int

func (*PoolLayer) K

func (n *PoolLayer) K() int

func (*PoolLayer) M

func (n *PoolLayer) M() int

func (*PoolLayer) MPQ

func (n *PoolLayer) MPQ() []int

func (*PoolLayer) NOut

func (n *PoolLayer) NOut() int

func (*PoolLayer) Op

func (n *PoolLayer) Op() backends.PoolOp

func (*PoolLayer) P

func (n *PoolLayer) P() int

func (*PoolLayer) Padding

func (n *PoolLayer) Padding() []int

func (*PoolLayer) Q

func (n *PoolLayer) Q() int

func (*PoolLayer) Strides

func (n *PoolLayer) Strides() []int

func (*PoolLayer) String

func (n *PoolLayer) String() string

type UpdateConvReduce

type UpdateConvReduce struct {
	// contains filtered or unexported fields
}

UpdateConvReduce is a fast axis=0 reduction kernel used for the deterministic update.

func NewUpdateConvReduce

func NewUpdateConvReduce(lib *CudaGenerator, gridMpq int, crstk int) *UpdateConvReduce

func (*UpdateConvReduce) BindParams

func (z *UpdateConvReduce) BindParams(
	u backends.Tensor,
	alpha float64,
	beta float64,
	noOp bool) acc.DeviceAllocation

func (*UpdateConvReduce) Execute

func (z *UpdateConvReduce) Execute()

func (*UpdateConvReduce) Init

func (z *UpdateConvReduce) Init(lib *CudaGenerator, gridMpq int, crstk int)

func (*UpdateConvReduce) Unbind

func (z *UpdateConvReduce) Unbind()

ATTENTION: Is this method necessary in the generator scenario?

type UpdateCuda

type UpdateCuda struct {
	KernelGroup
	// contains filtered or unexported fields
}

func NewUpdateCuda

func NewUpdateCuda(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams) *UpdateCuda

func (*UpdateCuda) BindParams

func (g *UpdateCuda) BindParams(
	i backends.Tensor,
	e backends.Tensor,
	o backends.Tensor,
	alpha float64,
	beta float64,
	noOp bool)

func (*UpdateCuda) Execute

func (g *UpdateCuda) Execute()

SKIPPED: Arguments 'repeat' and 'unbind'

func (*UpdateCuda) Init

func (g *UpdateCuda) Init(
	lib *CudaGenerator,
	dtype base.Dtype,
	params *ConvParams)
