cublas

package

v0.9.4 Latest Latest Go to latest Published: Aug 2, 2021 License: Apache-2.0 Imports: 6 Imported by: 6

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/gorgonia/cu

Links

Open Source Insights

README ¶

cublas

Package cublas implements a Go API for CUDA's cuBLAS. It matches the gonum/blas interface.

How To Use

To install: go get -u gorgonia.org/cu

The CUDA Toolkit 8.0 is required. The LDFlags and CFlags may not be quite accurate; please file an issue if you find a problem. Thank you.

Bear in mind that cublas only supports FORTRAN-ordered (column-major) matrices. Most Go matrices are created with C ordering (row-major; e.g. gonum/matrix, gorgonia/tensor), so care must be taken.

For example, here's how to use Dgemm:

// main multiplies a 5×10 matrix A by a 10×12 matrix B on the GPU using
// cuBLAS Dgemm and prints the operands and the 5×12 result C.
//
// All three matrices live in CUDA managed memory and are wrapped as tensors
// so that they can be filled and printed from Go.
func main() {
	dev := cu.Device(0)
	ctx, err := dev.MakeContext(cu.SchedAuto)
	if err != nil {
		log.Fatal(err)
	}
	defer cu.DestroyContext(&ctx)

	dt := tensor.Float64
	s0 := tensor.Shape{5, 10}
	s1 := tensor.Shape{10, 12}
	s2 := tensor.Shape{5, 12}

	// A: filled with 1, 2, 3, ...
	memsize0 := calcMemsize(dt, s0)
	mem0, err := cu.MemAllocManaged(memsize0, cu.AttachGlobal)
	if err != nil {
		log.Fatal(err)
	}
	mat0 := tensor.New(tensor.Of(dt), tensor.WithShape(s0...), tensor.FromMemory(uintptr(mem0), uintptr(memsize0)))
	d0 := mat0.Data().([]float64)
	for i := range d0 {
		d0[i] = float64(i + 1)
	}
	fmt.Printf("A: \n%#v\n", mat0)

	// B: filled with 1, 2, 3, ...
	memsize1 := calcMemsize(dt, s1)
	mem1, err := cu.MemAllocManaged(memsize1, cu.AttachGlobal)
	if err != nil {
		log.Fatal(err)
	}
	mat1 := tensor.New(tensor.Of(dt), tensor.WithShape(s1...), tensor.FromMemory(uintptr(mem1), uintptr(memsize1)))
	d1 := mat1.Data().([]float64)
	for i := range d1 {
		d1[i] = float64(i + 1)
	}
	fmt.Printf("B: \n%#v\n", mat1)

	// C: the output buffer, printed once before and once after the multiplication.
	memsize2 := calcMemsize(dt, s2)
	mem2, err := cu.MemAllocManaged(memsize2, cu.AttachGlobal)
	if err != nil {
		log.Fatal(err)
	}
	mat2 := tensor.New(tensor.Of(dt), tensor.WithShape(s2...), tensor.FromMemory(uintptr(mem2), uintptr(memsize2)))
	d2 := mat2.Data().([]float64)
	fmt.Printf("C: \n%#v\n", mat2)

	impl := cublas.NewImplementation()

	m := s0[0]
	k := s0[1]
	n := s1[1]
	lda := mat0.Strides()[0]
	ldb := mat1.Strides()[0]
	ldc := mat2.Strides()[0]
	alpha := 1.0
	beta := 0.0
	// cuBLAS works on column-major data while these tensors are row-major, so
	// A×B is computed as Bᵀ×Aᵀ: B is passed first (with its leading dimension
	// ldb) and m and n are swapped.
	impl.Dgemm(blas.NoTrans, blas.NoTrans, n, m, k, alpha, d1, ldb, d0, lda, beta, d2, ldc)
	if err := cu.Synchronize(); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("C: \n%#v\n", mat2)
	cu.MemFree(mem0)
	cu.MemFree(mem1)
	cu.MemFree(mem2)
}

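Things to note: because cuBLAS expects column-major data, to compute A×B on row-major matrices you essentially compute Bᵀ×Aᵀ. This is why the Dgemm call above passes B before A and swaps m and n.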

How This Package Is Developed

The majority of the CUDA interface was generated with the cublasgen program. The cublasgen program was adapted from the cgo generator from the gonum/blas package.

The cudagen.h file was generated based off the proprietary header from NVIDIA, then further edited (several variable names were renamed) to match the cblas interface in order to quickly generate the API.

Documentation ¶

Overview ¶

Example ¶

package main

import (
	"reflect"
	"runtime"
	"unsafe"

	"github.com/pkg/errors"
	"gonum.org/v1/gonum/blas"
	"gorgonia.org/cu"
	cublas "gorgonia.org/cu/blas"
	"gorgonia.org/tensor"
)

// Engine is the example's CUDA-backed tensor engine. It embeds tensor.StdEng
// and a cuBLAS handle, and keeps the CUDA context that its memory methods
// (Alloc, Free, Accessible) operate on.
type Engine struct {
	tensor.StdEng
	// ctx is the CUDA context used for allocation, freeing and device-to-host copies.
	ctx cu.Context
	// Standard is the cuBLAS handle whose routines (Dgemv, Dgemm, Dger, ...) the engine calls.
	*cublas.Standard
}

// newEngine returns an Engine whose CUDA context is created on device 0 with
// automatic scheduling, and whose cuBLAS handle is bound to that same context.
func newEngine() *Engine {
	eng := new(Engine)
	eng.ctx = cu.NewContext(cu.Device(0), cu.SchedAuto)
	eng.Standard = cublas.New(cublas.WithContext(eng.ctx))
	return eng
}

// AllocAccessible always reports true.
func (e *Engine) AllocAccessible() bool {
	return true
}

// Alloc allocates size bytes of CUDA managed memory (attached globally) on the
// engine's context and returns it together with any allocation error.
func (e *Engine) Alloc(size int64) (tensor.Memory, error) {
	ptr, err := e.ctx.MemAllocManaged(size, cu.AttachGlobal)
	return ptr, err
}

// AllocFlags returns the memory flag and data order for this engine: a flag
// built from tensor.ManuallyManaged, and tensor.ColMajor.
func (e *Engine) AllocFlags() (tensor.MemoryFlag, tensor.DataOrder) {
	flag := tensor.MakeMemoryFlag(tensor.ManuallyManaged)
	return flag, tensor.ColMajor
}

// Free releases mem through the engine's CUDA context. The size argument is
// not used.
//
// mem must be a cu.DevicePtr; any other type yields an error instead of a
// panic from an unchecked type assertion.
func (e *Engine) Free(mem tensor.Memory, size int64) error {
	ptr, ok := mem.(cu.DevicePtr)
	if !ok {
		return errors.Errorf("cannot free memory of type %T: expected cu.DevicePtr", mem)
	}
	e.ctx.MemFree(ptr)
	return nil
}

// Memset is not implemented by this example engine; calling it panics.
func (e *Engine) Memset(mem tensor.Memory, val interface{}) error {
	panic("not implemented")
}

// Memclr is not implemented by this example engine; calling it panics.
func (e *Engine) Memclr(mem tensor.Memory) {
	panic("not implemented")
}

// Memcpy is not implemented by this example engine; calling it panics.
func (e *Engine) Memcpy(dst tensor.Memory, src tensor.Memory) error {
	panic("not implemented")
}

// Accessible copies the device memory described by mem into a freshly
// allocated host buffer and returns that buffer viewed as a foomem
// ([]float64) holding MemSize()/8 elements, together with the context's
// current error.
//
// A zero-sized mem yields an empty foomem without touching the device.
func (e *Engine) Accessible(mem tensor.Memory) (tensor.Memory, error) {
	size := mem.MemSize()
	if size == 0 {
		// There is nothing to copy, and taking &buf[0] of an empty buffer would panic.
		return foomem{}, e.ctx.Error()
	}

	buf := make([]byte, int(size))
	e.ctx.MemcpyDtoH(unsafe.Pointer(&buf[0]), cu.DevicePtr(mem.Uintptr()), int64(size))

	// Reinterpret the byte buffer as float64s by filling in the header of a
	// real slice variable. A free-standing reflect.SliceHeader must not be
	// used for this: its Data field is a plain uintptr and does not keep the
	// buffer alive.
	l := int(size / 8)
	var out foomem
	hdr := (*reflect.SliceHeader)(unsafe.Pointer(&out))
	hdr.Data = uintptr(unsafe.Pointer(&buf[0]))
	hdr.Len = l
	hdr.Cap = l
	return out, e.ctx.Error()
}

// WorksWith reports true for every data order; the order argument is ignored.
func (e *Engine) WorksWith(order tensor.DataOrder) bool {
	return true
}

// NonStdAlloc does nothing.
// NOTE(review): presumably a marker method that the tensor package uses to
// detect engines with their own allocator; confirm against gorgonia.org/tensor.
func (e *Engine) NonStdAlloc() {}

// ContextErr returns the error currently reported by the engine's CUDA context.
func (e *Engine) ContextErr() error {
	return e.ctx.Error()
}

// foomem is a host-side []float64 that exposes its address and byte size
// through Uintptr and MemSize.
type foomem []float64

// Uintptr returns the address of the first element, or 0 when the slice is
// empty (indexing element 0 of an empty slice would panic).
func (m foomem) Uintptr() uintptr {
	if len(m) == 0 {
		return 0
	}
	return uintptr(unsafe.Pointer(&m[0]))
}

// MemSize returns the size of the slice's contents in bytes (8 per float64).
func (m foomem) MemSize() uintptr { return uintptr(len(m) * 8) }

// checkThreeFloat validates the operands of a three-tensor BLAS call and
// returns them as *tensor.Dense values.
//
// It fails when any of a, b or ret is not manually managed, when the three
// Dtypes differ, or when any of them is not a *tensor.Dense. On failure all
// three returned tensors are nil.
func (e *Engine) checkThreeFloat(a, b, ret tensor.Tensor) (ad, bd, retVal *tensor.Dense, err error) {
	// The cases are tried top to bottom, so the first violated rule decides the error.
	switch {
	case !a.IsManuallyManaged():
		return nil, nil, nil, errors.New("CUDA Engine only takes non-natively accessible memory (memory on graphics cards). a isn't.")
	case !b.IsManuallyManaged():
		return nil, nil, nil, errors.New("CUDA Engine only takes non-natively accessible memory (memory on graphics cards). b isn't")
	case !ret.IsManuallyManaged():
		return nil, nil, nil, errors.New("CUDA Engine only takes non-natively accessible memory (memory on graphics cards). ret isn't")
	case a.Dtype() != b.Dtype() || b.Dtype() != ret.Dtype():
		return nil, nil, nil, errors.New("Expected a and b and retVal all to have the same Dtype")
	}

	denseA, isDense := a.(*tensor.Dense)
	if !isDense {
		return nil, nil, nil, errors.New("Expected a to be a *tensor.Dense")
	}
	denseB, isDense := b.(*tensor.Dense)
	if !isDense {
		return nil, nil, nil, errors.New("Expected b to be a *tensor.Dense")
	}
	denseRet, isDense := ret.(*tensor.Dense)
	if !isDense {
		return nil, nil, nil, errors.New("Expected ret to be a *tensor.Dense")
	}
	return denseA, denseB, denseRet, nil
}

// MatVecMul computes the matrix-vector product of a and b into prealloc using
// Dgemv (Float64) or Sgemv (Float32), with alpha = 1 and beta = 0.
//
// The transpose flag, the m/n arguments and the leading dimension handed to
// BLAS are derived from a's data order and transposition state. Any other
// Dtype is rejected with an error. The returned error is otherwise whatever
// the cuBLAS handle reports.
func (e *Engine) MatVecMul(a, b, prealloc tensor.Tensor) (err error) {
	var mat, vec, out *tensor.Dense
	if mat, vec, out, err = e.checkThreeFloat(a, b, prealloc); err != nil {
		return errors.Wrapf(err, "MatVecMul failed pre check")
	}

	order := a.DataOrder()
	transposed := order.IsTransposed()
	rows, cols := a.Shape()[0], a.Shape()[1]

	// Starting values; every branch below overrides what it needs.
	op := blas.Trans
	m, n := rows, cols
	var lda int
	switch {
	case order.IsRowMajor():
		if transposed {
			op, lda = blas.NoTrans, rows
		} else {
			// Keep the transpose op; hand BLAS the dimensions swapped.
			lda = cols
			m, n = cols, rows
		}
	case order.IsColMajor():
		if transposed {
			op, lda = blas.Trans, cols
			m, n = cols, rows
		} else {
			op, lda = blas.NoTrans, rows
		}
	}

	// Only unit increments are supported for now.
	// TODO (tech debt): derive the increments from the operands' strides.
	const inc = 1

	switch mat.Dtype() {
	case tensor.Float64:
		e.Standard.Dgemv(op, m, n, 1, mat.Float64s(), lda, vec.Float64s(), inc, 0, out.Float64s(), inc)
	case tensor.Float32:
		e.Standard.Sgemv(op, m, n, 1, mat.Float32s(), lda, vec.Float32s(), inc, 0, out.Float32s(), inc)
	default:
		return errors.New("Unsupported Dtype")
	}
	return e.Standard.Err()
}

// MatMul computes the matrix product of a and b into prealloc using Dgemm
// (Float64) or Sgemm (Float32), with alpha = 1 and beta = 0.
//
// a and b must share the same data order. The transpose flags and leading
// dimensions are chosen from each operand's order and transposition state.
// For row-major operands the two inputs (and m and n) are swapped before the
// call. Any other Dtype is rejected with an error; otherwise the returned
// error is whatever the cuBLAS handle reports.
func (e *Engine) MatMul(a, b, prealloc tensor.Tensor) (err error) {
	var ad, bd, pd *tensor.Dense
	if ad, bd, pd, err = e.checkThreeFloat(a, b, prealloc); err != nil {
		return errors.Wrapf(err, "MatMul failed pre check")
	}

	ado := a.DataOrder()
	bdo := b.DataOrder()
	if !ado.HasSameOrder(bdo) {
		return errors.Errorf("a does not have the same data order as b. a is %v. b is %v", a.DataOrder(), b.DataOrder())
	}

	// get result shapes. k is the shared dimension
	// a is (m, k)
	// b is (k, n)
	// c is (m, n)
	var m, n, k int
	m = ad.Shape()[0]
	k = ad.Shape()[1]
	n = bd.Shape()[1]

	// Leading dimensions are taken from the shapes rather than from .Strides():
	// lda in col major = number of rows;
	// lda in row major = number of cols.
	var lda, ldb, ldc int
	tA, tB := blas.Trans, blas.Trans
	za := ado.IsTransposed()
	zb := bdo.IsTransposed()

	// swapping around the operands if they are row major (a becomes b, and b becomes a)
	switch {
	case ado.IsColMajor() && bdo.IsColMajor() && !za && !zb:
		lda = m
		ldb = k
		ldc = prealloc.Shape()[0]
		tA, tB = blas.NoTrans, blas.NoTrans
	case ado.IsColMajor() && bdo.IsColMajor() && za && !zb:
		lda = k
		ldb = k
		ldc = prealloc.Shape()[0]
		tA, tB = blas.Trans, blas.NoTrans
	case ado.IsColMajor() && bdo.IsColMajor() && za && zb:
		lda = k
		ldb = n
		ldc = prealloc.Shape()[0]
		tA, tB = blas.Trans, blas.Trans
	case ado.IsColMajor() && bdo.IsColMajor() && !za && zb:
		lda = m
		ldb = n
		ldc = prealloc.Shape()[0]
		tA, tB = blas.NoTrans, blas.Trans
	case ado.IsRowMajor() && bdo.IsRowMajor() && !za && !zb:
		lda = k
		ldb = n
		ldc = prealloc.Shape()[1]
		tA, tB = blas.NoTrans, blas.NoTrans

		// swap the operands and their dimensions
		m, n = n, m
		lda, ldb = ldb, lda
		ad, bd = bd, ad
	case ado.IsRowMajor() && bdo.IsRowMajor() && za && !zb:
		lda = m
		ldb = n
		ldc = prealloc.Shape()[1]
		tA, tB = blas.Trans, blas.NoTrans

		// swap the operands, their dimensions and their transpose flags
		m, n = n, m
		lda, ldb = ldb, lda
		tA, tB = tB, tA
		ad, bd = bd, ad
	case ado.IsRowMajor() && bdo.IsRowMajor() && za && zb:
		lda = m
		ldb = k
		ldc = prealloc.Shape()[1]
		tA, tB = blas.Trans, blas.Trans

		// swap the operands and their dimensions (the flags are equal)
		m, n = n, m
		lda, ldb = ldb, lda
		ad, bd = bd, ad
	case ado.IsRowMajor() && bdo.IsRowMajor() && !za && zb:
		lda = k
		ldb = k
		ldc = prealloc.Shape()[1]
		tA, tB = blas.NoTrans, blas.Trans

		// swap the operands, their dimensions and their transpose flags
		m, n = n, m
		lda, ldb = ldb, lda
		tA, tB = tB, tA
		ad, bd = bd, ad

	default:
		panic("Unreachable")
	}

	switch ad.Dtype() {
	case tensor.Float64:
		A := ad.Float64s()
		B := bd.Float64s()
		C := pd.Float64s()
		alpha, beta := float64(1), float64(0)
		e.Standard.Dgemm(tA, tB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)

	case tensor.Float32:
		A := ad.Float32s()
		B := bd.Float32s()
		C := pd.Float32s()
		alpha, beta := float32(1), float32(0)
		e.Standard.Sgemm(tA, tB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc)
	default:
		return errors.Errorf("Unsupported Dtype %v", ad.Dtype())
	}
	return e.Standard.Err()
}

// Outer computes the outer product of the vectors a and b into prealloc.
//
// For a column-major prealloc it calls Dger (Float64) or Sger (Float32) with
// alpha = 1 and unit increments. For a row-major prealloc it temporarily
// reshapes a to a column and b to a row, delegates to MatMul, and then
// restores both shapes, including when MatMul or a reshape fails. Any other
// Dtype is rejected with an error.
func (e *Engine) Outer(a, b, prealloc tensor.Tensor) (err error) {
	var ad, bd, pd *tensor.Dense
	if ad, bd, pd, err = e.checkThreeFloat(a, b, prealloc); err != nil {
		return errors.Wrapf(err, "Outer failed pre check")
	}
	m := ad.Size()
	n := bd.Size()
	pdo := pd.DataOrder()

	var lda int
	switch {
	case pdo.IsColMajor():
		lda = pd.Shape()[0]
	case pdo.IsRowMajor():
		aShape := a.Shape().Clone()
		bShape := b.Shape().Clone()
		if err = a.Reshape(aShape[0], 1); err != nil {
			return err
		}
		if err = b.Reshape(1, bShape[0]); err != nil {
			// Best effort: put a back the way the caller handed it over and
			// report the reshape failure that actually stopped the operation.
			_ = a.Reshape(aShape...)
			return err
		}

		err = e.MatMul(a, b, prealloc)

		// Always restore the caller's shapes; the first error encountered wins.
		if rerr := b.Reshape(bShape...); err == nil {
			err = rerr
		}
		if rerr := a.Reshape(aShape...); err == nil {
			err = rerr
		}
		return err
	}
	incX, incY := 1, 1
	switch ad.Dtype() {
	case tensor.Float64:
		x := ad.Float64s()
		y := bd.Float64s()
		A := pd.Float64s()
		alpha := float64(1)
		e.Standard.Dger(m, n, alpha, x, incX, y, incY, A, lda)
	case tensor.Float32:
		x := ad.Float32s()
		y := bd.Float32s()
		A := pd.Float32s()
		alpha := float32(1)
		e.Standard.Sger(m, n, alpha, x, incX, y, incY, A, lda)
	default:
		// Match MatMul and MatVecMul instead of silently doing nothing.
		return errors.Errorf("Unsupported Dtype %v", ad.Dtype())
	}
	return e.Standard.Err()
}
// main pins the goroutine to its OS thread for the duration of the program
// and then runs every example in turn: four matrix-vector products, eight
// matrix-matrix products (column-major and row-major, each with every
// transposition combination) and two outer products.
func main() {
	// debug.SetGCPercent(-1)
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	// Matrix-vector multiplication.
	matVecMulColmajorNonTransposed()
	matVecMulColmajorTransposed()
	matVecMulRowMajorNonTransposed()
	matVecMulRowMajorTransposed()

	// Matrix-matrix multiplication, column-major operands.
	matMulColmajorNTNT()
	matMulColmajorTNT()
	matMulColmajorTT()
	matMulColmajorNTT()

	// Matrix-matrix multiplication, row-major operands.
	matMulRowmajorNTNT()
	matMulRowmajorTNT()
	matMulRowmajorTT()
	matMulRowmajorNTT()

	// Outer products.
	outerColMajor()
	outerRowMajor()

}

Output:

ColMajor Non Transposed
A:
⎡1  2  3⎤
⎣4  5  6⎦

B:[1  2  3]
C:[1000  1000]
C:
[14 32]
==========
ColMajor Transposed
A:
⎡1  4⎤
⎢2  5⎥
⎣3  6⎦

B:[1  2]
C[1000  1000  1000]
C:
[9 12 15]
==========
RowMajor Non Transposed
A:
⎡1  2  3⎤
⎣4  5  6⎦

B:[1  2  3]
C[1000  1000]
C:
[14 32]
==========
RowMajor Transposed
A:
⎡1  4⎤
⎢2  5⎥
⎣3  6⎦

B:[1  2]
C[1000  1000  1000]
C:
[9 12 15]
==========
ColMajor Non Transposed Non Transposed
A:
⎡1  2  3⎤
⎣4  5  6⎦

B:
⎡ 0   1   2   3⎤
⎢ 4   5   6   7⎥
⎣ 8   9  10  11⎦

C:
⎡1000  1000  1000  1000⎤
⎣1000  1000  1000  1000⎦

C:
[32 68 38 83 44 98 50 113]
==========
ColMajor Transposed Non Transposed
A:
⎡1  4⎤
⎢2  5⎥
⎣3  6⎦

B:
⎡0  1  2  3⎤
⎣4  5  6  7⎦

C:
⎡1000  1000  1000  1000⎤
⎢1000  1000  1000  1000⎥
⎣1000  1000  1000  1000⎦

C:
[16 20 24 21 27 33 26 34 42 31 41 51]
==========
ColMajor Transposed Transposed
A:
⎡1  4⎤
⎢2  5⎥
⎣3  6⎦

B:
⎡0  2  4  6⎤
⎣1  3  5  7⎦

C:
⎡1000  1000  1000  1000⎤
⎢1000  1000  1000  1000⎥
⎣1000  1000  1000  1000⎦

C:
[4 5 6 14 19 24 24 33 42 34 47 60]
==========
ColMajor Non Transposed Transposed
A:
⎡1  2  3⎤
⎣4  5  6⎦

B:
⎡ 0   3   6   9⎤
⎢ 1   4   7  10⎥
⎣ 2   5   8  11⎦

C:
⎡1000  1000  1000  1000⎤
⎣1000  1000  1000  1000⎦

C:
[8 17 26 62 44 107 62 152]
==========
RowMajor Non Transposed Non Transposed
A:
⎡1  2  3⎤
⎣4  5  6⎦

B:
⎡ 0   1   2   3⎤
⎢ 4   5   6   7⎥
⎣ 8   9  10  11⎦

C:
⎡1000  1000  1000  1000⎤
⎣1000  1000  1000  1000⎦

C:
[32 38 44 50 68 83 98 113]
==========
RowMajor Transposed Non Transposed
A:
⎡1  3  5⎤
⎣2  4  6⎦

B:
⎡ 0   1   2   3⎤
⎢ 4   5   6   7⎥
⎣ 8   9  10  11⎦

C:
⎡1000  1000  1000  1000⎤
⎣1000  1000  1000  1000⎦

C:
[52 61 70 79 64 76 88 100]
==========
RowMajor Transposed Non Transposed
A:
⎡1  3  5⎤
⎣2  4  6⎦

B:
⎡ 0   3   6   9⎤
⎢ 1   4   7  10⎥
⎣ 2   5   8  11⎦

C:
⎡1000  1000  1000  1000⎤
⎣1000  1000  1000  1000⎦

C:
[13 40 67 94 16 52 88 124]
==========
RowMajor Transposed Non Transposed
A:
⎡1  2  3⎤
⎣4  5  6⎦

B:
⎡ 0   3   6   9⎤
⎢ 1   4   7  10⎥
⎣ 2   5   8  11⎦

C:
⎡1000  1000  1000  1000⎤
⎣1000  1000  1000  1000⎦

C:
[8 26 44 62 17 62 107 152]
==========
RowMajor Non Transposed
A:
[1  2  3]
B:[0  1]
C
⎡1000  1000⎤
⎢1000  1000⎥
⎣1000  1000⎦

C:
[0 0 0 1 2 3]
==========
RowMajor Non Transposed
A:
[1  2  3]
B:[0  1]
C
⎡1000  1000⎤
⎢1000  1000⎥
⎣1000  1000⎦

C:
[0 1 0 2 0 3]
==========

Index ¶

Constants
type BLAS
type ConsOpt
- func WithContext(ctx cu.Context) ConsOpt
- func WithNativeData() ConsOpt
type Order
type PointerMode
type Standard
- func New(opts ...ConsOpt) *Standard
type Status
- func (err Status) Error() string
- func (err Status) String() string

Examples ¶

Package

Constants ¶

View Source

// cuBLAS operation, fill-mode, diagonal and side selectors.
const (
	NoTrans   = C.CUBLAS_OP_N // NoTrans represents the no-transpose operation
	Trans     = C.CUBLAS_OP_T // Trans represents the transpose operation
	ConjTrans = C.CUBLAS_OP_C // ConjTrans represents the conjugate transpose operation

	Upper = C.CUBLAS_FILL_MODE_UPPER // Upper is used to specify that the matrix is an upper triangular matrix
	Lower = C.CUBLAS_FILL_MODE_LOWER // Lower is used to specify that the matrix is a lower triangular matrix

	NonUnit = C.CUBLAS_DIAG_NON_UNIT // NonUnit is used to specify that the matrix is not a unit triangular matrix
	Unit    = C.CUBLAS_DIAG_UNIT     // Unit is used to specify that the matrix is a unit triangular matrix

	Left  = C.CUBLAS_SIDE_LEFT  // Left is used to specify a multiplication op is performed from the left
	Right = C.CUBLAS_SIDE_RIGHT // Right is used to specify a multiplication op is performed from the right
)

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type BLAS ¶

// BLAS is the interface for all cuBLAS implementations: a CUDA context
// combined with the gonum BLAS method sets for the four numeric types.
type BLAS interface {
	cu.Context
	blas.Float32    // single-precision real routines
	blas.Float64    // double-precision real routines
	blas.Complex64  // single-precision complex routines
	blas.Complex128 // double-precision complex routines
}

BLAS is the interface for all cuBLAS implementations.

type ConsOpt ¶ added in v0.9.1

// ConsOpt is a construction option: a function that is handed the *Standard
// being configured. Options are passed to New.
type ConsOpt func(impl *Standard)

func WithContext ¶ added in v0.9.1

func WithContext(ctx cu.Context) ConsOpt

func WithNativeData ¶ added in v0.9.1

func WithNativeData() ConsOpt

type Order ¶

// Order specifies the matrix storage format.
type Order byte

Order is used to specify the matrix storage format. We still interact with an API that allows client calls to specify order, so this is here to document that fact.

// Matrix storage orders.
const (
	RowMajor Order = iota // Row Major
	ColMajor              // Column Major (cublas assumes all matrices be stored in this order)
)

type PointerMode ¶

// PointerMode selects between the Host and Device modes.
// NOTE(review): presumably mirrors cuBLAS's pointer mode (where scalar
// arguments and results live); confirm against the implementation.
type PointerMode byte

PointerMode

// Pointer modes.
// NOTE(review): presumably Host means scalars are passed by reference on the
// host and Device means they reside in device memory; confirm against the
// cuBLAS documentation.
const (
	Host PointerMode = iota
	Device
)

type Standard ¶

// Standard is the standard cuBLAS handler. Create one with New.
type Standard struct {
	// Context is the embedded CUDA context; its methods are promoted onto Standard.
	cu.Context

	// Mutex is embedded, so a *Standard can be locked and unlocked directly.
	// What it guards is not visible in this listing.
	sync.Mutex
	// contains filtered or unexported fields
}

Standard is the standard cuBLAS handler. By default it assumes that the data is in RowMajor, DESPITE the fact that cuBLAS takes ColMajor only. This is done for the ease of use of developers writing in Go.

Use New to create a new BLAS handler. Use the various ConsOpts to set the options

func New ¶ added in v0.9.1

func New(opts ...ConsOpt) *Standard

func (*Standard) Caxpy ¶

func (impl *Standard) Caxpy(n int, alpha complex64, x []complex64, incX int, y []complex64, incY int)

func (*Standard) Ccopy ¶

func (impl *Standard) Ccopy(n int, x []complex64, incX int, y []complex64, incY int)

func (*Standard) Cdgmm ¶

func (impl *Standard) Cdgmm(mode blas.Side, m, n int, a []complex64, lda int, x []complex64, incX int, c []complex64, ldc int)

func (*Standard) Cdotc ¶

func (impl *Standard) Cdotc(n int, x []complex64, incX int, y []complex64, incY int) (dotc complex64)

func (*Standard) Cdotu ¶

func (impl *Standard) Cdotu(n int, x []complex64, incX int, y []complex64, incY int) (dotu complex64)

func (*Standard) Cgbmv ¶

func (impl *Standard) Cgbmv(tA blas.Transpose, m, n, kl, ku int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)

func (*Standard) Cgeam ¶

func (impl *Standard) Cgeam(tA, tB blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, beta complex64, b []complex64, ldb int, c []complex64, ldc int)

func (*Standard) Cgemm ¶

func (impl *Standard) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)

func (*Standard) Cgemm3m ¶

func (impl *Standard) Cgemm3m(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)

func (*Standard) Cgemv ¶

func (impl *Standard) Cgemv(tA blas.Transpose, m, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)

func (*Standard) Cgerc ¶

func (impl *Standard) Cgerc(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)

func (*Standard) Cgeru ¶

func (impl *Standard) Cgeru(m, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)

func (*Standard) Chbmv ¶

func (impl *Standard) Chbmv(ul blas.Uplo, n, k int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)

func (*Standard) Chemm ¶

func (impl *Standard) Chemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)

func (*Standard) Chemv ¶

func (impl *Standard) Chemv(ul blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)

func (*Standard) Cher ¶

func (impl *Standard) Cher(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, a []complex64, lda int)

func (*Standard) Cher2 ¶

func (impl *Standard) Cher2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)

func (*Standard) Cher2k ¶

func (impl *Standard) Cher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int)

func (*Standard) Cherk ¶

func (impl *Standard) Cherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int)

func (*Standard) Cherkx ¶

func (impl *Standard) Cherkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int)

func (*Standard) Chpmv ¶

func (impl *Standard) Chpmv(ul blas.Uplo, n int, alpha complex64, aP, x []complex64, incX int, beta complex64, y []complex64, incY int)

func (*Standard) Chpr ¶

func (impl *Standard) Chpr(ul blas.Uplo, n int, alpha float32, x []complex64, incX int, aP []complex64)

func (*Standard) Chpr2 ¶

func (impl *Standard) Chpr2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, aP []complex64)

func (*Standard) Close ¶ added in v0.9.1

func (impl *Standard) Close() error

func (*Standard) Crot ¶

func (impl *Standard) Crot(n int, x []complex64, incX int, y []complex64, incY int, cScalar float32, sScalar []complex64)

func (*Standard) Cscal ¶

func (impl *Standard) Cscal(n int, alpha complex64, x []complex64, incX int)

func (*Standard) Csscal ¶

func (impl *Standard) Csscal(n int, alpha float32, x []complex64, incX int)

func (*Standard) Cswap ¶

func (impl *Standard) Cswap(n int, x []complex64, incX int, y []complex64, incY int)

func (*Standard) Csymm ¶

func (impl *Standard) Csymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)

func (*Standard) Csymv ¶

func (impl *Standard) Csymv(ul blas.Uplo, n int, alpha complex64, a []complex64, lda int, x []complex64, incX int, beta complex64, y []complex64, incY int)

func (*Standard) Csyr ¶

func (impl *Standard) Csyr(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, a []complex64, lda int)

func (*Standard) Csyr2 ¶

func (impl *Standard) Csyr2(ul blas.Uplo, n int, alpha complex64, x []complex64, incX int, y []complex64, incY int, a []complex64, lda int)

func (*Standard) Csyr2k ¶

func (impl *Standard) Csyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)

func (*Standard) Csyrk ¶

func (impl *Standard) Csyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int)

func (*Standard) Csyrkx ¶

func (impl *Standard) Csyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int)

func (*Standard) Ctbmv ¶

func (impl *Standard) Ctbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int)

func (*Standard) Ctbsv ¶

func (impl *Standard) Ctbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex64, lda int, x []complex64, incX int)

func (*Standard) Ctpmv ¶

func (impl *Standard) Ctpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []complex64, incX int)

func (*Standard) Ctpsv ¶

func (impl *Standard) Ctpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []complex64, incX int)

func (*Standard) Ctpttr ¶

func (impl *Standard) Ctpttr(ul blas.Uplo, n int, aP, a []complex64, lda int)

func (*Standard) Ctrmm ¶

func (impl *Standard) Ctrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int)

func (*Standard) Ctrmv ¶

func (impl *Standard) Ctrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int)

func (*Standard) Ctrsm ¶

func (impl *Standard) Ctrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int)

func (*Standard) Ctrsv ¶

func (impl *Standard) Ctrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex64, lda int, x []complex64, incX int)

func (*Standard) Ctrttp ¶

func (impl *Standard) Ctrttp(ul blas.Uplo, n int, a []complex64, lda int, aP []complex64)

func (*Standard) Dasum ¶

func (impl *Standard) Dasum(n int, x []float64, incX int) (retVal float64)

Dasum computes the sum of the absolute values of the elements of x.

\sum_i |x[i]|

Dasum returns 0 if incX is negative.

func (*Standard) Daxpy ¶

func (impl *Standard) Daxpy(n int, alpha float64, x []float64, incX int, y []float64, incY int)

Daxpy adds alpha times x to y

y[i] += alpha * x[i] for all i

func (*Standard) Dcopy ¶

func (impl *Standard) Dcopy(n int, x []float64, incX int, y []float64, incY int)

Dcopy copies the elements of x into the elements of y.

y[i] = x[i] for all i

func (*Standard) Ddgmm ¶

func (impl *Standard) Ddgmm(mode blas.Side, m, n int, a []float64, lda int, x []float64, incX int, c []float64, ldc int)

func (*Standard) Ddot ¶

func (impl *Standard) Ddot(n int, x []float64, incX int, y []float64, incY int) (retVal float64)

Ddot computes the dot product of the two vectors

\sum_i x[i]*y[i]

func (*Standard) Dgbmv ¶

func (impl *Standard) Dgbmv(tA blas.Transpose, m, n, kl, ku int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)

Dgbmv computes

y = alpha * A * x + beta * y if tA == blas.NoTrans
y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans

where A is an m×n band matrix with kL subdiagonals and kU super-diagonals, and m and n refer to the size of the full dense matrix it represents. x and y are vectors, and alpha and beta are scalars.

func (*Standard) Dgeam ¶

func (impl *Standard) Dgeam(tA, tB blas.Transpose, m, n int, alpha float64, a []float64, lda int, beta float64, b []float64, ldb int, c []float64, ldc int)

func (*Standard) Dgemm ¶

func (impl *Standard) Dgemm(tA, tB blas.Transpose, m, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)

Dgemm computes

C = beta * C + alpha * A * B,

where A, B, and C are dense matrices, and alpha and beta are scalars. tA and tB specify whether A or B are transposed.

func (*Standard) Dgemv ¶

func (impl *Standard) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)

Dgemv computes

y = alpha * A * x + beta * y if tA = blas.NoTrans
y = alpha * A^T * x + beta * y if tA = blas.Trans or blas.ConjTrans

where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.

func (*Standard) Dger ¶

func (impl *Standard) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int)

Dger performs the rank-one operation

A += alpha * x * y^T

where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.

func (*Standard) Dnrm2 ¶

func (impl *Standard) Dnrm2(n int, x []float64, incX int) (retVal float64)

Dnrm2 computes the Euclidean norm of a vector,

sqrt(\sum_i x[i] * x[i]).

This function returns 0 if incX is negative.

func (*Standard) Drot ¶

func (impl *Standard) Drot(n int, x []float64, incX int, y []float64, incY int, cScalar, sScalar float64)

Drot applies a plane transformation.

x[i] = c * x[i] + s * y[i]
y[i] = c * y[i] - s * x[i]

func (*Standard) Drotg ¶

func (impl *Standard) Drotg(a float64, b float64) (c float64, s float64, r float64, z float64)

func (*Standard) Drotm ¶

func (impl *Standard) Drotm(n int, x []float64, incX int, y []float64, incY int, p blas.DrotmParams)

func (*Standard) Drotmg ¶

func (impl *Standard) Drotmg(d1 float64, d2 float64, b1 float64, b2 float64) (p blas.DrotmParams, rd1 float64, rd2 float64, rb1 float64)

func (*Standard) Dsbmv ¶

func (impl *Standard) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)

Dsbmv performs

y = alpha * A * x + beta * y

where A is an n×n symmetric banded matrix, x and y are vectors, and alpha and beta are scalars.

func (*Standard) Dscal ¶

func (impl *Standard) Dscal(n int, alpha float64, x []float64, incX int)

Dscal scales x by alpha.

x[i] *= alpha

Dscal has no effect if incX < 0.

func (*Standard) Dsdot ¶

func (impl *Standard) Dsdot(n int, x []float32, incX int, y []float32, incY int) float64

func (*Standard) Dspmv ¶

func (impl *Standard) Dspmv(ul blas.Uplo, n int, alpha float64, aP, x []float64, incX int, beta float64, y []float64, incY int)

Dspmv performs

y = alpha * A * x + beta * y,

where A is an n×n symmetric matrix in packed format, x and y are vectors and alpha and beta are scalars.

func (*Standard) Dspr ¶

func (impl *Standard) Dspr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, aP []float64)

Dspr computes the rank-one operation

a += alpha * x * x^T

where a is an n×n symmetric matrix in packed format, x is a vector, and alpha is a scalar.

func (*Standard) Dspr2 ¶

func (impl *Standard) Dspr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, aP []float64)

Dspr2 performs the symmetric rank-2 update

A += alpha * x * y^T + alpha * y * x^T,

where A is an n×n symmetric matrix in packed format, x and y are vectors, and alpha is a scalar.

func (*Standard) Dswap ¶

func (impl *Standard) Dswap(n int, x []float64, incX int, y []float64, incY int)

Dswap exchanges the elements of two vectors.

x[i], y[i] = y[i], x[i] for all i

func (*Standard) Dsymm ¶

func (impl *Standard) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)

Dsymm performs one of

C = alpha * A * B + beta * C, if side == blas.Left,
C = alpha * B * A + beta * C, if side == blas.Right,

where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha is a scalar.

func (*Standard) Dsymv ¶

func (impl *Standard) Dsymv(ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int)

Dsymv computes

y = alpha * A * x + beta * y,

where a is an n×n symmetric matrix, x and y are vectors, and alpha and beta are scalars.

func (*Standard) Dsyr ¶

func (impl *Standard) Dsyr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int)

Dsyr performs the rank-one update

a += alpha * x * x^T

where a is an n×n symmetric matrix, and x is a vector.

func (*Standard) Dsyr2 ¶

func (impl *Standard) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int)

Dsyr2 performs the symmetric rank-two update

A += alpha * x * y^T + alpha * y * x^T

where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar.

func (*Standard) Dsyr2k ¶

func (impl *Standard) Dsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)

Dsyr2k performs the symmetric rank 2k operation

C = alpha * A * B^T + alpha * B * A^T + beta * C

where C is an n×n symmetric matrix. A and B are n×k matrices if tA == NoTrans and k×n otherwise. alpha and beta are scalars.

func (*Standard) Dsyrk ¶

func (impl *Standard) Dsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int)

Dsyrk performs the symmetric rank-k operation

C = alpha * A * A^T + beta*C

C is an n×n symmetric matrix. A is an n×k matrix if tA == blas.NoTrans, and a k×n matrix otherwise. alpha and beta are scalars.

func (*Standard) Dsyrkx ¶

func (impl *Standard) Dsyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int)

func (*Standard) Dtbmv ¶

func (impl *Standard) Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int)

Dtbmv computes

x = A * x if tA == blas.NoTrans
x = A^T * x if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular banded matrix with k diagonals, and x is a vector.

func (*Standard) Dtbsv ¶

func (impl *Standard) Dtbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int)

Dtbsv solves

A * x = b if tA == blas.NoTrans
A^T * x = b if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular banded matrix with k diagonals in band storage format, and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

func (*Standard) Dtpmv ¶

func (impl *Standard) Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float64, incX int)

Dtpmv computes

x = A * x if tA == blas.NoTrans
x = A^T * x if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular matrix in packed format, and x is a vector.

func (*Standard) Dtpsv ¶

func (impl *Standard) Dtpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float64, incX int)

Dtpsv solves

A * x = b if tA == blas.NoTrans
A^T * x = b if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular matrix in packed format and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

func (*Standard) Dtpttr ¶

func (impl *Standard) Dtpttr(ul blas.Uplo, n int, aP, a []float64, lda int)

func (*Standard) Dtrmm ¶

func (impl *Standard) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int)

func (*Standard) Dtrmv ¶

func (impl *Standard) Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int)

Dtrmv computes

x = A * x if tA == blas.NoTrans
x = A^T * x if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular matrix and x is a vector.

func (*Standard) Dtrsm ¶

func (impl *Standard) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int)

Dtrsm solves

A * X = alpha * B,   if tA == blas.NoTrans and side == blas.Left,
A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left,
X * A = alpha * B,   if tA == blas.NoTrans and side == blas.Right,
X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right,

where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a scalar.

At entry to the function, X contains the values of B, and the result is stored in place into X.

No check is made that A is invertible.

func (*Standard) Dtrsv ¶

func (impl *Standard) Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int)

Dtrsv solves

A * x = b if tA == blas.NoTrans
A^T * x = b if tA == blas.Trans or blas.ConjTrans

A is an n×n triangular matrix and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

func (*Standard) Dtrttp ¶

func (impl *Standard) Dtrttp(ul blas.Uplo, n int, a []float64, lda int, aP []float64)

func (*Standard) Dzasum ¶

func (impl *Standard) Dzasum(n int, x []complex128, incX int) (retVal float64)

func (*Standard) Dznrm2 ¶

func (impl *Standard) Dznrm2(n int, x []complex128, incX int) (retVal float64)

func (*Standard) Err ¶

func (impl *Standard) Err() error

func (*Standard) Icamax ¶

func (impl *Standard) Icamax(n int, x []complex64, incX int) (retVal int)

func (*Standard) Icamin ¶

func (impl *Standard) Icamin(n int, x []complex64, incX int) (retVal int)

func (*Standard) Idamax ¶

func (impl *Standard) Idamax(n int, x []float64, incX int) (retVal int)

Idamax returns the index of an element of x with the largest absolute value. If there are multiple such indices the earliest is returned. Idamax returns -1 if n == 0.

func (*Standard) Idamin ¶

func (impl *Standard) Idamin(n int, x []float64, incX int) (retVal int)

func (*Standard) Init ¶ added in v0.9.1

func (impl *Standard) Init(opts ...ConsOpt) error

func (*Standard) Isamax ¶

func (impl *Standard) Isamax(n int, x []float32, incX int) (retVal int)

Isamax returns the index of an element of x with the largest absolute value. If there are multiple such indices the earliest is returned. Isamax returns -1 if n == 0.

func (*Standard) Isamin ¶

func (impl *Standard) Isamin(n int, x []float32, incX int) (retVal int)

func (*Standard) Izamax ¶

func (impl *Standard) Izamax(n int, x []complex128, incX int) (retVal int)

func (*Standard) Izamin ¶

func (impl *Standard) Izamin(n int, x []complex128, incX int) (retVal int)

func (*Standard) Sasum ¶

func (impl *Standard) Sasum(n int, x []float32, incX int) (retVal float32)

Sasum computes the sum of the absolute values of the elements of x.

\sum_i |x[i]|

Sasum returns 0 if incX is negative.

func (*Standard) Saxpy ¶

func (impl *Standard) Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int)

Saxpy adds alpha times x to y

y[i] += alpha * x[i] for all i

func (*Standard) Scasum ¶

func (impl *Standard) Scasum(n int, x []complex64, incX int) (retVal float32)

func (*Standard) Scnrm2 ¶

func (impl *Standard) Scnrm2(n int, x []complex64, incX int) (retVal float32)

func (*Standard) Scopy ¶

func (impl *Standard) Scopy(n int, x []float32, incX int, y []float32, incY int)

Scopy copies the elements of x into the elements of y.

y[i] = x[i] for all i

func (*Standard) Sdgmm ¶

func (impl *Standard) Sdgmm(mode blas.Side, m, n int, a []float32, lda int, x []float32, incX int, c []float32, ldc int)

func (*Standard) Sdot ¶

func (impl *Standard) Sdot(n int, x []float32, incX int, y []float32, incY int) (retVal float32)

Sdot computes the dot product of the two vectors

\sum_i x[i]*y[i]

func (*Standard) Sdsdot ¶

func (impl *Standard) Sdsdot(n int, alpha float32, x []float32, incX int, y []float32, incY int) float32

func (*Standard) Sgbmv ¶

func (impl *Standard) Sgbmv(tA blas.Transpose, m, n, kl, ku int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)

Sgbmv computes

y = alpha * A * x + beta * y if tA == blas.NoTrans
y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans

where A is an m×n band matrix with kl sub-diagonals and ku super-diagonals, and m and n refer to the size of the full dense matrix it represents. x and y are vectors, and alpha and beta are scalars.

func (*Standard) Sgeam ¶

func (impl *Standard) Sgeam(tA, tB blas.Transpose, m, n int, alpha float32, a []float32, lda int, beta float32, b []float32, ldb int, c []float32, ldc int)

func (*Standard) Sgemm ¶

func (impl *Standard) Sgemm(tA, tB blas.Transpose, m, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)

Sgemm computes

C = beta * C + alpha * A * B,

where A, B, and C are dense matrices, and alpha and beta are scalars. tA and tB specify whether A or B are transposed.

func (*Standard) Sgemv ¶

func (impl *Standard) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)

Sgemv computes

y = alpha * A * x + beta * y if tA == blas.NoTrans
y = alpha * A^T * x + beta * y if tA == blas.Trans or blas.ConjTrans

where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars.

func (*Standard) Sger ¶

func (impl *Standard) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int)

Sger performs the rank-one operation

A += alpha * x * y^T

where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar.

func (*Standard) Snrm2 ¶

func (impl *Standard) Snrm2(n int, x []float32, incX int) (retVal float32)

Snrm2 computes the Euclidean norm of a vector,

sqrt(\sum_i x[i] * x[i]).

This function returns 0 if incX is negative.

func (*Standard) Srot ¶

func (impl *Standard) Srot(n int, x []float32, incX int, y []float32, incY int, cScalar, sScalar float32)

Srot applies a plane transformation.

x[i] = c * x[i] + s * y[i]
y[i] = c * y[i] - s * x[i]

func (*Standard) Srotg ¶

func (impl *Standard) Srotg(a float32, b float32) (c float32, s float32, r float32, z float32)

func (*Standard) Srotm ¶

func (impl *Standard) Srotm(n int, x []float32, incX int, y []float32, incY int, p blas.SrotmParams)

func (*Standard) Srotmg ¶

func (impl *Standard) Srotmg(d1 float32, d2 float32, b1 float32, b2 float32) (p blas.SrotmParams, rd1 float32, rd2 float32, rb1 float32)

func (*Standard) Ssbmv ¶

func (impl *Standard) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)

Ssbmv performs

y = alpha * A * x + beta * y

where A is an n×n symmetric banded matrix, x and y are vectors, and alpha and beta are scalars.

func (*Standard) Sscal ¶

func (impl *Standard) Sscal(n int, alpha float32, x []float32, incX int)

Sscal scales x by alpha.

x[i] *= alpha

Sscal has no effect if incX < 0.

func (*Standard) Sspmv ¶

func (impl *Standard) Sspmv(ul blas.Uplo, n int, alpha float32, aP, x []float32, incX int, beta float32, y []float32, incY int)

Sspmv performs

y = alpha * A * x + beta * y,

where A is an n×n symmetric matrix in packed format, x and y are vectors and alpha and beta are scalars.

func (*Standard) Sspr ¶

func (impl *Standard) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, aP []float32)

Sspr computes the rank-one operation

a += alpha * x * x^T

where a is an n×n symmetric matrix in packed format, x is a vector, and alpha is a scalar.

func (*Standard) Sspr2 ¶

func (impl *Standard) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, aP []float32)

Sspr2 performs the symmetric rank-2 update

A += alpha * x * y^T + alpha * y * x^T,

where A is an n×n symmetric matrix in packed format, x and y are vectors, and alpha is a scalar.

func (*Standard) Sswap ¶

func (impl *Standard) Sswap(n int, x []float32, incX int, y []float32, incY int)

Sswap exchanges the elements of two vectors.

x[i], y[i] = y[i], x[i] for all i

func (*Standard) Ssymm ¶

func (impl *Standard) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)

Ssymm performs one of

C = alpha * A * B + beta * C, if side == blas.Left,
C = alpha * B * A + beta * C, if side == blas.Right,

where A is an m×m symmetric matrix if side == blas.Left and an n×n symmetric matrix if side == blas.Right, B and C are m×n matrices, and alpha and beta are scalars.

func (*Standard) Ssymv ¶

func (impl *Standard) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int)

Ssymv computes

y = alpha * A * x + beta * y,

where A is an n×n symmetric matrix, x and y are vectors, and alpha and beta are scalars.

func (*Standard) Ssyr ¶

func (impl *Standard) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int)

Ssyr performs the rank-one update

a += alpha * x * x^T

where a is an n×n symmetric matrix, and x is a vector.

func (*Standard) Ssyr2 ¶

func (impl *Standard) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int)

Ssyr2 performs the symmetric rank-two update

A += alpha * x * y^T + alpha * y * x^T

where A is a symmetric n×n matrix, x and y are vectors, and alpha is a scalar.

func (*Standard) Ssyr2k ¶

func (impl *Standard) Ssyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)

Ssyr2k performs the symmetric rank 2k operation

C = alpha * A * B^T + alpha * B * A^T + beta * C

where C is an n×n symmetric matrix. A and B are n×k matrices if t == blas.NoTrans and k×n matrices otherwise. alpha and beta are scalars.

func (*Standard) Ssyrk ¶

func (impl *Standard) Ssyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int)

Ssyrk performs the symmetric rank-k operation

C = alpha * A * A^T + beta*C

where C is an n×n symmetric matrix. A is an n×k matrix if t == blas.NoTrans, and a k×n matrix otherwise. alpha and beta are scalars.

func (*Standard) Ssyrkx ¶

func (impl *Standard) Ssyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int)

func (*Standard) Stbmv ¶

func (impl *Standard) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int)

Stbmv computes

x = A * x if tA == blas.NoTrans
x = A^T * x if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular banded matrix with k diagonals, and x is a vector.

func (*Standard) Stbsv ¶

func (impl *Standard) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int)

Stbsv solves

A * x = b if tA == blas.NoTrans
A^T * x = b if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular banded matrix with k diagonals in band storage format, and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

func (*Standard) Stpmv ¶

func (impl *Standard) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float32, incX int)

Stpmv computes

x = A * x if tA == blas.NoTrans
x = A^T * x if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular matrix in packed format, and x is a vector.

func (*Standard) Stpsv ¶

func (impl *Standard) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []float32, incX int)

Stpsv solves

A * x = b if tA == blas.NoTrans
A^T * x = b if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular matrix in packed format and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

func (*Standard) Stpttr ¶

func (impl *Standard) Stpttr(ul blas.Uplo, n int, aP, a []float32, lda int)

func (*Standard) Strmm ¶

func (impl *Standard) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int)

func (*Standard) Strmv ¶

func (impl *Standard) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int)

Strmv computes

x = A * x if tA == blas.NoTrans
x = A^T * x if tA == blas.Trans or blas.ConjTrans

where A is an n×n triangular matrix and x is a vector.

func (*Standard) Strsm ¶

func (impl *Standard) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int)

Strsm solves

A * X = alpha * B,   if tA == blas.NoTrans and side == blas.Left,
A^T * X = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Left,
X * A = alpha * B,   if tA == blas.NoTrans and side == blas.Right,
X * A^T = alpha * B, if tA == blas.Trans or blas.ConjTrans, and side == blas.Right,

where A is an n×n or m×m triangular matrix, X is an m×n matrix, and alpha is a scalar.

At entry to the function, X contains the values of B, and the result is stored in place into X.

No check is made that A is invertible.

func (*Standard) Strsv ¶

func (impl *Standard) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int)

Strsv solves

A * x = b if tA == blas.NoTrans
A^T * x = b if tA == blas.Trans or blas.ConjTrans

A is an n×n triangular matrix and x is a vector. At entry to the function, x contains the values of b, and the result is stored in place into x.

No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

func (*Standard) Strttp ¶

func (impl *Standard) Strttp(ul blas.Uplo, n int, a []float32, lda int, aP []float32)

func (*Standard) Zaxpy ¶

func (impl *Standard) Zaxpy(n int, alpha complex128, x []complex128, incX int, y []complex128, incY int)

func (*Standard) Zcopy ¶

func (impl *Standard) Zcopy(n int, x []complex128, incX int, y []complex128, incY int)

func (*Standard) Zdgmm ¶

func (impl *Standard) Zdgmm(mode blas.Side, m, n int, a []complex128, lda int, x []complex128, incX int, c []complex128, ldc int)

func (*Standard) Zdotc ¶

func (impl *Standard) Zdotc(n int, x []complex128, incX int, y []complex128, incY int) (dotc complex128)

func (*Standard) Zdotu ¶

func (impl *Standard) Zdotu(n int, x []complex128, incX int, y []complex128, incY int) (dotu complex128)

func (*Standard) Zdscal ¶

func (impl *Standard) Zdscal(n int, alpha float64, x []complex128, incX int)

func (*Standard) Zgbmv ¶

func (impl *Standard) Zgbmv(tA blas.Transpose, m, n, kl, ku int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)

func (*Standard) Zgeam ¶

func (impl *Standard) Zgeam(tA, tB blas.Transpose, m, n int, alpha complex128, a []complex128, lda int, beta complex128, b []complex128, ldb int, c []complex128, ldc int)

func (*Standard) Zgemm ¶

func (impl *Standard) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)

func (*Standard) Zgemm3m ¶

func (impl *Standard) Zgemm3m(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)

func (*Standard) Zgemv ¶

func (impl *Standard) Zgemv(tA blas.Transpose, m, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)

func (*Standard) Zgerc ¶

func (impl *Standard) Zgerc(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)

func (*Standard) Zgeru ¶

func (impl *Standard) Zgeru(m, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)

func (*Standard) Zhbmv ¶

func (impl *Standard) Zhbmv(ul blas.Uplo, n, k int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)

func (*Standard) Zhemm ¶

func (impl *Standard) Zhemm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)

func (*Standard) Zhemv ¶

func (impl *Standard) Zhemv(ul blas.Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)

func (*Standard) Zher ¶

func (impl *Standard) Zher(ul blas.Uplo, n int, alpha float64, x []complex128, incX int, a []complex128, lda int)

func (*Standard) Zher2 ¶

func (impl *Standard) Zher2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)

func (*Standard) Zher2k ¶

func (impl *Standard) Zher2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int)

func (*Standard) Zherk ¶

func (impl *Standard) Zherk(ul blas.Uplo, t blas.Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int)

func (*Standard) Zherkx ¶

func (impl *Standard) Zherkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int)

func (*Standard) Zhpmv ¶

func (impl *Standard) Zhpmv(ul blas.Uplo, n int, alpha complex128, aP, x []complex128, incX int, beta complex128, y []complex128, incY int)

func (*Standard) Zhpr ¶

func (impl *Standard) Zhpr(ul blas.Uplo, n int, alpha float64, x []complex128, incX int, aP []complex128)

func (*Standard) Zhpr2 ¶

func (impl *Standard) Zhpr2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, aP []complex128)

func (*Standard) Zrot ¶

func (impl *Standard) Zrot(n int, x []complex128, incX int, y []complex128, incY int, cScalar float64, sScalar complex128)

func (*Standard) Zscal ¶

func (impl *Standard) Zscal(n int, alpha complex128, x []complex128, incX int)

func (*Standard) Zswap ¶

func (impl *Standard) Zswap(n int, x []complex128, incX int, y []complex128, incY int)

func (*Standard) Zsymm ¶

func (impl *Standard) Zsymm(s blas.Side, ul blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)

func (*Standard) Zsymv ¶

func (impl *Standard) Zsymv(ul blas.Uplo, n int, alpha complex128, a []complex128, lda int, x []complex128, incX int, beta complex128, y []complex128, incY int)

func (*Standard) Zsyr ¶

func (impl *Standard) Zsyr(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, a []complex128, lda int)

func (*Standard) Zsyr2 ¶

func (impl *Standard) Zsyr2(ul blas.Uplo, n int, alpha complex128, x []complex128, incX int, y []complex128, incY int, a []complex128, lda int)

func (*Standard) Zsyr2k ¶

func (impl *Standard) Zsyr2k(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)

func (*Standard) Zsyrk ¶

func (impl *Standard) Zsyrk(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int)

func (*Standard) Zsyrkx ¶

func (impl *Standard) Zsyrkx(ul blas.Uplo, t blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int)

func (*Standard) Ztbmv ¶

func (impl *Standard) Ztbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int)

func (*Standard) Ztbsv ¶

func (impl *Standard) Ztbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []complex128, lda int, x []complex128, incX int)

func (*Standard) Ztpmv ¶

func (impl *Standard) Ztpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []complex128, incX int)

func (*Standard) Ztpsv ¶

func (impl *Standard) Ztpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, aP, x []complex128, incX int)

func (*Standard) Ztpttr ¶

func (impl *Standard) Ztpttr(ul blas.Uplo, n int, aP, a []complex128, lda int)

func (*Standard) Ztrmm ¶

func (impl *Standard) Ztrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int)

func (*Standard) Ztrmv ¶

func (impl *Standard) Ztrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex128, lda int, x []complex128, incX int)

func (*Standard) Ztrsm ¶

func (impl *Standard) Ztrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int)

func (*Standard) Ztrsv ¶

func (impl *Standard) Ztrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []complex128, lda int, x []complex128, incX int)

func (*Standard) Ztrttp ¶

func (impl *Standard) Ztrttp(ul blas.Uplo, n int, a []complex128, lda int, aP []complex128)

type Status ¶

type Status int

Status is the cublas status.

// Status values, each defined from the corresponding C CUBLAS_STATUS_* constant.
const (
	Success        Status = C.CUBLAS_STATUS_SUCCESS          // The operation completed successfully.
	NotInitialized Status = C.CUBLAS_STATUS_NOT_INITIALIZED  // The cuBLAS library was not initialized. This is usually caused by the lack of a prior cublasCreate() call.
	AllocFailed    Status = C.CUBLAS_STATUS_ALLOC_FAILED     // Resource allocation failed inside the cuBLAS library.
	InvalidValue   Status = C.CUBLAS_STATUS_INVALID_VALUE    // An unsupported value or parameter was passed to the function (a negative vector size, for example).
	ArchMismatch   Status = C.CUBLAS_STATUS_ARCH_MISMATCH    // The function requires a feature absent from the device architecture; usually caused by the lack of support for double precision.
	MappingError   Status = C.CUBLAS_STATUS_MAPPING_ERROR    // An access to GPU memory space failed, which is usually caused by a failure to bind a texture.
	ExecFailed     Status = C.CUBLAS_STATUS_EXECUTION_FAILED // The GPU program failed to execute. This is often caused by a launch failure of the kernel on the GPU, which can have multiple causes.
	InternalError  Status = C.CUBLAS_STATUS_INTERNAL_ERROR   // An internal cuBLAS operation failed. This error is usually caused by a cudaMemcpyAsync() failure.
	Unsupported    Status = C.CUBLAS_STATUS_NOT_SUPPORTED    // The functionality requested is not supported.
	LicenceError   Status = C.CUBLAS_STATUS_LICENSE_ERROR    // The functionality requested requires a license, and an error was detected when trying to check the current licensing.
)

func (Status) Error ¶

func (err Status) Error() string

func (Status) String ¶

func (err Status) String() string

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL