ml

package v1.4.0 (not the latest version of this module)
Published: Apr 28, 2023 License: MIT Imports: 9 Imported by: 1

Documentation

Index

Constants

const (
	DEBUG = false

	MAX_DIMS   = 4
	MAX_NODES  = 4096
	MAX_PARAMS = 16
	MAX_OPT    = 4

	QK = 32 // quantization

	TOKEN_BOS = 1
	TOKEN_EOS = 2
)
const (
	OP_NONE optype = iota
	OP_DUP
	OP_ADD
	OP_SUB
	OP_MUL
	OP_DIV
	OP_SQR
	OP_SQRT
	OP_SUM
	OP_MEAN
	OP_REPEAT
	OP_ABS
	OP_SGN
	OP_NEG
	OP_STEP
	OP_RELU
	OP_GELU
	OP_SILU
	OP_NORM
	OP_RMS_NORM

	OP_MUL_MAT

	OP_SCALE
	OP_CPY
	OP_RESHAPE
	OP_VIEW
	OP_PERMUTE
	OP_TRANSPOSE
	OP_GET_ROWS
	OP_DIAG_MASK_INF
	OP_SOFT_MAX
	OP_ROPE
	OP_CONV_1D_1S
	OP_CONV_1D_2S

	OP_FLASH_ATTN
	OP_FLASH_FF

	OP_COUNT
)
const MaxMem = 0 // 28_000_000_000
const MaxPool = 0 // 2_000_000_000

TODO: Precompute the maximum RAM size needed

const NewLineToken = 13 // ml.Tokenize(Ctx.Vocab, "\n", false)[0]

Variables

var BLCK_SIZE [TYPE_COUNT]uint32 = [TYPE_COUNT]uint32{1, 1, QK, QK, 1, 1, 1, 0}
var TYPE_SIZE [TYPE_COUNT]uint32 = [TYPE_COUNT]uint32{4, 2, 4 + QK/2, 4*2 + QK/2, 1, 2, 4, 0}
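For quantized types the per-element size is fractional: TYPE_SIZE holds bytes per block, while BLCK_SIZE holds elements per block. A minimal standalone sketch of the arithmetic (names are local to the example; the layout mirrors the constants above):

package main

import "fmt"

func main() {
	const QK = 32 // elements per quantization block

	// Q4_0: one float32 scale (4 bytes) plus QK 4-bit weights
	// packed two per byte (QK/2 bytes) => 20 bytes per 32 weights.
	q40 := float32(4+QK/2) / QK

	// Q4_1: adds a second float32 (the block minimum) => 24 bytes.
	q41 := float32(4*2+QK/2) / QK

	fmt.Println(q40, q41) // 0.625 0.75 bytes per weight
}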
var TableExpFP16 [1 << 16]float16.Float16

Precomputed exp table for FP16 (128 KB). Original C++: static ggml_fp16_t table_exp_f16[1 << 16];
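A sketch of how such a table is typically filled, assuming the Float16 type comes from github.com/x448/float16 (an assumption; the docs only show the type name):

package main

import (
	"math"

	"github.com/x448/float16" // assumed source of the Float16 type
)

// tableExpFP16 mirrors TableExpFP16: one precomputed exp() value for
// every possible 16-bit float pattern (65536 entries, 128 KB).
var tableExpFP16 [1 << 16]float16.Float16

func main() {
	for i := 0; i < 1<<16; i++ {
		x := float16.Frombits(uint16(i)).Float32()
		tableExpFP16[i] = float16.Fromfloat32(float32(math.Exp(float64(x))))
	}
	// A lookup replaces a call to exp(): index by the raw bit pattern.
	_ = tableExpFP16[float16.Fromfloat32(1.0).Bits()] // ≈ e
}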

Functions

func AreSameShape

func AreSameShape(a, b *Tensor) bool

func BuildForwardExpand

func BuildForwardExpand(graph *Graph, tensor *Tensor)

ggml_build_forward_expand

func BuildForwardImpl

func BuildForwardImpl(graph *Graph, tensor *Tensor, expand bool)

ggml_build_forward_impl

func CanMulMat

func CanMulMat(t0, t1 *Tensor) bool

ggml_can_mul_mat
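In the original ggml, ggml_can_mul_mat requires the operands to share the inner dimension ne[0] and the outer (batch) dimensions ne[2] and ne[3]. A standalone sketch of that check (the struct is local to the example):

type tensor struct{ NE [4]uint32 }

// canMulMat mirrors the ggml shape rule: shared inner dimension,
// matching batch dimensions.
func canMulMat(t0, t1 *tensor) bool {
	return t0.NE[0] == t1.NE[0] &&
		t0.NE[2] == t1.NE[2] &&
		t0.NE[3] == t1.NE[3]
}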

func CheckGraph added in v1.2.0

func CheckGraph()

TODO: Implement all tensor asserts BEFORE the actual computation

func ComputeBackward

func ComputeBackward(ctx *Context, tensor *Tensor, inplace bool)

func ComputeForward

func ComputeForward(ctx *Context, graph *Graph, params *ComputeParams, tensor *Tensor)

func ComputeForwardAddFP32

func ComputeForwardAddFP32(params *ComputeParams, src0, src1, dst *Tensor)

ggml_compute_forward_add

func ComputeForwardCopy

func ComputeForwardCopy(params *ComputeParams, src0, dst *Tensor)

func ComputeForwardDiagMaskInfFP32

func ComputeForwardDiagMaskInfFP32(params *ComputeParams, src0, src1, dst *Tensor)

ggml_compute_forward_diag_mask_inf

func ComputeForwardDupFP32

func ComputeForwardDupFP32(params *ComputeParams, src0, dst *Tensor)

ggml_compute_forward_dup_f32

func ComputeForwardGetRows

func ComputeForwardGetRows(params *ComputeParams, src0, src1, dst *Tensor)

ggml_compute_forward_get_rows_f32

func ComputeForwardMulFP32

func ComputeForwardMulFP32(params *ComputeParams, src0, src1, dst *Tensor)

ggml_compute_forward_mul

func ComputeForwardMulMatFP32

func ComputeForwardMulMatFP32(params *ComputeParams, src0, src1, dst *Tensor)

ggml_compute_forward_mul_mat_f32

func ComputeForwardPermute

func ComputeForwardPermute(params *ComputeParams, src0 *Tensor)

ggml_compute_forward_permute

func ComputeForwardRMSNormFP32

func ComputeForwardRMSNormFP32(params *ComputeParams, src0, dst *Tensor)

ggml_compute_forward_rms_norm_f32

func ComputeForwardRepeatFP32

func ComputeForwardRepeatFP32(params *ComputeParams, src0, dst *Tensor)

ggml_compute_forward_repeat

func ComputeForwardReshape

func ComputeForwardReshape(params *ComputeParams, src0, dst *Tensor)

ggml_compute_forward_reshape

func ComputeForwardRopeFP32

func ComputeForwardRopeFP32(params *ComputeParams, src0, src1, dst *Tensor)

ggml_compute_forward_rope

func ComputeForwardScaleFP32

func ComputeForwardScaleFP32(params *ComputeParams, src0, src1, dst *Tensor)

ggml_compute_forward_scale_f32

func ComputeForwardSiluFP32

func ComputeForwardSiluFP32(params *ComputeParams, src0, dst *Tensor)

ggml_compute_forward_silu

func ComputeForwardSoftMaxFP32

func ComputeForwardSoftMaxFP32(params *ComputeParams, src0, dst *Tensor)

ggml_compute_forward_soft_max

func ComputeForwardView

func ComputeForwardView(params *ComputeParams, src0 *Tensor)

ggml_compute_forward_view

func Do added in v1.2.0

func Do(params *ComputeParams, id int)

Do is an experimental alternative to the always-waiting Job goroutines

func GraphCompute

func GraphCompute(ctx *Context, graph *Graph)

func Init

func Init(params InitParams)

TODO Do we need this?

func IsMatrix

func IsMatrix(tensor *Tensor) bool

func IsScalar

func IsScalar(tensor *Tensor) bool

func IsVector

func IsVector(tensor *Tensor) bool

func Job

func Job(listen <-chan *ComputeParams, id int)

Job is a goroutine that lives while the computation loop is active. Its main purpose is to perform a share of the time-consuming matrix multiplications. TODO: Investigate https://pkg.go.dev/runtime#LockOSThread

func SiluFP32

func SiluFP32(x float32) float32

Sigmoid Linear Unit (SiLU) function
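SiLU is defined as silu(x) = x * sigmoid(x) = x / (1 + exp(-x)). A self-contained sketch of that formula (the package's own implementation may differ, e.g. by using the FP16 exp table above):

package main

import (
	"fmt"
	"math"
)

// silu computes x / (1 + exp(-x)), the SiLU definition.
func silu(x float32) float32 {
	return x / (1.0 + float32(math.Exp(float64(-x))))
}

func main() {
	fmt.Println(silu(0))  // 0
	fmt.Println(silu(1))  // ≈ 0.731
	fmt.Println(silu(-4)) // ≈ -0.072 (squashes large negatives toward 0)
}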

func Token2Str

func Token2Str(vocab *Vocab, token uint32) string

func Tokenize

func Tokenize(vocab *Vocab, text string, bos bool) []uint32

void tokenize(const std::string & text, std::vector<llama_vocab::id> & output) {
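A hedged usage sketch; the import path is assumed for illustration, and the vocabulary would really be loaded from a model file:

package main

import (
	"fmt"

	"github.com/gotzmann/llama.go/pkg/ml" // import path assumed
)

func main() {
	vocab := ml.NewVocab(32000)
	// ... populate vocab.Token2ID and vocab.ID2Token from model weights ...

	tokens := ml.Tokenize(vocab, "hello world", true) // true => prepend TOKEN_BOS
	for _, id := range tokens {
		fmt.Printf("%d => %q\n", id, ml.Token2Str(vocab, id))
	}
}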

func TryAddBigram

func TryAddBigram(vocab *Vocab, symbols []Symbol, workQueue *[]Bigram, left, right int)

func TypeSizeFloat

func TypeSizeFloat(dt DType) float32

func VecAccFP32

func VecAccFP32(n uint32, y, x []float32)

ggml_vec_acc_f32

func VecAddFP32

func VecAddFP32(n uint32, z, x, y []float32)

inline static void ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] + y[i]; }
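A direct Go translation of that one-line C body:

// vecAddFP32 computes z[i] = x[i] + y[i] for i in [0, n).
func vecAddFP32(n uint32, z, x, y []float32) {
	for i := uint32(0); i < n; i++ {
		z[i] = x[i] + y[i]
	}
}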

func VecCopyFP32

func VecCopyFP32(n uint32, y, x []float32)

func VecDotFP32

func VecDotFP32(n uint32, x, y []float32) float32

ggml_vec_dot_f32

func VecMadFP32

func VecMadFP32(n uint32, y, x []float32, v float32)

ggml_vec_mad_f32
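In ggml, vec_mad is a multiply-accumulate: y[i] += x[i] * v. A sketch of the expected semantics:

// vecMadFP32 accumulates y[i] += x[i] * v (multiply-add).
func vecMadFP32(n uint32, y, x []float32, v float32) {
	for i := uint32(0); i < n; i++ {
		y[i] += x[i] * v
	}
}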

func VecMaxFP32

func VecMaxFP32(n uint32, x []float32) float32

func VecMulFP32

func VecMulFP32(n uint32, z, x, y []float32)

func VecScaleFP32

func VecScaleFP32(n uint32, y []float32, v float32)

ggml_vec_scale_f32

func VecSiluFP32

func VecSiluFP32(n uint32, y, x []float32)

inline static void ggml_vec_silu_f32(const int n, float * y, const float * x) {

func VisitParents

func VisitParents(graph *Graph, node *Tensor)

ggml_visit_parents

Types

type Allocator added in v1.3.0

type Allocator struct {
	sync.Mutex

	PoolSize int
	MemSize  int

	Pool []byte
	Mem  []byte
}

Allocator is an experimental memory pool for FP32 slices. TODO: Investigate https://github.com/valyala/bytebufferpool
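A hedged usage sketch of the pool (the import path and buffer size are illustrative):

package main

import "github.com/gotzmann/llama.go/pkg/ml" // import path assumed

func main() {
	a := ml.NewAllocator()

	buf := a.Get(1024) // *[]float32: fresh, or recycled from the pool
	for i := range *buf {
		(*buf)[i] = 0 // use as scratch memory
	}

	a.Reset() // recycle; subsequent Get calls may reuse the same memory
}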

func NewAllocator added in v1.3.0

func NewAllocator() *Allocator

func (*Allocator) Get added in v1.3.0

func (a *Allocator) Get(size uint32) *[]float32

Get returns a new memory buffer of size bytes, or reuses one from the pool

func (*Allocator) GetFixed added in v1.3.0

func (a *Allocator) GetFixed(size uint32) *[]float32

GetFixed returns a fixed memory buffer of size bytes

func (*Allocator) Reset added in v1.3.0

func (a *Allocator) Reset()

type Bigram

type Bigram struct {

	// NB! Allow -1
	Left  int
	Right int

	Score float32
	Size  uint32
}

struct llama_sp_bigram {

func PopMax

func PopMax(queue *[]Bigram) Bigram

type ComputeParams

type ComputeParams struct {
	Type TaskType

	UseAVX  bool
	UseNEON bool
	// contains filtered or unexported fields
}

type Context

type Context struct {
	MaxThreads int
	UseAVX     bool
	UseNEON    bool
	//Graph      *Graph
	Compute   chan *ComputeParams
	Allocator *Allocator
}

func NewContext added in v1.3.0

func NewContext(maxThreads int, useAVX, useNEON bool) *Context

func (*Context) ReleaseContext added in v1.4.0

func (ctx *Context) ReleaseContext()

ReleaseContext frees all context resources: the compute channel is closed and the worker goroutines are stopped
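A hedged lifecycle sketch (import path assumed; the AVX/NEON flags would normally come from CPU feature detection):

package main

import (
	"runtime"

	"github.com/gotzmann/llama.go/pkg/ml" // import path assumed
)

func main() {
	ctx := ml.NewContext(runtime.NumCPU(), false, false)
	defer ctx.ReleaseContext() // close the compute channel, stop the workers

	// ... build tensors and graphs with ctx, then run GraphCompute ...
}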

type DType

type DType uint8
const (
	TYPE_F32   DType = 0
	TYPE_F16   DType = 1
	TYPE_Q4_0  DType = 2
	TYPE_Q4_1  DType = 3
	TYPE_I8    DType = 4
	TYPE_I16   DType = 5
	TYPE_I32   DType = 6
	TYPE_COUNT DType = 8
)

Data types are the same as in llama.cpp, so the formats are fully compatible

type Graph

type Graph struct {
	NodesCount uint32
	LeafsCount uint32

	Jobs chan *ComputeParams

	Nodes [MAX_NODES]*Tensor
	Grads [MAX_NODES]*Tensor
	Leafs [MAX_NODES]*Tensor
}

computation graph

func BuildBackward

func BuildBackward(ctx *Context, gf *Graph, keep bool) Graph

func BuildForward

func BuildForward(tensor *Tensor) *Graph
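A hedged end-to-end sketch tying the pieces together (import path assumed): it builds c = a + b and runs the forward graph.

package main

import (
	"fmt"
	"runtime"

	"github.com/gotzmann/llama.go/pkg/ml" // import path assumed
)

func main() {
	ctx := ml.NewContext(runtime.NumCPU(), false, false)
	defer ctx.ReleaseContext()

	a := ml.SetFP32(ml.NewTensor1D(ctx, ml.TYPE_F32, 4), 1.0)
	b := ml.SetFP32(ml.NewTensor1D(ctx, ml.TYPE_F32, 4), 2.0)
	c := ml.Add(ctx, a, b)

	graph := ml.BuildForward(c) // topologically ordered nodes ending at c
	ml.GraphCompute(ctx, graph) // run the init/compute/finalize passes

	fmt.Println(c.Data) // expected: [3 3 3 3]
}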

type InitParams

type InitParams struct {
}

type Symbol

type Symbol struct {

	// NB! Allow -1
	Prev int
	Next int

	Text string
	N    uint32
}

struct llama_sp_symbol {

type TaskType

type TaskType uint8
const (
	TASK_INIT     TaskType = 0
	TASK_COMPUTE  TaskType = 1
	TASK_FINALIZE TaskType = 2
)

type Tensor

type Tensor struct {
	Type DType

	Reusable bool // this tensor Data buffer might be reused with pooling

	Dims uint32

	NE [MAX_DIMS]uint32 // number of elements
	NB [MAX_DIMS]uint32 // stride in bytes

	TasksCount int

	Data []float32
	// contains filtered or unexported fields
}
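NE counts elements per dimension and NB holds byte strides, as in ggml. For a contiguous FP32 tensor the strides follow nb[0] = sizeof(float32), nb[i] = nb[i-1] * ne[i-1] (quantized types work per block instead). A standalone sketch for a 3x2 matrix:

package main

import "fmt"

func main() {
	ne := [4]uint32{3, 2, 1, 1} // 3 columns, 2 rows
	var nb [4]uint32
	nb[0] = 4 // bytes per float32 element
	for i := 1; i < 4; i++ {
		nb[i] = nb[i-1] * ne[i-1]
	}
	fmt.Println(nb) // [4 12 24 24]
	// Element (i0, i1) lives at byte offset i0*nb[0] + i1*nb[1].
}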

func Add

func Add(ctx *Context, a, b *Tensor) *Tensor

func AddImpl

func AddImpl(ctx *Context, a, b *Tensor, inplace bool) *Tensor

ggml_add

func AddInplace

func AddInplace(ctx *Context, a, b *Tensor) *Tensor

func Copy

func Copy(ctx *Context, a, b *Tensor) *Tensor

func CopyImpl

func CopyImpl(ctx *Context, a, b *Tensor, inplace bool) *Tensor

ggml_cpy

func CopyInplace

func CopyInplace(ctx *Context, a, b *Tensor) *Tensor

func DiagMaskInf

func DiagMaskInf(ctx *Context, a *Tensor, past uint32) *Tensor

ggml_diag_mask_inf

func Div

func Div(ctx *Context, a, b *Tensor) *Tensor

func DivImpl

func DivImpl(ctx *Context, a, b *Tensor, inplace bool) *Tensor

ggml_div

func DivInplace

func DivInplace(ctx *Context, a, b *Tensor, inplace bool) *Tensor

func DupTensor

func DupTensor(ctx *Context, src *Tensor) *Tensor

ggml_dup_tensor

func GetRows

func GetRows(ctx *Context, a, b *Tensor) *Tensor

ggml_get_rows

func Mul

func Mul(ctx *Context, a, b *Tensor) *Tensor

struct ggml_tensor * Mul(

func MulImpl

func MulImpl(ctx *Context, a, b *Tensor, inplace bool) *Tensor

struct ggml_tensor * Mul_impl(

func MulInplace

func MulInplace(ctx *Context, a, b *Tensor) *Tensor

struct ggml_tensor * Mul_inplace(

func MulMat

func MulMat(ctx *Context, a, b *Tensor) *Tensor

ggml_mul_mat

func NewFP32

func NewFP32(ctx *Context, value float32) *Tensor

ggml_new_f32

func NewTensor

func NewTensor(ctx *Context, dt DType, dims uint32, ne0, ne1, ne2, ne3 uint32, data []float32) *Tensor

ggml_new_tensor_impl

func NewTensor1D

func NewTensor1D(ctx *Context, dt DType, ne0 uint32) *Tensor

ggml_new_tensor_1d

func NewTensor2D

func NewTensor2D(ctx *Context, dt DType, ne0, ne1 uint32) *Tensor

ggml_new_tensor_2d

func NewTensor3D

func NewTensor3D(ctx *Context, dt DType, ne0, ne1, ne2 uint32) *Tensor

func NewTensor4D

func NewTensor4D(ctx *Context, dt DType, ne0, ne1, ne2, ne3 uint32) *Tensor

func Permute

func Permute(ctx *Context, a *Tensor, axis0, axis1, axis2, axis3 uint32) *Tensor

ggml_permute

func RMSNorm

func RMSNorm(ctx *Context, a *Tensor) *Tensor

func RMSNormImpl

func RMSNormImpl(ctx *Context, a *Tensor, inplace bool) *Tensor

ggml_rms_norm_impl

func RMSNormInplace

func RMSNormInplace(ctx *Context, a *Tensor) *Tensor

func Repeat

func Repeat(ctx *Context, a, b *Tensor) *Tensor

struct ggml_tensor * Repeat(

func Reshape3D

func Reshape3D(ctx *Context, a *Tensor, ne0, ne1, ne2 uint32) *Tensor

func Rope

func Rope(ctx *Context, a *Tensor, past, dims, mode uint32) *Tensor

ggml_rope

func Scale

func Scale(ctx *Context, a, b *Tensor) *Tensor

func ScaleImpl

func ScaleImpl(ctx *Context, a, b *Tensor, inplace bool) *Tensor

ggml_scale

func ScaleInplace

func ScaleInplace(ctx *Context, a, b *Tensor) *Tensor

func SetFP32

func SetFP32(tensor *Tensor, value float32) *Tensor

ggml_set_f32

func Sgn

func Sgn(ctx *Context, a *Tensor) *Tensor

func SgnImpl

func SgnImpl(ctx *Context, a *Tensor, inplace bool) *Tensor

ggml_sgn

func SgnInplace

func SgnInplace(ctx *Context, a *Tensor) *Tensor

func Silu

func Silu(ctx *Context, a *Tensor) *Tensor

func SiluImpl

func SiluImpl(ctx *Context, a *Tensor, inplace bool) *Tensor

func SiluInplace

func SiluInplace(ctx *Context, a *Tensor) *Tensor

func SoftMax

func SoftMax(ctx *Context, a *Tensor) *Tensor

ggml_soft_max

func Step

func Step(ctx *Context, a *Tensor) *Tensor

func StepImpl

func StepImpl(ctx *Context, a *Tensor, inplace bool) *Tensor

ggml_step

func StepInplace

func StepInplace(ctx *Context, a *Tensor) *Tensor

func Sub

func Sub(ctx *Context, a, b *Tensor) *Tensor

func SubImpl

func SubImpl(ctx *Context, a, b *Tensor, inplace bool) *Tensor

ggml_sub

func SubInplace

func SubInplace(ctx *Context, a, b *Tensor) *Tensor

func Sum

func Sum(ctx *Context, a *Tensor) *Tensor

ggml_sum

func Transpose

func Transpose(ctx *Context, a *Tensor) *Tensor

func View1D

func View1D(ctx *Context, a *Tensor, ne0 uint32, offset uint32) *Tensor

ggml_view_1d. NB! The offset was originally in bytes, but here it is in floats (4 bytes each)
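When porting a call site, a byte offset from the C++ code therefore has to be divided by 4. A trivial standalone sketch of the conversion:

package main

import "fmt"

func main() {
	offsetBytes := uint32(4096)     // offset as it appears in llama.cpp
	offsetFloats := offsetBytes / 4 // value to pass to View1D here
	fmt.Println(offsetFloats)       // 1024
}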

func ViewTensor

func ViewTensor(ctx *Context, src *Tensor) *Tensor

ggml_view_tensor

func (*Tensor) IsContiguous

func (tensor *Tensor) IsContiguous() bool

ggml_is_contiguous

func (*Tensor) Nbytes

func (t *Tensor) Nbytes() uint32

ggml_nbytes

func (*Tensor) Nelements

func (t *Tensor) Nelements() uint32

func (*Tensor) Nrows

func (t *Tensor) Nrows() uint32

type TokenScore

type TokenScore struct {
	Token string
	Score float32
}

type Vocab

type Vocab struct {
	Size     uint32
	Token2ID map[string]uint32
	ID2Token []TokenScore
}

func NewVocab

func NewVocab(size uint32) *Vocab
