tensor

package

Version: v1.8.0 (latest) | Published: Apr 29, 2026 | License: Apache-2.0 | Imports: 19 | Imported by: 57

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/zerfoo/ztensor

Links

Documentation ¶

Rendered for

Overview ¶

Package tensor provides a multi-dimensional array (tensor) implementation.

Index ¶

Variables
func AssertClose[T Numeric](t *testing.T, expected, actual *TensorNumeric[T], tolerance float64)
func AssertEquals[T Numeric](t *testing.T, expected, actual *TensorNumeric[T])
func BroadcastIndex(index int, shape, outputShape []int, broadcast bool) int
func BroadcastShapes(a, b []int) (shape []int, broadcastA, broadcastB bool, err error)
func ConvertInt64ToInt(s []int64) []int
func ConvertIntToInt64(s []int) []int64
func DequantizeIQ3S(raw []byte, dst []float32)
func DequantizeIQ4NL(raw []byte, dst []float32)
func DequantizeQ4K(raw []byte, dst []float32)
func DequantizeQ5K(raw []byte, dst []float32)
func DequantizeQ5_0(raw []byte, dst []float32)
func DequantizeQ6K(raw []byte, dst []float32)
func Equals[T Numeric](a, b *TensorNumeric[T]) bool
func Float32ToBytes(f []float32) ([]byte, error)
func GemmF32W8A8NT(m, n, k int, a []float32, b *W8A8Storage, c []float32)
func GemmW8A8(m, n, k int, a, b *W8A8Storage, c []float32)
func GemmW8A8NT(m, n, k int, a, b *W8A8Storage, c []float32)
func Int8ToBytes(i []int8) ([]byte, error)
func ListQuantTypes() []string
func MadviseDontNeed(data []byte) error
func MadviseRandom(data []byte) error
func MadviseSequential(data []byte) error
func MadviseWillNeed(data []byte) error
func Mmap(fd uintptr, offset int64, length int) ([]byte, error)
func MmapFile(path string) (data []byte, closer func() error, err error)
func Munmap(data []byte) error
func Ones[T Numeric](size int) []T
func Product(s []int) int
func Q4GPUDataOffset(totalBlocks int) int
func Q4GPUScaleOffset() int
func Q5_0GPUQhOffset(totalBlocks int) int
func Q5_0GPUQsOffset(totalBlocks int) int
func RegisterQuantType(name string, d Dequantizer)
func SameShape(a, b []int) bool
func ShapesEqual(a, b []int) bool
func Uint8ToBytes(u []uint8) ([]byte, error)
type AWQStorage
- func NewAWQStorageFromRaw(data []uint32, scales, zeros []float16.Float16, numElements, groupSize int) (*AWQStorage, error)
- func QuantizeAWQ(src []float32, groupSize int) *AWQStorage
- func (s *AWQStorage) ByteSize() int
- func (s *AWQStorage) Dequantize(dst []float32)
- func (s *AWQStorage) DeviceType() device.Type
- func (s *AWQStorage) GPUPtr() (unsafe.Pointer, int, int)
- func (s *AWQStorage) GroupSize() int
- func (s *AWQStorage) Len() int
- func (s *AWQStorage) NumGroups() int
- func (s *AWQStorage) Set(_ []float32)
- func (s *AWQStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (s *AWQStorage) Slice() []float32
type Addable
type BFloat16Storage
- func NewBFloat16Storage(src []float32) *BFloat16Storage
- func NewBFloat16StorageFromRaw(data []uint16) *BFloat16Storage
- func (s *BFloat16Storage) DeviceType() device.Type
- func (s *BFloat16Storage) GPUPtr() (unsafe.Pointer, int, int)
- func (s *BFloat16Storage) Len() int
- func (s *BFloat16Storage) RawBytes() []byte
- func (s *BFloat16Storage) Set(data []float32)
- func (s *BFloat16Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (s *BFloat16Storage) Slice() []float32
type CPUStorage
- func NewCPUStorage[T Numeric](data []T) *CPUStorage[T]
- func (s *CPUStorage[T]) DeviceType() device.Type
- func (s *CPUStorage[T]) Len() int
- func (s *CPUStorage[T]) Set(data []T)
- func (s *CPUStorage[T]) Slice() []T
type Dequantizer
- func GetQuantType(name string) (Dequantizer, bool)
type FP8E4M3Storage
- func NewFP8E4M3Storage(src []float32) *FP8E4M3Storage
- func (s *FP8E4M3Storage) DeviceType() device.Type
- func (s *FP8E4M3Storage) GPUPtr() (unsafe.Pointer, int, int)
- func (s *FP8E4M3Storage) Len() int
- func (s *FP8E4M3Storage) RawBytes() []byte
- func (s *FP8E4M3Storage) Scale() float32
- func (s *FP8E4M3Storage) ScaleGPUPtr() unsafe.Pointer
- func (s *FP8E4M3Storage) Set(data []float32)
- func (s *FP8E4M3Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (s *FP8E4M3Storage) SetScaleGPUPtr(ptr unsafe.Pointer)
- func (s *FP8E4M3Storage) Slice() []float32
type FP8E5M2Storage
- func NewFP8E5M2Storage(src []float32) *FP8E5M2Storage
- func (s *FP8E5M2Storage) DeviceType() device.Type
- func (s *FP8E5M2Storage) Len() int
- func (s *FP8E5M2Storage) Scale() float32
- func (s *FP8E5M2Storage) Set(data []float32)
- func (s *FP8E5M2Storage) Slice() []float32
type Float
type Float16Storage
- func NewFloat16StorageFromF32(src []float32) *Float16Storage
- func NewFloat16StorageFromRaw(raw []byte, numElems int) *Float16Storage
- func NewFloat16StorageGPU(ptr unsafe.Pointer, numElems, deviceID int) *Float16Storage
- func (s *Float16Storage) DeviceType() device.Type
- func (s *Float16Storage) GPUPtr() (unsafe.Pointer, int, int)
- func (s *Float16Storage) Len() int
- func (s *Float16Storage) RawBytes() []byte
- func (s *Float16Storage) Set(data []float32)
- func (s *Float16Storage) SetGPUByteSize(byteSize int)
- func (s *Float16Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (s *Float16Storage) Slice() []float32
- func (s *Float16Storage) SubSlice(offset, length int) *Float16Storage
type GGMLType
type GPTQStorage
- func NewGPTQStorageFromRaw(data []byte, scales, zeros []float16.Float16, numElements, groupSize, bits int) (*GPTQStorage, error)
- func QuantizeGPTQ(src []float32, groupSize, bits int) *GPTQStorage
- func (s *GPTQStorage) Bits() int
- func (s *GPTQStorage) ByteSize() int
- func (s *GPTQStorage) Dequantize(dst []float32)
- func (s *GPTQStorage) DeviceType() device.Type
- func (s *GPTQStorage) GPUPtr() (unsafe.Pointer, int, int)
- func (s *GPTQStorage) GroupSize() int
- func (s *GPTQStorage) Len() int
- func (s *GPTQStorage) NumGroups() int
- func (s *GPTQStorage) Set(_ []float32)
- func (s *GPTQStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (s *GPTQStorage) Slice() []float32
type GPUStorage
- func NewGPUStorage[T Numeric](length int, deviceID ...int) (*GPUStorage[T], error)
- func NewGPUStorageFromPool[T Numeric](devPtr unsafe.Pointer, length int, pool gpuapi.MemPool, deviceID int) (*GPUStorage[T], error)
- func NewGPUStorageFromPtr[T Numeric](devPtr unsafe.Pointer, length int, deviceID ...int) (*GPUStorage[T], error)
- func NewGPUStorageFromSlice[T Numeric](data []T, deviceID ...int) (*GPUStorage[T], error)
- func NewGPUStorageView[T Numeric](parent *GPUStorage[T], offsetElems, length int) *GPUStorage[T]
- func NewGPUStorageViewFromPtr[T Numeric](devPtr unsafe.Pointer, length int, deviceID int) *GPUStorage[T]
- func NewManagedGPUStorage[T Numeric](pool gpuapi.MemPool, length int, deviceID ...int) (*GPUStorage[T], error)
- func (s *GPUStorage[T]) ByteSize() int
- func (s *GPUStorage[T]) CopyFromDevice(src *GPUStorage[T], dstOffsetElems, srcOffsetElems, numElems int) error
- func (s *GPUStorage[T]) CopyFromDeviceAsync(src *GPUStorage[T], dstOffsetElems, srcOffsetElems, numElems int, ...) error
- func (s *GPUStorage[T]) CopyFromHost(data []T, dstOffsetElems int) error
- func (s *GPUStorage[T]) CopyFromHostAsync(data []T, dstOffsetElems int, stream gpuapi.Stream) error
- func (s *GPUStorage[T]) CopyTo(dst []T) error
- func (s *GPUStorage[T]) DeviceID() int
- func (s *GPUStorage[T]) DeviceType() device.Type
- func (s *GPUStorage[T]) Free() error
- func (s *GPUStorage[T]) Len() int
- func (s *GPUStorage[T]) Managed() bool
- func (s *GPUStorage[T]) Ptr() unsafe.Pointer
- func (s *GPUStorage[T]) Set(data []T)
- func (s *GPUStorage[T]) Slice() []T
- func (s *GPUStorage[T]) SubSlice(offsetElems, length int) *GPUStorage[T]
- func (s *GPUStorage[T]) TrySet(data []T) error
- func (s *GPUStorage[T]) TrySlice() ([]T, error)
- func (s *GPUStorage[T]) View(length int) *GPUStorage[T]
type IQ2XXSStorage
- func NewIQ2XXSStorage(numElements int) *IQ2XXSStorage
- func (s *IQ2XXSStorage) Dequantize() []float32
- func (s *IQ2XXSStorage) DeviceType() device.Type
- func (s *IQ2XXSStorage) Len() int
- func (s *IQ2XXSStorage) MarshalBinary() ([]byte, error)
- func (s *IQ2XXSStorage) RawBytes() []byte
- func (s *IQ2XXSStorage) RawScales() []float32
- func (s *IQ2XXSStorage) SetBlock(blockIdx int, scale float32, data []byte)
- func (s *IQ2XXSStorage) UnmarshalBinary(buf []byte) error
type IQ3SStorage
- func NewIQ3SStorageFromRaw(raw []byte, numElements int) (*IQ3SStorage, error)
- func (q *IQ3SStorage) Dequantize(dst []float32)
- func (q *IQ3SStorage) DeviceType() device.Type
- func (q *IQ3SStorage) GPUPtr() (unsafe.Pointer, int, int)
- func (q *IQ3SStorage) Len() int
- func (q *IQ3SStorage) NumBlocks() int
- func (q *IQ3SStorage) RawBytes() []byte
- func (q *IQ3SStorage) Set(_ []float32)
- func (q *IQ3SStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (q *IQ3SStorage) Slice() []float32
type IQ4NLStorage
- func MergeIQ4NLStorage(storages ...*IQ4NLStorage) *IQ4NLStorage
- func NewIQ4NLStorageFromRaw(raw []byte, numElements int) (*IQ4NLStorage, error)
- func (q *IQ4NLStorage) Dequantize(dst []float32)
- func (q *IQ4NLStorage) DeviceType() device.Type
- func (q *IQ4NLStorage) GPUPtr() (unsafe.Pointer, int, int)
- func (q *IQ4NLStorage) Len() int
- func (q *IQ4NLStorage) NumBlocks() int
- func (q *IQ4NLStorage) RawBytes() []byte
- func (q *IQ4NLStorage) Set(_ []float32)
- func (q *IQ4NLStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (q *IQ4NLStorage) Slice() []float32
type MmapStorage
- func NewMmapStorage(data []byte, length int, qtype GGMLType) (*MmapStorage, error)
- func (s *MmapStorage) ByteSize() int
- func (s *MmapStorage) DeviceType() device.Type
- func (s *MmapStorage) GPUPtr() (unsafe.Pointer, int, int)
- func (s *MmapStorage) Len() int
- func (s *MmapStorage) Q4KBlockRaw(blockIdx int) []byte
- func (s *MmapStorage) Q5KBlockRaw(blockIdx int) []byte
- func (s *MmapStorage) Q6KBlockRaw(blockIdx int) []byte
- func (s *MmapStorage) QType() GGMLType
- func (s *MmapStorage) RawBytes() []byte
- func (s *MmapStorage) RawBytesGPU() []byte
- func (s *MmapStorage) Set(_ []float32)
- func (s *MmapStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (s *MmapStorage) Slice() []float32
- func (s *MmapStorage) SliceElements(start, end int) (*MmapStorage, error)
type NF4Storage
- func NewNF4Storage(src []float32, shape []int) *NF4Storage
- func (s *NF4Storage) ByteSize() int64
- func (s *NF4Storage) Dequantize() []float32
- func (s *NF4Storage) DeviceType() device.Type
- func (s *NF4Storage) Len() int
- func (s *NF4Storage) NumBlocks() int
- func (s *NF4Storage) Quantize(src []float32) error
- func (s *NF4Storage) Set(data []float32)
- func (s *NF4Storage) Slice() []float32
type NVFloat4Storage
- func NewNVFloat4Storage(src []float32, shape []int) *NVFloat4Storage
- func (s *NVFloat4Storage) ByteSize() int
- func (s *NVFloat4Storage) Dequantize() []float32
- func (s *NVFloat4Storage) DeviceType() device.Type
- func (s *NVFloat4Storage) Len() int
- func (s *NVFloat4Storage) NumBlocks() int
- func (s *NVFloat4Storage) Quantize(data []float32) error
- func (s *NVFloat4Storage) Set(data []float32)
- func (s *NVFloat4Storage) Slice() []float32
type Numeric
type Q4KStorage
- func MergeQ4KStorage(storages ...*Q4KStorage) *Q4KStorage
- func NewQ4KStorageFromRaw(raw []byte, numElements int) (*Q4KStorage, error)
- func QuantizeQ4K(src []float32) *Q4KStorage
- func (q *Q4KStorage) Dequantize(dst []float32)
- func (q *Q4KStorage) DequantizeSubBlock(blkIdx, subIdx int, dst []float32)
- func (q *Q4KStorage) DeviceType() device.Type
- func (q *Q4KStorage) GPUPtr() (unsafe.Pointer, int, int)
- func (q *Q4KStorage) Len() int
- func (q *Q4KStorage) NumBlocks() int
- func (q *Q4KStorage) RawBytes() []byte
- func (q *Q4KStorage) Set(_ []float32)
- func (q *Q4KStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (q *Q4KStorage) Slice() []float32
type Q4Storage
- func MergeQ4Storage(storages ...*Q4Storage) *Q4Storage
- func NewQ4StorageFromRaw(raw []byte, numElements int) (*Q4Storage, error)
- func QuantizeQ4(src []float32) *Q4Storage
- func (q *Q4Storage) BlockData(i int) *byte
- func (q *Q4Storage) BlockPtr(i int) *byte
- func (q *Q4Storage) BlockScaleF32(i int) float32
- func (q *Q4Storage) ByteSize() int
- func (q *Q4Storage) Dequantize(dst []float32)
- func (q *Q4Storage) DeviceType() device.Type
- func (q *Q4Storage) GPUPtr() (unsafe.Pointer, int, int)
- func (q *Q4Storage) Len() int
- func (q *Q4Storage) NumBlocks() int
- func (q *Q4Storage) RawBytes() []byte
- func (q *Q4Storage) RawBytesGPU(blocksPerRow int) []byte
- func (q *Q4Storage) Set(_ []float32)
- func (q *Q4Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (q *Q4Storage) Slice() []float32
type Q5KStorage
- func NewQ5KStorageFromRaw(raw []byte, numElements int) (*Q5KStorage, error)
- func (q *Q5KStorage) BlockRaw(blockIdx int) []byte
- func (q *Q5KStorage) Dequantize(dst []float32)
- func (q *Q5KStorage) DeviceType() device.Type
- func (q *Q5KStorage) GPUPtr() (unsafe.Pointer, int, int)
- func (q *Q5KStorage) Len() int
- func (q *Q5KStorage) NumBlocks() int
- func (q *Q5KStorage) RawBytes() []byte
- func (q *Q5KStorage) Set(_ []float32)
- func (q *Q5KStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (q *Q5KStorage) Slice() []float32
type Q5_0Storage
- func NewQ5_0StorageFromRaw(raw []byte, numElements int) (*Q5_0Storage, error)
- func (q *Q5_0Storage) Dequantize(dst []float32)
- func (q *Q5_0Storage) DeviceType() device.Type
- func (q *Q5_0Storage) GPUPtr() (unsafe.Pointer, int, int)
- func (q *Q5_0Storage) Len() int
- func (q *Q5_0Storage) NumBlocks() int
- func (q *Q5_0Storage) RawBytes() []byte
- func (q *Q5_0Storage) RawBytesGPU() []byte
- func (q *Q5_0Storage) Set(_ []float32)
- func (q *Q5_0Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (q *Q5_0Storage) Slice() []float32
type Q6KStorage
- func MergeQ6KStorage(storages ...*Q6KStorage) *Q6KStorage
- func NewQ6KStorageFromRaw(raw []byte, numElements int) (*Q6KStorage, error)
- func (q *Q6KStorage) BlockRaw(blockIdx int) []byte
- func (q *Q6KStorage) Dequantize(dst []float32)
- func (q *Q6KStorage) DeviceType() device.Type
- func (q *Q6KStorage) GPUPtr() (unsafe.Pointer, int, int)
- func (q *Q6KStorage) Len() int
- func (q *Q6KStorage) NumBlocks() int
- func (q *Q6KStorage) RawBytes() []byte
- func (q *Q6KStorage) Set(_ []float32)
- func (q *Q6KStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (q *Q6KStorage) Slice() []float32
type Q8Storage
- func NewQ8StorageFromBlocks(scales []float32, quants []int8, numElements int) (*Q8Storage, error)
- func QuantizeQ8(src []float32) *Q8Storage
- func (q *Q8Storage) BlockQuants(i int) []int8
- func (q *Q8Storage) BlockScale(i int) float32
- func (q *Q8Storage) ByteSize() int
- func (q *Q8Storage) Dequantize(dst []float32)
- func (q *Q8Storage) DequantizeBlock(blockIdx int, dst *[32]float32)
- func (q *Q8Storage) DequantizeRange(dst []float32, start, count int)
- func (q *Q8Storage) DeviceType() device.Type
- func (q *Q8Storage) GPUPtr() (unsafe.Pointer, int, int)
- func (q *Q8Storage) Len() int
- func (q *Q8Storage) NumBlocks() int
- func (q *Q8Storage) RawBytes() []byte
- func (q *Q8Storage) Set(_ []float32)
- func (q *Q8Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (q *Q8Storage) Slice() []float32
type Storage
type Tensor
- func NewFromType(t reflect.Type, shape []int, data any) (Tensor, error)
type TensorBool
- func NewBool(shape []int, data []bool) (*TensorBool, error)
- func (t *TensorBool) Bytes() ([]byte, error)
- func (t *TensorBool) DType() reflect.Type
- func (t *TensorBool) Data() []bool
- func (t *TensorBool) Shape() []int
type TensorNumeric
- func New[T Numeric](shape []int, data []T) (*TensorNumeric[T], error)
- func NewFromBytes[T Numeric](shape []int, data []byte) (*TensorNumeric[T], error)
- func NewWithStorage[T Numeric](shape []int, s Storage[T]) (*TensorNumeric[T], error)
- func ToCPU[T Numeric](t *TensorNumeric[T]) *TensorNumeric[T]
- func ToGPU[T Numeric](t *TensorNumeric[T]) (*TensorNumeric[T], error)
- func ToGPUDevice[T Numeric](t *TensorNumeric[T], deviceID int) (*TensorNumeric[T], error)
- func (t *TensorNumeric[T]) At(indices ...int) (T, error)
- func (t *TensorNumeric[T]) Bytes() ([]byte, error)
- func (t *TensorNumeric[T]) Copy() *TensorNumeric[T]
- func (t *TensorNumeric[T]) DType() reflect.Type
- func (t *TensorNumeric[T]) Data() []T
- func (t *TensorNumeric[T]) Dims() int
- func (t *TensorNumeric[T]) Each(fn func(T))
- func (t *TensorNumeric[T]) GetStorage() Storage[T]
- func (t *TensorNumeric[T]) Release()
- func (t *TensorNumeric[T]) Reshape(newShape []int) (*TensorNumeric[T], error)
- func (t *TensorNumeric[T]) Set(value T, indices ...int) error
- func (t *TensorNumeric[T]) SetData(data []T)
- func (t *TensorNumeric[T]) SetShape(shape []int)
- func (t *TensorNumeric[T]) SetStorage(s Storage[T])
- func (t *TensorNumeric[T]) SetStrides(strides []int)
- func (t *TensorNumeric[T]) Shape() []int
- func (t *TensorNumeric[T]) ShapeEquals(other *TensorNumeric[T]) bool
- func (t *TensorNumeric[T]) Size() int
- func (t *TensorNumeric[T]) Slice(ranges ...[2]int) (*TensorNumeric[T], error)
- func (t *TensorNumeric[T]) Strides() []int
- func (t *TensorNumeric[T]) String() string
type TensorString
- func NewString(shape []int, data []string) (*TensorString, error)
- func (t *TensorString) DType() reflect.Type
- func (t *TensorString) Data() []string
- func (t *TensorString) Shape() []int
type TernaryStorage
- func NewTernaryStorage(size int) *TernaryStorage
- func NewTernaryStorageFrom(values []int8) *TernaryStorage
- func (s *TernaryStorage) DeviceType() device.Type
- func (s *TernaryStorage) Get(i int) int8
- func (s *TernaryStorage) Len() int
- func (s *TernaryStorage) RawBytes() []byte
- func (s *TernaryStorage) Set(data []float32)
- func (s *TernaryStorage) SetElement(i int, val int8)
- func (s *TernaryStorage) Slice() []float32
type W8A8Storage
- func NewW8A8StorageFromBlocks(scales []float32, quants []int8, numElements int) (*W8A8Storage, error)
- func QuantizeW8A8(src []float32) *W8A8Storage
- func (s *W8A8Storage) BlockQuants(i int) []int8
- func (s *W8A8Storage) BlockScale(i int) float32
- func (s *W8A8Storage) ByteSize() int
- func (s *W8A8Storage) Dequantize(dst []float32)
- func (s *W8A8Storage) DequantizeBlock(blockIdx int, dst *[w8a8GroupSize]float32)
- func (s *W8A8Storage) DeviceType() device.Type
- func (s *W8A8Storage) GPUPtr() (unsafe.Pointer, int, int)
- func (s *W8A8Storage) GroupSize() int
- func (s *W8A8Storage) Len() int
- func (s *W8A8Storage) NumGroups() int
- func (s *W8A8Storage) RawBytes() []byte
- func (s *W8A8Storage) Set(_ []float32)
- func (s *W8A8Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)
- func (s *W8A8Storage) Slice() []float32

Constants ¶

This section is empty.

Variables ¶

View Source

var IQ4NLTable = [16]float32{
	-1.0, -0.6961928, -0.5250730, -0.3947555,
	-0.2831612, -0.1790887, -0.0805542, 0.0,
	0.0805542, 0.1790887, 0.2831612, 0.3947555,
	0.5250730, 0.6961928, 1.0, 1.3312578,
}

IQ4NLTable is the non-linear 4-bit quantization lookup table. These 16 values are the reconstruction points for the IQ4_NL format. Reference: llama.cpp kvalues_iq4nl in ggml-quants.c

Functions ¶

func AssertClose ¶

func AssertClose[T Numeric](t *testing.T, expected, actual *TensorNumeric[T], tolerance float64)

AssertClose checks if two tensors are close enough and fails the test if they are not.

func AssertEquals ¶

func AssertEquals[T Numeric](t *testing.T, expected, actual *TensorNumeric[T])

AssertEquals checks if two tensors are equal and fails the test if they are not.

func BroadcastIndex ¶

func BroadcastIndex(index int, shape, outputShape []int, broadcast bool) int

BroadcastIndex computes the index in the original tensor for a given index in the broadcasted tensor.

func BroadcastShapes ¶

func BroadcastShapes(a, b []int) (shape []int, broadcastA, broadcastB bool, err error)

BroadcastShapes computes the resulting shape of a broadcast operation between two shapes.

func ConvertInt64ToInt ¶

func ConvertInt64ToInt(s []int64) []int

ConvertInt64ToInt converts a slice of int64 to a slice of int.

func ConvertIntToInt64 ¶

func ConvertIntToInt64(s []int) []int64

ConvertIntToInt64 converts a slice of int to a slice of int64.

func DequantizeIQ3S ¶ added in v0.11.0

func DequantizeIQ3S(raw []byte, dst []float32)

DequantizeIQ3S dequantizes one IQ3_S super-block (110 bytes) into 256 float32 values. This matches llama.cpp's dequantize_row_iq3_s.

func DequantizeIQ4NL ¶ added in v0.11.0

func DequantizeIQ4NL(raw []byte, dst []float32)

DequantizeIQ4NL dequantizes one IQ4_NL block (18 bytes) into 32 float32 values.

func DequantizeQ4K ¶

func DequantizeQ4K(raw []byte, dst []float32)

DequantizeQ4K dequantizes one Q4_K super-block (144 bytes) into 256 float32 values. Each 32 bytes of quantized data produces 64 output values: low nibbles map to the first 32 positions and high nibbles map to the next 32 positions. This matches llama.cpp's dequantize_row_q4_K.

func DequantizeQ5K ¶

func DequantizeQ5K(raw []byte, dst []float32)

DequantizeQ5K dequantizes one Q5_K super-block (176 bytes) into 256 float32 values. Same split ordering as Q4_K, but each element has an extra high bit from qh. For each group of 64 elements (32 bytes of ql):

low nibbles + qh bit (2*group)   -> positions j..j+31
high nibbles + qh bit (2*group+1) -> positions j+32..j+63

This matches llama.cpp's dequantize_row_q5_K.

func DequantizeQ5_0 ¶ added in v0.3.0

func DequantizeQ5_0(raw []byte, dst []float32)

DequantizeQ5_0 dequantizes one Q5_0 block (22 bytes) into 32 float32 values. This matches llama.cpp's dequantize_row_q5_0.

func DequantizeQ6K ¶

func DequantizeQ6K(raw []byte, dst []float32)

DequantizeQ6K dequantizes one Q6_K super-block (210 bytes) into 256 float32 values. Each 128-element half uses 64 ql bytes + 32 qh bytes to produce 4 groups of 32:

low nibbles of ql[0:32]  + qh bits 0-1 -> positions 0-31
low nibbles of ql[32:64] + qh bits 2-3 -> positions 32-63
high nibbles of ql[0:32] + qh bits 4-5 -> positions 64-95
high nibbles of ql[32:64]+ qh bits 6-7 -> positions 96-127

This matches llama.cpp's dequantize_row_q6_K.

func Equals ¶

func Equals[T Numeric](a, b *TensorNumeric[T]) bool

Equals checks if two tensors are equal.

func Float32ToBytes ¶

func Float32ToBytes(f []float32) ([]byte, error)

Float32ToBytes converts a float32 slice to a byte slice.

func GemmF32W8A8NT ¶ added in v0.3.0

func GemmF32W8A8NT(m, n, k int, a []float32, b *W8A8Storage, c []float32)

GemmF32W8A8NT computes C = A * B^T where A is float32 [M,K] and B is W8A8 [N,K]. B is stored in row-major W8A8 format. The "NT" suffix means B is not transposed in memory — the caller passes B in its original [N,K] layout. The quantized path expects K to be a multiple of w8a8GroupSize (32); when K is not aligned, the function falls back to a dequantization path.

func GemmW8A8 ¶ added in v0.3.0

func GemmW8A8(m, n, k int, a, b *W8A8Storage, c []float32)

GemmW8A8 computes C = A * B where both A and B are W8A8 quantized. A has logical shape [M, K], B has shape [K, N], C is float32 [M, N]. Dequantizes both operands and accumulates in FP32 for numerical stability. For the optimized INT8xINT8->FP32 path, use GemmW8A8NT with [N,K] layout.

func GemmW8A8NT ¶ added in v0.3.0

func GemmW8A8NT(m, n, k int, a, b *W8A8Storage, c []float32)

GemmW8A8NT computes C = A * B^T where A is W8A8 [M,K] and B is W8A8 [N,K]. B is stored in row-major [N,K] layout; the transpose is implicit. Uses INT8xINT8 dot product with FP32 accumulation for numerical stability. K must be a multiple of w8a8GroupSize (32).

func Int8ToBytes ¶

func Int8ToBytes(i []int8) ([]byte, error)

Int8ToBytes converts an int8 slice to a byte slice.

func ListQuantTypes ¶ added in v0.3.0

func ListQuantTypes() []string

ListQuantTypes returns the names of all registered quantization formats in sorted order.

func MadviseDontNeed ¶ added in v0.10.0

func MadviseDontNeed(data []byte) error

MadviseDontNeed hints to the kernel that the specified region is no longer needed. The kernel may free the physical pages, reducing RSS. Use after processing a layer to release pages back to the OS.

func MadviseRandom ¶ added in v0.10.0

func MadviseRandom(data []byte) error

MadviseRandom hints to the kernel that the mmap'd region will be accessed randomly. This disables read-ahead, which is optimal during inference when individual transformer layers are accessed in unpredictable patterns (especially with MoE or speculative decoding).

func MadviseSequential ¶ added in v0.10.0

func MadviseSequential(data []byte) error

MadviseSequential hints to the kernel that the mmap'd region will be accessed sequentially. This enables aggressive read-ahead, which is optimal during model loading when all tensors are read in order.

func MadviseWillNeed ¶ added in v0.10.0

func MadviseWillNeed(data []byte) error

MadviseWillNeed hints to the kernel that the specified region will be needed soon. The kernel may start paging in the data asynchronously. Use this to prefetch the next transformer layer's weights while the current layer is computing.

func Mmap ¶ added in v0.7.0

func Mmap(fd uintptr, offset int64, length int) ([]byte, error)

Mmap maps a region of a file descriptor into memory.

func MmapFile ¶ added in v0.7.0

func MmapFile(path string) (data []byte, closer func() error, err error)

MmapFile memory-maps the entire file at the given path for reading. It returns the mapped byte slice and a cleanup function that unmaps the region. The caller must call the cleanup function when done to release the mapping.

On Unix (Linux, Darwin), this uses syscall.Mmap with PROT_READ and MAP_PRIVATE.

func Munmap ¶ added in v0.7.0

func Munmap(data []byte) error

Munmap releases a previously mapped memory region.

func Ones ¶

func Ones[T Numeric](size int) []T

Ones creates a slice of the given size filled with ones.

func Product ¶

func Product(s []int) int

Product returns the product of the elements in a slice of ints.

func Q4GPUDataOffset ¶

func Q4GPUDataOffset(totalBlocks int) int

Q4GPUDataOffset returns the byte offset from the start of RawBytesGPU output where the packed data region begins, given the total number of blocks.

func Q4GPUScaleOffset ¶

func Q4GPUScaleOffset() int

Q4GPUScaleOffset returns the byte offset from the start of RawBytesGPU output where the scale region begins (always 0).

func Q5_0GPUQhOffset ¶ added in v1.2.0

func Q5_0GPUQhOffset(totalBlocks int) int

Q5_0GPUQhOffset returns the byte offset where the qh region starts in the RawBytesGPU layout, given the total number of blocks.

func Q5_0GPUQsOffset ¶ added in v1.2.0

func Q5_0GPUQsOffset(totalBlocks int) int

Q5_0GPUQsOffset returns the byte offset where the qs region starts in the RawBytesGPU layout, given the total number of blocks.

func RegisterQuantType ¶ added in v0.3.0

func RegisterQuantType(name string, d Dequantizer)

RegisterQuantType registers a quantization format by name. It panics if name is empty or if a format with that name is already registered. This is intended to be called from init() functions.

func SameShape ¶

func SameShape(a, b []int) bool

SameShape checks if two shapes are identical.

func ShapesEqual ¶

func ShapesEqual(a, b []int) bool

ShapesEqual compares two shapes and returns true if they are equal.

func Uint8ToBytes ¶

func Uint8ToBytes(u []uint8) ([]byte, error)

Uint8ToBytes converts a uint8 slice to a byte slice.

Types ¶

type AWQStorage ¶ added in v0.3.0

type AWQStorage struct {
	// contains filtered or unexported fields
}

AWQStorage holds AWQ group-quantized tensor data on CPU.

func NewAWQStorageFromRaw ¶ added in v0.3.0

func NewAWQStorageFromRaw(data []uint32, scales, zeros []float16.Float16, numElements, groupSize int) (*AWQStorage, error)

NewAWQStorageFromRaw creates AWQStorage from pre-extracted components. data is the packed INT4 values (all groups concatenated, 8 nibbles per uint32). scales and zeros are FP16 values, one per group. numElements is the logical element count. groupSize defines the quantization group size.

func QuantizeAWQ ¶ added in v0.3.0

func QuantizeAWQ(src []float32, groupSize int) *AWQStorage

QuantizeAWQ quantizes a float32 slice into AWQ format. groupSize is the number of elements per group (typically 128). Weights are quantized to 4-bit (INT4, unsigned 0-15).

func (*AWQStorage) ByteSize ¶ added in v0.3.0

func (s *AWQStorage) ByteSize() int

ByteSize returns the raw byte size of the quantized data. Each group: packed uint32 data bytes + 2 bytes scale + 2 bytes zero.

func (*AWQStorage) Dequantize ¶ added in v0.3.0

func (s *AWQStorage) Dequantize(dst []float32)

Dequantize unpacks AWQ groups into dst. len(dst) must be >= s.Len(). Formula: weight_fp = (quant_int4 - zero) * scale

func (*AWQStorage) DeviceType ¶ added in v0.3.0

func (s *AWQStorage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*AWQStorage) GPUPtr ¶ added in v0.3.0

func (s *AWQStorage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID.

func (*AWQStorage) GroupSize ¶ added in v0.3.0

func (s *AWQStorage) GroupSize() int

GroupSize returns the number of elements per group.

func (*AWQStorage) Len ¶ added in v0.3.0

func (s *AWQStorage) Len() int

Len returns the number of logical float32 elements.

func (*AWQStorage) NumGroups ¶ added in v0.3.0

func (s *AWQStorage) NumGroups() int

NumGroups returns the number of quantization groups.

func (*AWQStorage) Set ¶ added in v0.3.0

func (s *AWQStorage) Set(_ []float32)

Set is not supported on quantized storage (weights are immutable).

func (*AWQStorage) SetGPUPtr ¶ added in v0.3.0

func (s *AWQStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw bytes.

func (*AWQStorage) Slice ¶ added in v0.3.0

func (s *AWQStorage) Slice() []float32

Slice returns a dequantized float32 view of the data.

type Addable ¶

type Addable interface {
	~int | ~int8 | ~int16 | ~int32 | ~int64 |
		~uint | ~uint32 | ~uint64 |
		~float32 | ~float64
}

Addable defines the constraint for numeric types that support the built-in arithmetic operators directly (e.g., +, -, *, /) and zero literals. This intentionally excludes custom minifloat types like float8.Float8, float16.Float16, and float16.BFloat16, which are defined types that do not support Go's built-in operators without explicit conversion helpers.

type BFloat16Storage ¶

type BFloat16Storage struct {
	// contains filtered or unexported fields
}

BFloat16Storage holds float32 tensor data in BFloat16 format on CPU. It implements Storage[float32] so that models can use BF16 weights with FP32 activations (mixed-precision inference). The raw BF16 bytes can be uploaded to GPU for use with cublasGemmEx.

func NewBFloat16Storage ¶

func NewBFloat16Storage(src []float32) *BFloat16Storage

NewBFloat16Storage converts float32 data to BFloat16 format.

func NewBFloat16StorageFromRaw ¶

func NewBFloat16StorageFromRaw(data []uint16) *BFloat16Storage

NewBFloat16StorageFromRaw creates a BFloat16Storage from pre-encoded uint16 values.

func (*BFloat16Storage) DeviceType ¶

func (s *BFloat16Storage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*BFloat16Storage) GPUPtr ¶

func (s *BFloat16Storage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID. The returned pointer is nil if no GPU copy exists.

func (*BFloat16Storage) Len ¶

func (s *BFloat16Storage) Len() int

Len returns the number of logical float32 elements.

func (*BFloat16Storage) RawBytes ¶

func (s *BFloat16Storage) RawBytes() []byte

RawBytes returns the raw BF16 data as a byte slice (2 bytes per element).

func (*BFloat16Storage) Set ¶

func (s *BFloat16Storage) Set(data []float32)

Set encodes float32 data into BFloat16 format.

func (*BFloat16Storage) SetGPUPtr ¶

func (s *BFloat16Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw BF16 bytes.

func (*BFloat16Storage) Slice ¶

func (s *BFloat16Storage) Slice() []float32

Slice decodes BFloat16 data to float32.

type CPUStorage ¶

type CPUStorage[T Numeric] struct {
	// contains filtered or unexported fields
}

CPUStorage is a CPU-backed Storage implementation wrapping a Go slice. Slice() returns the underlying slice with zero copy.

func NewCPUStorage ¶

func NewCPUStorage[T Numeric](data []T) *CPUStorage[T]

NewCPUStorage creates a new CPUStorage wrapping the provided data slice.

func (*CPUStorage[T]) DeviceType ¶

func (s *CPUStorage[T]) DeviceType() device.Type

DeviceType returns device.CPU.

func (*CPUStorage[T]) Len ¶

func (s *CPUStorage[T]) Len() int

Len returns the number of elements.

func (*CPUStorage[T]) Set ¶

func (s *CPUStorage[T]) Set(data []T)

Set replaces the underlying data slice.

func (*CPUStorage[T]) Slice ¶

func (s *CPUStorage[T]) Slice() []T

Slice returns the underlying data slice directly (zero copy).

type Dequantizer ¶ added in v0.3.0

type Dequantizer interface {
	// Dequantize decodes quantized bytes in src into float32 values in dst.
	// The caller must ensure dst has sufficient capacity for the decoded output.
	Dequantize(src []byte, dst []float32) error

	// BlockSize returns the number of elements per quantization block.
	BlockSize() int

	// BitsPerWeight returns the effective number of bits per weight element.
	BitsPerWeight() int
}

Dequantizer decodes quantized data back to floating point.

func GetQuantType ¶ added in v0.3.0

func GetQuantType(name string) (Dequantizer, bool)

GetQuantType returns the Dequantizer registered under name. The second return value is false if no format is registered under that name.

type FP8E4M3Storage ¶

type FP8E4M3Storage struct {
	// contains filtered or unexported fields
}

FP8E4M3Storage holds FP8 E4M3 quantized tensor data on CPU. Uses per-tensor absmax scaling: fp8_value = float32_value / scale.

func NewFP8E4M3Storage ¶

func NewFP8E4M3Storage(src []float32) *FP8E4M3Storage

NewFP8E4M3Storage quantizes float32 data into FP8 E4M3 format with absmax scaling.

func (*FP8E4M3Storage) DeviceType ¶

func (s *FP8E4M3Storage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*FP8E4M3Storage) GPUPtr ¶

func (s *FP8E4M3Storage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID. The returned pointer is nil if no GPU copy exists.

func (*FP8E4M3Storage) Len ¶

func (s *FP8E4M3Storage) Len() int

Len returns the number of logical float32 elements.

func (*FP8E4M3Storage) RawBytes ¶

func (s *FP8E4M3Storage) RawBytes() []byte

RawBytes returns the raw FP8 data as a byte slice (1 byte per element).

func (*FP8E4M3Storage) Scale ¶

func (s *FP8E4M3Storage) Scale() float32

Scale returns the per-tensor scale factor.

func (*FP8E4M3Storage) ScaleGPUPtr ¶

func (s *FP8E4M3Storage) ScaleGPUPtr() unsafe.Pointer

ScaleGPUPtr returns the GPU device pointer for the per-tensor scale factor.

func (*FP8E4M3Storage) Set ¶

func (s *FP8E4M3Storage) Set(data []float32)

Set encodes float32 data into FP8 E4M3 format.

func (*FP8E4M3Storage) SetGPUPtr ¶

func (s *FP8E4M3Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw FP8 bytes.

func (*FP8E4M3Storage) SetScaleGPUPtr ¶

func (s *FP8E4M3Storage) SetScaleGPUPtr(ptr unsafe.Pointer)

SetScaleGPUPtr stores the GPU device pointer for the per-tensor scale factor.

func (*FP8E4M3Storage) Slice ¶

func (s *FP8E4M3Storage) Slice() []float32

Slice decodes FP8 E4M3 data to float32 by multiplying by the scale factor.

type FP8E5M2Storage ¶

type FP8E5M2Storage struct {
	// contains filtered or unexported fields
}

FP8E5M2Storage holds FP8 E5M2 quantized tensor data on CPU. Uses per-tensor absmax scaling: fp8_value = float32_value / scale.

func NewFP8E5M2Storage ¶

func NewFP8E5M2Storage(src []float32) *FP8E5M2Storage

NewFP8E5M2Storage quantizes float32 data into FP8 E5M2 format with absmax scaling.

func (*FP8E5M2Storage) DeviceType ¶

func (s *FP8E5M2Storage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*FP8E5M2Storage) Len ¶

func (s *FP8E5M2Storage) Len() int

Len returns the number of logical float32 elements.

func (*FP8E5M2Storage) Scale ¶

func (s *FP8E5M2Storage) Scale() float32

Scale returns the per-tensor scale factor.

func (*FP8E5M2Storage) Set ¶

func (s *FP8E5M2Storage) Set(data []float32)

Set encodes float32 data into FP8 E5M2 format.

func (*FP8E5M2Storage) Slice ¶

func (s *FP8E5M2Storage) Slice() []float32

Slice decodes FP8 E5M2 data to float32 by multiplying by the scale factor.

type Float ¶

type Float interface {
	~float32 | ~float64
}

Float defines the constraint for floating-point types.

type Float16Storage ¶

type Float16Storage struct {
	// contains filtered or unexported fields
}

Float16Storage holds IEEE 754 half-precision (FP16) tensor data. Each element is stored as 2 bytes in little-endian order. This is a native FP16 storage type — no per-tensor scaling is needed.

func NewFloat16StorageFromF32 ¶

func NewFloat16StorageFromF32(src []float32) *Float16Storage

NewFloat16StorageFromF32 converts float32 data to FP16 and returns a Float16Storage.

func NewFloat16StorageFromRaw ¶ added in v0.2.0

func NewFloat16StorageFromRaw(raw []byte, numElems int) *Float16Storage

NewFloat16StorageFromRaw creates a Float16Storage from pre-encoded FP16 bytes. The raw slice must contain numElems * 2 bytes in little-endian IEEE 754 half-precision format. A copy is made so the caller can reuse raw.

func NewFloat16StorageGPU ¶

func NewFloat16StorageGPU(ptr unsafe.Pointer, numElems, deviceID int) *Float16Storage

NewFloat16StorageGPU creates a Float16Storage backed by a GPU device pointer. The storage has no host data; only the GPU pointer is set.

func (*Float16Storage) DeviceType ¶

func (s *Float16Storage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*Float16Storage) GPUPtr ¶

func (s *Float16Storage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID. The returned pointer is nil if no GPU copy exists.

func (*Float16Storage) Len ¶

func (s *Float16Storage) Len() int

Len returns the number of logical FP16 elements.

func (*Float16Storage) RawBytes ¶

func (s *Float16Storage) RawBytes() []byte

RawBytes returns the raw FP16 data as a byte slice (2 bytes per element).

func (*Float16Storage) Set ¶

func (s *Float16Storage) Set(data []float32)

Set encodes float32 data into FP16 format.

func (*Float16Storage) SetGPUByteSize ¶

func (s *Float16Storage) SetGPUByteSize(byteSize int)

SetGPUByteSize updates the GPU byte size. This is useful when the GPU allocation size differs from the logical byte size (e.g. padded allocations).

func (*Float16Storage) SetGPUPtr ¶

func (s *Float16Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw FP16 bytes.

func (*Float16Storage) Slice ¶

func (s *Float16Storage) Slice() []float32

Slice decodes FP16 data to float32. Returns a zero-filled slice if no host data is available (GPU-only storage).

func (*Float16Storage) SubSlice ¶

func (s *Float16Storage) SubSlice(offset, length int) *Float16Storage

SubSlice returns a zero-copy view into the storage from element offset for length elements. The caller must ensure the parent outlives the returned view.

type GGMLType ¶ added in v0.7.0

type GGMLType int

GGMLType identifies the quantization format of mmap'd tensor data. These values match the GGML type IDs used in GGUF files.

const (
	GGMLTypeF32  GGMLType = 0
	GGMLTypeF16  GGMLType = 1
	GGMLTypeQ4_0 GGMLType = 2
	GGMLTypeQ4_1 GGMLType = 3
	GGMLTypeQ5_0 GGMLType = 6
	GGMLTypeQ5_1 GGMLType = 7
	GGMLTypeQ8_0 GGMLType = 8
	GGMLTypeQ4_K GGMLType = 12
	GGMLTypeQ5_K GGMLType = 13
	GGMLTypeQ6_K GGMLType = 14
	GGMLTypeBF16 GGMLType = 30
)

type GPTQStorage ¶ added in v0.3.0

type GPTQStorage struct {
	// contains filtered or unexported fields
}

GPTQStorage holds GPTQ group-quantized tensor data on CPU.

func NewGPTQStorageFromRaw ¶ added in v0.3.0

func NewGPTQStorageFromRaw(data []byte, scales, zeros []float16.Float16, numElements, groupSize, bits int) (*GPTQStorage, error)

NewGPTQStorageFromRaw creates GPTQStorage from pre-extracted components. data is the packed quantized values (all groups concatenated). scales and zeros are FP16 values, one per group. numElements is the logical element count. groupSize and bits define the quantization parameters.

func QuantizeGPTQ ¶ added in v0.3.0

func QuantizeGPTQ(src []float32, groupSize, bits int) *GPTQStorage

QuantizeGPTQ quantizes a float32 slice into GPTQ format. groupSize is the number of elements per group (typically 128). bits must be 4 or 8.

func (*GPTQStorage) Bits ¶ added in v0.3.0

func (s *GPTQStorage) Bits() int

Bits returns the quantization bit width (4 or 8).

func (*GPTQStorage) ByteSize ¶ added in v0.3.0

func (s *GPTQStorage) ByteSize() int

ByteSize returns the raw byte size of the quantized data. Each group: packed data bytes + 2 bytes scale + 2 bytes zero.

func (*GPTQStorage) Dequantize ¶ added in v0.3.0

func (s *GPTQStorage) Dequantize(dst []float32)

Dequantize unpacks GPTQ groups into dst. len(dst) must be >= s.Len(). Formula: weight_fp = (quant_int - zero) * scale

func (*GPTQStorage) DeviceType ¶ added in v0.3.0

func (s *GPTQStorage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*GPTQStorage) GPUPtr ¶ added in v0.3.0

func (s *GPTQStorage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID.

func (*GPTQStorage) GroupSize ¶ added in v0.3.0

func (s *GPTQStorage) GroupSize() int

GroupSize returns the number of elements per group.

func (*GPTQStorage) Len ¶ added in v0.3.0

func (s *GPTQStorage) Len() int

Len returns the number of logical float32 elements.

func (*GPTQStorage) NumGroups ¶ added in v0.3.0

func (s *GPTQStorage) NumGroups() int

NumGroups returns the number of quantization groups.

func (*GPTQStorage) Set ¶ added in v0.3.0

func (s *GPTQStorage) Set(_ []float32)

Set is not supported on quantized storage (weights are immutable).

func (*GPTQStorage) SetGPUPtr ¶ added in v0.3.0

func (s *GPTQStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw bytes.

func (*GPTQStorage) Slice ¶ added in v0.3.0

func (s *GPTQStorage) Slice() []float32

Slice returns a dequantized float32 view of the data.

type GPUStorage ¶

type GPUStorage[T Numeric] struct {
	// contains filtered or unexported fields
}

GPUStorage is a GPU device-backed Storage implementation. Slice() copies data from the GPU to a new CPU slice (not zero-copy). Set() copies data from a CPU slice to the GPU. Each GPUStorage tracks which device it resides on via deviceID. When managed is true, the storage uses unified memory (cudaMallocManaged) and TrySlice/TrySet access the pointer directly without Memcpy.

Shared ownership: when View() is called, the returned GPUStorage shares the same refcount. Free() decrements the refcount; only the last Free actually releases memory (back to pool or via cudaFree). This avoids both double-free and GC-dependent cleanup for reshape/transpose views.

func NewGPUStorage ¶

func NewGPUStorage[T Numeric](length int, deviceID ...int) (*GPUStorage[T], error)

NewGPUStorage allocates GPU device memory for the given number of elements on the specified device. An optional deviceID selects the GPU (default 0).

func NewGPUStorageFromPool ¶

func NewGPUStorageFromPool[T Numeric](devPtr unsafe.Pointer, length int, pool gpuapi.MemPool, deviceID int) (*GPUStorage[T], error)

NewGPUStorageFromPool wraps a GPU device pointer allocated from a MemPool. When Free() is called, the pointer is returned to the pool instead of being freed via cudaFree. Uses reference counting so views can safely share the allocation without double-free or GC-dependent cleanup.

func NewGPUStorageFromPtr ¶

func NewGPUStorageFromPtr[T Numeric](devPtr unsafe.Pointer, length int, deviceID ...int) (*GPUStorage[T], error)

NewGPUStorageFromPtr wraps an existing GPU device pointer as a GPUStorage. A GC finalizer ensures the device memory is freed if Free() is not called. An optional deviceID records which device the pointer belongs to (default 0).

func NewGPUStorageFromSlice ¶

func NewGPUStorageFromSlice[T Numeric](data []T, deviceID ...int) (*GPUStorage[T], error)

NewGPUStorageFromSlice allocates GPU device memory, copies data from a CPU slice, and returns a GPUStorage on the specified device. An optional deviceID selects the GPU (default 0).

func NewGPUStorageView ¶

func NewGPUStorageView[T Numeric](parent *GPUStorage[T], offsetElems, length int) *GPUStorage[T]

NewGPUStorageView creates a non-owning view into an existing GPUStorage starting at offsetElems elements from the beginning. The returned storage shares the parent's device memory -- no finalizer is set, so the parent must outlive the view.

func NewGPUStorageViewFromPtr ¶

func NewGPUStorageViewFromPtr[T Numeric](devPtr unsafe.Pointer, length int, deviceID int) *GPUStorage[T]

NewGPUStorageViewFromPtr creates a non-owning GPUStorage that wraps a raw device pointer. Free() is a no-op — the caller retains ownership of the memory. This is used for scratchpad buffers where the compute engine owns the allocation and the tensor is a temporary view into it.

func NewManagedGPUStorage ¶

func NewManagedGPUStorage[T Numeric](pool gpuapi.MemPool, length int, deviceID ...int) (*GPUStorage[T], error)

NewManagedGPUStorage allocates unified (managed) GPU memory via pool.AllocManaged. The returned storage is host-accessible: TrySlice and TrySet skip Memcpy. This is beneficial on hardware with coherent unified memory (e.g. DGX Spark NVLink-C2C). On backends that do not support managed memory, AllocManaged returns an error.

func (*GPUStorage[T]) ByteSize ¶ added in v1.1.0

func (s *GPUStorage[T]) ByteSize() int

ByteSize returns the total size in bytes of the stored elements.

func (*GPUStorage[T]) CopyFromDevice ¶

func (s *GPUStorage[T]) CopyFromDevice(src *GPUStorage[T], dstOffsetElems, srcOffsetElems, numElems int) error

CopyFromDevice copies numElems elements from src (at srcOffsetElems) into s (at dstOffsetElems) using a synchronous device-to-device memcpy. Both storages must reside on the same device.

func (*GPUStorage[T]) CopyFromDeviceAsync ¶

func (s *GPUStorage[T]) CopyFromDeviceAsync(src *GPUStorage[T], dstOffsetElems, srcOffsetElems, numElems int, stream gpuapi.Stream) error

CopyFromDeviceAsync copies numElems elements from src (at srcOffsetElems) into s (at dstOffsetElems) using an asynchronous device-to-device memcpy on the given stream. Both storages must reside on the same device.

func (*GPUStorage[T]) CopyFromHost ¶

func (s *GPUStorage[T]) CopyFromHost(data []T, dstOffsetElems int) error

CopyFromHost copies elements from the CPU slice data into s starting at dstOffsetElems using a synchronous host-to-device memcpy.

func (*GPUStorage[T]) CopyFromHostAsync ¶

func (s *GPUStorage[T]) CopyFromHostAsync(data []T, dstOffsetElems int, stream gpuapi.Stream) error

CopyFromHostAsync copies elements from a CPU slice into s starting at dstOffsetElems using an asynchronous host-to-device memcpy on the given stream. The caller must ensure the source slice remains valid until the stream is synchronized.

func (*GPUStorage[T]) CopyTo ¶

func (s *GPUStorage[T]) CopyTo(dst []T) error

CopyTo copies GPU device memory into an existing CPU slice without allocating. The destination must have at least Len() elements. Returns an error on failure.

func (*GPUStorage[T]) DeviceID ¶

func (s *GPUStorage[T]) DeviceID() int

DeviceID returns the GPU device ordinal this storage resides on.

func (*GPUStorage[T]) DeviceType ¶

func (s *GPUStorage[T]) DeviceType() device.Type

DeviceType returns the device type for this storage.

func (*GPUStorage[T]) Free ¶

func (s *GPUStorage[T]) Free() error

Free releases the GPU device memory. After calling Free, the storage must not be used. For refcounted storage (pool-backed with views), the refcount is decremented and memory is only returned to the pool when it reaches 0. For legacy (non-refcounted) views, Free is a no-op.

func (*GPUStorage[T]) Len ¶

func (s *GPUStorage[T]) Len() int

Len returns the number of elements.

func (*GPUStorage[T]) Managed ¶

func (s *GPUStorage[T]) Managed() bool

Managed returns true if this storage uses unified (managed) memory.

func (*GPUStorage[T]) Ptr ¶

func (s *GPUStorage[T]) Ptr() unsafe.Pointer

Ptr returns the raw GPU device pointer.

func (*GPUStorage[T]) Set ¶

func (s *GPUStorage[T]) Set(data []T)

Set copies data from a CPU slice to the GPU, replacing the current contents. On error, logs a warning instead of panicking.

func (*GPUStorage[T]) Slice ¶

func (s *GPUStorage[T]) Slice() []T

Slice copies device memory to a new CPU slice and returns it. On error, logs a warning and returns a zero-valued slice.

func (*GPUStorage[T]) SubSlice ¶

func (s *GPUStorage[T]) SubSlice(offsetElems, length int) *GPUStorage[T]

SubSlice returns a non-owning GPUStorage view into a sub-range of the receiver's device buffer, starting at offsetElems for length elements. No data is copied (no D2H transfer). The caller must ensure the parent outlives the returned view.

func (*GPUStorage[T]) TrySet ¶

func (s *GPUStorage[T]) TrySet(data []T) error

TrySet copies data from a CPU slice to the GPU, replacing the current contents. If the new slice has a different length, the old device memory is freed and new memory is allocated. For managed storage, data is written directly to the unified pointer without Memcpy. Returns an error on failure.

func (*GPUStorage[T]) TrySlice ¶

func (s *GPUStorage[T]) TrySlice() ([]T, error)

TrySlice copies device memory to a new CPU slice. For managed storage, the data is read directly from the unified pointer without a D2H Memcpy. Returns an error if the copy fails.

func (*GPUStorage[T]) View ¶

func (s *GPUStorage[T]) View(length int) *GPUStorage[T]

View returns a GPUStorage sharing the same device pointer but with a different element count. If the parent has a refcount (pool-backed), the view shares it: Free() on any copy decrements the refcount, and only the last Free returns memory to the pool. For non-refcounted storage the view uses the legacy no-op Free behavior.

type IQ2XXSStorage ¶ added in v0.11.0

type IQ2XXSStorage struct {
	// contains filtered or unexported fields
}

IQ2XXSStorage packs 2-bit quantized values with per-block scale factors. Each super-block holds 256 elements: 64 data bytes + 1 float32 scale. Dequantization: value = gridLookup(data_byte) * scale.

func NewIQ2XXSStorage ¶ added in v0.11.0

func NewIQ2XXSStorage(numElements int) *IQ2XXSStorage

NewIQ2XXSStorage creates an IQ2XXSStorage that can hold numElements values.

func (*IQ2XXSStorage) Dequantize ¶ added in v0.11.0

func (s *IQ2XXSStorage) Dequantize() []float32

Dequantize converts all quantized values to float32.

func (*IQ2XXSStorage) DeviceType ¶ added in v0.11.0

func (s *IQ2XXSStorage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*IQ2XXSStorage) Len ¶ added in v0.11.0

func (s *IQ2XXSStorage) Len() int

Len returns the number of quantized elements.

func (*IQ2XXSStorage) MarshalBinary ¶ added in v0.11.0

func (s *IQ2XXSStorage) MarshalBinary() ([]byte, error)

MarshalBinary encodes the storage as [numElements(4)] [scales...] [data...].

func (*IQ2XXSStorage) RawBytes ¶ added in v0.11.0

func (s *IQ2XXSStorage) RawBytes() []byte

RawBytes returns the underlying packed byte slice.

func (*IQ2XXSStorage) RawScales ¶ added in v0.11.0

func (s *IQ2XXSStorage) RawScales() []float32

RawScales returns the block scale factors.

func (*IQ2XXSStorage) SetBlock ¶ added in v0.11.0

func (s *IQ2XXSStorage) SetBlock(blockIdx int, scale float32, data []byte)

SetBlock sets the scale and packed data for the block at blockIdx. data must contain exactly 64 bytes (256 elements / 4 per byte).

func (*IQ2XXSStorage) UnmarshalBinary ¶ added in v0.11.0

func (s *IQ2XXSStorage) UnmarshalBinary(buf []byte) error

UnmarshalBinary decodes the storage from bytes produced by MarshalBinary.

type IQ3SStorage ¶ added in v0.11.0

type IQ3SStorage struct {
	// contains filtered or unexported fields
}

IQ3SStorage holds IQ3_S quantized tensor data on CPU.

func NewIQ3SStorageFromRaw ¶ added in v0.11.0

func NewIQ3SStorageFromRaw(raw []byte, numElements int) (*IQ3SStorage, error)

NewIQ3SStorageFromRaw creates IQ3SStorage from raw super-block data.

func (*IQ3SStorage) Dequantize ¶ added in v0.11.0

func (q *IQ3SStorage) Dequantize(dst []float32)

Dequantize unpacks all IQ3_S super-blocks into dst.

func (*IQ3SStorage) DeviceType ¶ added in v0.11.0

func (q *IQ3SStorage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*IQ3SStorage) GPUPtr ¶ added in v0.11.0

func (q *IQ3SStorage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID.

func (*IQ3SStorage) Len ¶ added in v0.11.0

func (q *IQ3SStorage) Len() int

Len returns the number of logical float32 elements.

func (*IQ3SStorage) NumBlocks ¶ added in v0.11.0

func (q *IQ3SStorage) NumBlocks() int

NumBlocks returns the number of IQ3_S super-blocks.

func (*IQ3SStorage) RawBytes ¶ added in v0.11.0

func (q *IQ3SStorage) RawBytes() []byte

RawBytes returns the raw IQ3_S super-block data for GPU upload.

func (*IQ3SStorage) Set ¶ added in v0.11.0

func (q *IQ3SStorage) Set(_ []float32)

Set panics because IQ3SStorage is immutable.

func (*IQ3SStorage) SetGPUPtr ¶ added in v0.11.0

func (q *IQ3SStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw bytes.

func (*IQ3SStorage) Slice ¶ added in v0.11.0

func (q *IQ3SStorage) Slice() []float32

Slice dequantizes and returns all elements as a float32 slice.

type IQ4NLStorage ¶ added in v0.11.0

type IQ4NLStorage struct {
	// contains filtered or unexported fields
}

IQ4NLStorage holds IQ4_NL quantized tensor data on CPU.

func MergeIQ4NLStorage ¶ added in v0.11.0

func MergeIQ4NLStorage(storages ...*IQ4NLStorage) *IQ4NLStorage

MergeIQ4NLStorage concatenates multiple IQ4NLStorage objects into one.

func NewIQ4NLStorageFromRaw ¶ added in v0.11.0

func NewIQ4NLStorageFromRaw(raw []byte, numElements int) (*IQ4NLStorage, error)

NewIQ4NLStorageFromRaw creates IQ4NLStorage from raw block data.

func (*IQ4NLStorage) Dequantize ¶ added in v0.11.0

func (q *IQ4NLStorage) Dequantize(dst []float32)

Dequantize unpacks all IQ4_NL blocks into dst.

func (*IQ4NLStorage) DeviceType ¶ added in v0.11.0

func (q *IQ4NLStorage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*IQ4NLStorage) GPUPtr ¶ added in v0.11.0

func (q *IQ4NLStorage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID. The returned pointer is nil if no GPU copy exists.

func (*IQ4NLStorage) Len ¶ added in v0.11.0

func (q *IQ4NLStorage) Len() int

Len returns the number of logical float32 elements.

func (*IQ4NLStorage) NumBlocks ¶ added in v0.11.0

func (q *IQ4NLStorage) NumBlocks() int

NumBlocks returns the number of IQ4_NL blocks.

func (*IQ4NLStorage) RawBytes ¶ added in v0.11.0

func (q *IQ4NLStorage) RawBytes() []byte

RawBytes returns the raw IQ4_NL block data for GPU upload.

func (*IQ4NLStorage) Set ¶ added in v0.11.0

func (q *IQ4NLStorage) Set(_ []float32)

Set panics because IQ4NLStorage is immutable.

func (*IQ4NLStorage) SetGPUPtr ¶ added in v0.11.0

func (q *IQ4NLStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw bytes.

func (*IQ4NLStorage) Slice ¶ added in v0.11.0

func (q *IQ4NLStorage) Slice() []float32

Slice dequantizes and returns all elements as a float32 slice.

type MmapStorage ¶ added in v0.7.0

type MmapStorage struct {
	// contains filtered or unexported fields
}

MmapStorage wraps a byte slice from an mmap'd GGUF file region. It implements Storage[float32] by lazily dequantizing the raw bytes on first access. The underlying byte slice is NOT copied -- it points directly into the mmap'd region.

func NewMmapStorage ¶ added in v0.7.0

func NewMmapStorage(data []byte, length int, qtype GGMLType) (*MmapStorage, error)

NewMmapStorage creates an MmapStorage that wraps a slice of mmap'd bytes. The data slice must remain valid for the lifetime of this storage (i.e., the mmap must not be unmapped while this storage is in use).

Parameters:

data: raw bytes from the mmap'd region for this tensor
length: number of logical float32 elements
qtype: the GGML quantization type of the raw data

func (*MmapStorage) ByteSize ¶ added in v0.7.0

func (s *MmapStorage) ByteSize() int

ByteSize returns the raw byte size of the mmap'd data.

func (*MmapStorage) DeviceType ¶ added in v0.7.0

func (s *MmapStorage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*MmapStorage) GPUPtr ¶ added in v0.9.0

func (s *MmapStorage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID. Returns (nil, 0, 0) if the data has not been uploaded to GPU yet.

func (*MmapStorage) Len ¶ added in v0.7.0

func (s *MmapStorage) Len() int

Len returns the number of logical float32 elements.

func (*MmapStorage) Q4KBlockRaw ¶ added in v0.15.0

func (s *MmapStorage) Q4KBlockRaw(blockIdx int) []byte

Q4KBlockRaw returns the raw 144-byte slice for Q4_K superblock blockIdx. Each Q4_K superblock encodes 256 float32 values. The caller must not modify the returned slice.

func (*MmapStorage) Q5KBlockRaw ¶ added in v0.15.0

func (s *MmapStorage) Q5KBlockRaw(blockIdx int) []byte

Q5KBlockRaw returns the raw 176-byte slice for Q5_K superblock blockIdx.

func (*MmapStorage) Q6KBlockRaw ¶ added in v0.15.0

func (s *MmapStorage) Q6KBlockRaw(blockIdx int) []byte

Q6KBlockRaw returns the raw 210-byte slice for Q6_K superblock blockIdx.

func (*MmapStorage) QType ¶ added in v0.7.0

func (s *MmapStorage) QType() GGMLType

QType returns the GGML quantization type of the stored data.

func (*MmapStorage) RawBytes ¶ added in v0.7.0

func (s *MmapStorage) RawBytes() []byte

RawBytes returns the raw mmap'd byte slice for direct GPU DMA upload.

func (*MmapStorage) RawBytesGPU ¶ added in v0.9.0

func (s *MmapStorage) RawBytesGPU() []byte

RawBytesGPU returns the raw bytes in GPU-optimized layout. For Q4_0, this repacks the interleaved blocks into separated scales+data format matching Q4Storage.RawBytesGPU. For all other types, returns the raw bytes as-is.

func (*MmapStorage) Set ¶ added in v0.7.0

func (s *MmapStorage) Set(_ []float32)

Set is not supported on mmap'd storage (weights are immutable).

func (*MmapStorage) SetGPUPtr ¶ added in v0.9.0

func (s *MmapStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw quantized bytes. After calling this, GPUPtr() returns the cached pointer and the GPU engine can skip per-operation H2D copies.

func (*MmapStorage) Slice ¶ added in v0.7.0

func (s *MmapStorage) Slice() []float32

Slice returns a dequantized float32 view of the mmap'd data. The first call triggers dequantization; subsequent calls return the cached result. For F32 data no dequantization is needed: the bytes are reinterpreted as float32 values and copied (zero-copy via unsafe would be ideal, but a copy is made for safety since mmap pages may be read-only).

func (*MmapStorage) SliceElements ¶ added in v0.15.0

func (s *MmapStorage) SliceElements(start, end int) (*MmapStorage, error)

SliceElements returns a new MmapStorage that covers elements [start, end) of this storage. Both start and end must be aligned to the quantization block size (32 for Q4_0/Q8_0, 256 for Q4_K/Q5_K/Q6_K). Returns an error if the range is not aligned or out of bounds.

This enables zero-copy expert slicing for stacked MoE weight tensors: instead of calling Slice() (which materializes the full tensor), callers can obtain a sub-tensor that still points into the mmap'd region.

type NF4Storage ¶ added in v0.3.0

type NF4Storage struct {
	Data       []byte    // packed: 2 NF4 indices per byte (lo nibble = first, hi nibble = second)
	Scales     []float32 // one absmax scale per block of nf4BlockSize elements
	MetaScales []float32 // one absmax scale per meta-block of nf4MetaBlockSize blocks
	Shape      []int
	// contains filtered or unexported fields
}

NF4Storage stores float32 data quantized to 4-bit normal floats with double quantization: block scales (one per nf4BlockSize elements) are themselves stored quantized using meta-blocks of nf4MetaBlockSize blocks.

func NewNF4Storage ¶ added in v0.3.0

func NewNF4Storage(src []float32, shape []int) *NF4Storage

NewNF4Storage quantizes src into NF4 format.

func (*NF4Storage) ByteSize ¶ added in v0.3.0

func (s *NF4Storage) ByteSize() int64

ByteSize returns the raw byte size of packed NF4 data plus scales.

func (*NF4Storage) Dequantize ¶ added in v0.3.0

func (s *NF4Storage) Dequantize() []float32

Dequantize decodes NF4 data back to float32.

func (*NF4Storage) DeviceType ¶ added in v0.3.0

func (s *NF4Storage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*NF4Storage) Len ¶ added in v0.3.0

func (s *NF4Storage) Len() int

Len returns the number of elements.

func (*NF4Storage) NumBlocks ¶ added in v0.3.0

func (s *NF4Storage) NumBlocks() int

NumBlocks returns the number of NF4 blocks.

func (*NF4Storage) Quantize ¶ added in v0.3.0

func (s *NF4Storage) Quantize(src []float32) error

Quantize encodes src into NF4 with double quantization.

func (*NF4Storage) Set ¶ added in v0.3.0

func (s *NF4Storage) Set(data []float32)

Set re-quantizes from a new float32 slice.

func (*NF4Storage) Slice ¶ added in v0.3.0

func (s *NF4Storage) Slice() []float32

Slice dequantizes and returns a CPU float32 slice.

type NVFloat4Storage ¶ added in v0.3.0

type NVFloat4Storage struct {
	Data   []byte            // packed 2 FP4 per byte, len = ceil(n/2)
	Scales []float16.Float16 // one scale per block of 16
	Shape  []int
	// contains filtered or unexported fields
}

NVFloat4Storage holds NVFP4 E2M1 quantized tensor data on CPU. Two FP4 values are packed per byte (little-endian nibble order). One float16 scale factor per block of 16 values.

func NewNVFloat4Storage ¶ added in v0.3.0

func NewNVFloat4Storage(src []float32, shape []int) *NVFloat4Storage

NewNVFloat4Storage creates an NVFloat4Storage by quantizing float32 data.

func (*NVFloat4Storage) ByteSize ¶ added in v0.3.0

func (s *NVFloat4Storage) ByteSize() int

ByteSize returns the total byte size of packed data + scales.

func (*NVFloat4Storage) Dequantize ¶ added in v0.3.0

func (s *NVFloat4Storage) Dequantize() []float32

Dequantize unpacks NVFP4 E2M1 data to float32.

func (*NVFloat4Storage) DeviceType ¶ added in v0.3.0

func (s *NVFloat4Storage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*NVFloat4Storage) Len ¶ added in v0.3.0

func (s *NVFloat4Storage) Len() int

Len returns the number of logical float32 elements.

func (*NVFloat4Storage) NumBlocks ¶ added in v0.3.0

func (s *NVFloat4Storage) NumBlocks() int

NumBlocks returns the number of NVFP4 blocks.

func (*NVFloat4Storage) Quantize ¶ added in v0.3.0

func (s *NVFloat4Storage) Quantize(data []float32) error

Quantize encodes float32 data into NVFP4 E2M1 format with block scaling.

func (*NVFloat4Storage) Set ¶ added in v0.3.0

func (s *NVFloat4Storage) Set(data []float32)

Set re-quantizes from float32 data.

func (*NVFloat4Storage) Slice ¶ added in v0.3.0

func (s *NVFloat4Storage) Slice() []float32

Slice returns a dequantized float32 view of the data.

type Numeric ¶

type Numeric interface {
	~int | ~int8 | ~int16 | ~int32 | ~int64 |
		~uint | uint8 | ~uint32 | ~uint64 |
		~float32 | ~float64 |
		float8.Float8 |
		float16.Float16 |
		float16.BFloat16
}

Numeric defines the constraint for numeric types that can be used in Tensors.

type Q4KStorage ¶

type Q4KStorage struct {
	// contains filtered or unexported fields
}

Q4KStorage holds Q4_K quantized tensor data on CPU.

func MergeQ4KStorage ¶ added in v0.5.0

func MergeQ4KStorage(storages ...*Q4KStorage) *Q4KStorage

MergeQ4KStorage concatenates multiple Q4KStorage objects into one. Used to merge Q/K/V or Gate/Up weight matrices row-wise for single-GEMV optimization during inference decode.

func NewQ4KStorageFromRaw ¶

func NewQ4KStorageFromRaw(raw []byte, numElements int) (*Q4KStorage, error)

NewQ4KStorageFromRaw creates Q4KStorage from raw super-block data.

func QuantizeQ4K ¶ added in v1.2.0

func QuantizeQ4K(src []float32) *Q4KStorage

QuantizeQ4K quantizes float32 values into Q4_K format. Q4_K uses asymmetric quantization with per-sub-block 6-bit scales and mins, plus a shared fp16 super-block scale and dmin. Each super-block holds 256 values in 144 bytes.

func (*Q4KStorage) Dequantize ¶

func (q *Q4KStorage) Dequantize(dst []float32)

Dequantize unpacks all Q4_K super-blocks into dst.

func (*Q4KStorage) DequantizeSubBlock ¶ added in v0.13.0

func (q *Q4KStorage) DequantizeSubBlock(blkIdx, subIdx int, dst []float32)

DequantizeSubBlock dequantizes a single sub-block (32 values) from a super-block. blkIdx is the super-block index, subIdx is 0..7.

func (*Q4KStorage) DeviceType ¶

func (q *Q4KStorage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*Q4KStorage) GPUPtr ¶

func (q *Q4KStorage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID. The returned pointer is nil if no GPU copy exists.

func (*Q4KStorage) Len ¶

func (q *Q4KStorage) Len() int

Len returns the number of logical float32 elements.

func (*Q4KStorage) NumBlocks ¶

func (q *Q4KStorage) NumBlocks() int

NumBlocks returns the number of Q4_K super-blocks.

func (*Q4KStorage) RawBytes ¶

func (q *Q4KStorage) RawBytes() []byte

RawBytes returns the raw Q4_K super-block data for GPU upload. The layout is contiguous super-blocks, each 144 bytes.

func (*Q4KStorage) Set ¶

func (q *Q4KStorage) Set(_ []float32)

Set panics because Q4KStorage is immutable.

func (*Q4KStorage) SetGPUPtr ¶

func (q *Q4KStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw bytes.

func (*Q4KStorage) Slice ¶

func (q *Q4KStorage) Slice() []float32

Slice dequantizes and returns all elements as a float32 slice.

type Q4Storage ¶

type Q4Storage struct {
	// contains filtered or unexported fields
}

Q4Storage holds Q4_0 quantized tensor data on CPU.

func MergeQ4Storage ¶

func MergeQ4Storage(storages ...*Q4Storage) *Q4Storage

MergeQ4Storage concatenates multiple Q4Storage objects into one. Used to merge Q/K/V or Gate/Up weight matrices row-wise for single-GEMV optimization during inference decode.

func NewQ4StorageFromRaw ¶

func NewQ4StorageFromRaw(raw []byte, numElements int) (*Q4Storage, error)

NewQ4StorageFromRaw creates Q4Storage from raw block data in the standard Q4_0 format: 18 bytes per block (2 bytes float16 scale LE + 16 bytes packed nibbles). numElements is the number of logical float32 elements the data represents.

func QuantizeQ4 ¶

func QuantizeQ4(src []float32) *Q4Storage

QuantizeQ4 quantizes a float32 slice into Q4_0 format. The input is padded to a multiple of 32 if necessary.

func (*Q4Storage) BlockData ¶

func (q *Q4Storage) BlockData(i int) *byte

BlockData returns a pointer to the 16 packed bytes for block i.

func (*Q4Storage) BlockPtr ¶

func (q *Q4Storage) BlockPtr(i int) *byte

BlockPtr returns a raw *byte pointer to block i's q4Block struct (18 bytes). The layout is: 2 bytes float16 scale (LE) + 16 bytes packed nibble data. Blocks are contiguous in memory with 18-byte stride.

func (*Q4Storage) BlockScaleF32 ¶

func (q *Q4Storage) BlockScaleF32(i int) float32

BlockScaleF32 returns the dequantization scale for block i as float32.

func (*Q4Storage) ByteSize ¶

func (q *Q4Storage) ByteSize() int

ByteSize returns the raw byte size of the quantized data. Each block is 18 bytes (2 byte scale + 16 bytes packed data).

func (*Q4Storage) Dequantize ¶

func (q *Q4Storage) Dequantize(dst []float32)

Dequantize unpacks Q4_0 blocks into dst. len(dst) must be >= q.Len(). Low nibbles map to the first half (positions 0-15) and high nibbles map to the second half (positions 16-31), matching llama.cpp's dequantize_row_q4_0.

func (*Q4Storage) DeviceType ¶

func (q *Q4Storage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*Q4Storage) GPUPtr ¶

func (q *Q4Storage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID. The returned pointer is nil if no GPU copy exists.

func (*Q4Storage) Len ¶

func (q *Q4Storage) Len() int

Len returns the number of logical float32 elements.

func (*Q4Storage) NumBlocks ¶

func (q *Q4Storage) NumBlocks() int

NumBlocks returns the number of Q4_0 blocks.

func (*Q4Storage) RawBytes ¶

func (q *Q4Storage) RawBytes() []byte

RawBytes serializes Q4_0 blocks as contiguous bytes for GPU upload. Each block is 18 bytes: 2 bytes little-endian float16 scale + 16 bytes packed data.

func (*Q4Storage) RawBytesGPU ¶

func (q *Q4Storage) RawBytesGPU(blocksPerRow int) []byte

RawBytesGPU serializes Q4_0 blocks in a GPU-optimized separated layout. The layout is global (not per-row), so it works regardless of how the weight matrix is logically viewed (before or after virtual transpose):

[all_scales: N * 2 bytes] [padding to 16-byte align] [all_data: N * 16 bytes]

The kernel indexes by block_idx = row * blocks_per_row + bi, which is the same linear block index regardless of the row definition.

blocksPerRow is unused but kept for API compatibility.

func (*Q4Storage) Set ¶

func (q *Q4Storage) Set(_ []float32)

Set is not supported on quantized storage (weights are immutable).

func (*Q4Storage) SetGPUPtr ¶

func (q *Q4Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw bytes. byteSize must match len(RawBytes()). The caller retains ownership of the pointer.

func (*Q4Storage) Slice ¶

func (q *Q4Storage) Slice() []float32

Slice returns a dequantized float32 view of the data. The result is cached: the first call dequantizes, subsequent calls return the same slice. This avoids O(N) re-allocation and GC pressure when operations like MatMul call Data() on Q4-backed weight tensors.

type Q5KStorage ¶

type Q5KStorage struct {
	// contains filtered or unexported fields
}

Q5KStorage holds Q5_K quantized tensor data on CPU.

func NewQ5KStorageFromRaw ¶

func NewQ5KStorageFromRaw(raw []byte, numElements int) (*Q5KStorage, error)

NewQ5KStorageFromRaw creates Q5KStorage from raw super-block data.

func (*Q5KStorage) BlockRaw ¶ added in v0.3.0

func (q *Q5KStorage) BlockRaw(blockIdx int) []byte

BlockRaw returns the raw bytes for the given super-block index. The caller must not modify the returned slice.

func (*Q5KStorage) Dequantize ¶

func (q *Q5KStorage) Dequantize(dst []float32)

Dequantize unpacks all Q5_K super-blocks into dst.

func (*Q5KStorage) DeviceType ¶

func (q *Q5KStorage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*Q5KStorage) GPUPtr ¶ added in v0.3.0

func (q *Q5KStorage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID. The returned pointer is nil if no GPU copy exists.

func (*Q5KStorage) Len ¶

func (q *Q5KStorage) Len() int

Len returns the number of logical float32 elements.

func (*Q5KStorage) NumBlocks ¶ added in v0.3.0

func (q *Q5KStorage) NumBlocks() int

NumBlocks returns the number of Q5_K super-blocks.

func (*Q5KStorage) RawBytes ¶ added in v0.3.0

func (q *Q5KStorage) RawBytes() []byte

RawBytes returns the raw Q5_K super-block data for GPU upload. The layout is contiguous super-blocks, each 176 bytes.

func (*Q5KStorage) Set ¶

func (q *Q5KStorage) Set(_ []float32)

Set panics because Q5KStorage is immutable.

func (*Q5KStorage) SetGPUPtr ¶ added in v0.3.0

func (q *Q5KStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw bytes.

func (*Q5KStorage) Slice ¶

func (q *Q5KStorage) Slice() []float32

Slice dequantizes and returns all elements as a float32 slice.

type Q5_0Storage ¶ added in v0.3.0

type Q5_0Storage struct {
	// contains filtered or unexported fields
}

Q5_0Storage holds Q5_0 quantized tensor data on CPU.

func NewQ5_0StorageFromRaw ¶ added in v0.3.0

func NewQ5_0StorageFromRaw(raw []byte, numElements int) (*Q5_0Storage, error)

NewQ5_0StorageFromRaw creates Q5_0Storage from raw block data.

func (*Q5_0Storage) Dequantize ¶ added in v0.3.0

func (q *Q5_0Storage) Dequantize(dst []float32)

Dequantize unpacks all Q5_0 blocks into dst.

func (*Q5_0Storage) DeviceType ¶ added in v0.3.0

func (q *Q5_0Storage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*Q5_0Storage) GPUPtr ¶ added in v0.3.0

func (q *Q5_0Storage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID. The returned pointer is nil if no GPU copy exists.

func (*Q5_0Storage) Len ¶ added in v0.3.0

func (q *Q5_0Storage) Len() int

Len returns the number of logical float32 elements.

func (*Q5_0Storage) NumBlocks ¶ added in v0.3.0

func (q *Q5_0Storage) NumBlocks() int

NumBlocks returns the number of Q5_0 blocks.

func (*Q5_0Storage) RawBytes ¶ added in v0.3.0

func (q *Q5_0Storage) RawBytes() []byte

RawBytes returns the raw Q5_0 block data for GPU upload. The layout is contiguous blocks, each 22 bytes.

func (*Q5_0Storage) RawBytesGPU ¶ added in v1.2.0

func (q *Q5_0Storage) RawBytesGPU() []byte

RawBytesGPU returns Q5_0 data in a separated GPU-optimized layout. Instead of interleaved 22-byte blocks [d(2) | qh(4) | qs(16)], the data is separated into three contiguous regions:

[all scales (fp16, 2B each)] pad to 16B
[all qh values (uint32, 4B each)] pad to 16B
[all qs values (16B each)]

This ensures natural alignment: fp16 scales at 2-byte boundaries, uint32 qh at 4-byte boundaries. It eliminates the byte-wise __ldg loads required for the interleaved layout on ARM64 Grace Hopper.

func (*Q5_0Storage) Set ¶ added in v0.3.0

func (q *Q5_0Storage) Set(_ []float32)

Set panics because Q5_0Storage is immutable.

func (*Q5_0Storage) SetGPUPtr ¶ added in v0.3.0

func (q *Q5_0Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw bytes.

func (*Q5_0Storage) Slice ¶ added in v0.3.0

func (q *Q5_0Storage) Slice() []float32

Slice dequantizes and returns all elements as a float32 slice.

type Q6KStorage ¶

type Q6KStorage struct {
	// contains filtered or unexported fields
}

Q6KStorage holds Q6_K quantized tensor data on CPU.

func MergeQ6KStorage ¶ added in v0.5.0

func MergeQ6KStorage(storages ...*Q6KStorage) *Q6KStorage

MergeQ6KStorage concatenates multiple Q6KStorage objects into one. Used to merge Q/K/V or Gate/Up weight matrices row-wise for single-GEMV optimization during inference decode.

func NewQ6KStorageFromRaw ¶

func NewQ6KStorageFromRaw(raw []byte, numElements int) (*Q6KStorage, error)

NewQ6KStorageFromRaw creates Q6KStorage from raw super-block data.

func (*Q6KStorage) BlockRaw ¶ added in v0.3.0

func (q *Q6KStorage) BlockRaw(blockIdx int) []byte

BlockRaw returns the raw bytes for the given super-block index. The caller must not modify the returned slice.

func (*Q6KStorage) Dequantize ¶

func (q *Q6KStorage) Dequantize(dst []float32)

Dequantize unpacks all Q6_K super-blocks into dst.

func (*Q6KStorage) DeviceType ¶

func (q *Q6KStorage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*Q6KStorage) GPUPtr ¶ added in v0.3.0

func (q *Q6KStorage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the pointer to the GPU-resident copy, its byte size, and the device ID.

func (*Q6KStorage) Len ¶

func (q *Q6KStorage) Len() int

Len returns the number of logical float32 elements.

func (*Q6KStorage) NumBlocks ¶ added in v0.3.0

func (q *Q6KStorage) NumBlocks() int

NumBlocks returns the number of Q6_K super-blocks.

func (*Q6KStorage) RawBytes ¶ added in v0.3.0

func (q *Q6KStorage) RawBytes() []byte

RawBytes returns the underlying Q6_K super-block data.

func (*Q6KStorage) Set ¶

func (q *Q6KStorage) Set(_ []float32)

Set panics because Q6KStorage is immutable.

func (*Q6KStorage) SetGPUPtr ¶ added in v0.3.0

func (q *Q6KStorage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pointer to a GPU-resident copy of the data, to avoid per-op host-to-device (H2D) copies.

func (*Q6KStorage) Slice ¶

func (q *Q6KStorage) Slice() []float32

Slice dequantizes and returns all elements as a float32 slice.

type Q8Storage ¶

type Q8Storage struct {
	// contains filtered or unexported fields
}

Q8Storage holds Q8_0 quantized tensor data on CPU.

func NewQ8StorageFromBlocks ¶

func NewQ8StorageFromBlocks(scales []float32, quants []int8, numElements int) (*Q8Storage, error)

NewQ8StorageFromBlocks creates Q8Storage from pre-decoded block data. scales has one entry per block. quants has 32 int8 values per block (flattened). numElements is the number of logical float32 elements.

func QuantizeQ8 ¶

func QuantizeQ8(src []float32) *Q8Storage

QuantizeQ8 quantizes a float32 slice into Q8_0 format.

func (*Q8Storage) BlockQuants ¶

func (q *Q8Storage) BlockQuants(i int) []int8

BlockQuants returns the int8 quantized values for block i.

func (*Q8Storage) BlockScale ¶

func (q *Q8Storage) BlockScale(i int) float32

BlockScale returns the float32 scale for block i.

func (*Q8Storage) ByteSize ¶

func (q *Q8Storage) ByteSize() int

ByteSize returns the raw byte size of the quantized data.

func (*Q8Storage) Dequantize ¶

func (q *Q8Storage) Dequantize(dst []float32)

Dequantize unpacks Q8_0 blocks into dst.

func (*Q8Storage) DequantizeBlock ¶

func (q *Q8Storage) DequantizeBlock(blockIdx int, dst *[32]float32)

DequantizeBlock unpacks a single Q8_0 block into a 32-element buffer.

func (*Q8Storage) DequantizeRange ¶

func (q *Q8Storage) DequantizeRange(dst []float32, start, count int)

DequantizeRange unpacks Q8_0 blocks covering the range [start, start+count) into dst, which must have length >= count.

func (*Q8Storage) DeviceType ¶

func (q *Q8Storage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*Q8Storage) GPUPtr ¶

func (q *Q8Storage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID. The returned pointer is nil if no GPU copy exists.

func (*Q8Storage) Len ¶

func (q *Q8Storage) Len() int

Len returns the number of logical float32 elements.

func (*Q8Storage) NumBlocks ¶

func (q *Q8Storage) NumBlocks() int

NumBlocks returns the number of Q8_0 blocks.

func (*Q8Storage) RawBytes ¶

func (q *Q8Storage) RawBytes() []byte

RawBytes serializes Q8_0 blocks as contiguous bytes for GPU upload. Each block is 36 bytes: 4 bytes little-endian float32 scale + 32 bytes int8 data.

func (*Q8Storage) Set ¶

func (q *Q8Storage) Set(_ []float32)

Set is not supported on quantized storage (weights are immutable).

func (*Q8Storage) SetGPUPtr ¶

func (q *Q8Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw bytes. byteSize must match len(RawBytes()). The caller retains ownership of the pointer.

func (*Q8Storage) Slice ¶

func (q *Q8Storage) Slice() []float32

Slice returns a dequantized float32 copy of the data.

type Storage ¶

type Storage[T Numeric] interface {
	// Len returns the number of elements.
	Len() int
	// Slice returns a CPU-accessible []T.
	Slice() []T
	// Set replaces the storage contents from a CPU slice.
	Set(data []T)
	// DeviceType returns the device type this storage resides on.
	DeviceType() device.Type
}

Storage abstracts over CPU and GPU tensor data storage. For CPU storage, Slice() returns the underlying slice directly (zero copy). For GPU storage, Slice() copies device memory to a new host slice.

type Tensor ¶

type Tensor interface {
	Shape() []int
	DType() reflect.Type
	// contains filtered or unexported methods
}

Tensor is an interface that all concrete tensor types must implement. This allows the graph to be type-agnostic at a high level.

func NewFromType ¶

func NewFromType(t reflect.Type, shape []int, data any) (Tensor, error)

NewFromType creates a new tensor of a specific reflect.Type. This is used when the concrete generic type is not known at compile time.

type TensorBool ¶

type TensorBool struct {
	// contains filtered or unexported fields
}

TensorBool represents an n-dimensional array of booleans.

func NewBool ¶

func NewBool(shape []int, data []bool) (*TensorBool, error)

NewBool creates a new TensorBool with the given shape and initializes it with the provided data.

func (*TensorBool) Bytes ¶

func (t *TensorBool) Bytes() ([]byte, error)

Bytes returns the underlying data of the tensor as a byte slice.

func (*TensorBool) DType ¶

func (t *TensorBool) DType() reflect.Type

DType returns the reflect.Type of the tensor's elements.

func (*TensorBool) Data ¶

func (t *TensorBool) Data() []bool

Data returns a slice representing the underlying data of the tensor.

func (*TensorBool) Shape ¶

func (t *TensorBool) Shape() []int

Shape returns a copy of the tensor's shape.

type TensorNumeric ¶

type TensorNumeric[T Numeric] struct {
	// contains filtered or unexported fields
}

TensorNumeric represents an n-dimensional array of a generic numeric type T.

Note: The type name repeats the package name "tensor", which may read as stutter (tensor.TensorNumeric). This is intentional for clarity and API stability.

func New ¶

func New[T Numeric](shape []int, data []T) (*TensorNumeric[T], error)

New creates a new TensorNumeric with the given shape and initializes it with the provided data.

func NewFromBytes ¶

func NewFromBytes[T Numeric](shape []int, data []byte) (*TensorNumeric[T], error)

NewFromBytes creates a new tensor from bytes data with the given shape.

func NewWithStorage ¶

func NewWithStorage[T Numeric](shape []int, s Storage[T]) (*TensorNumeric[T], error)

NewWithStorage creates a TensorNumeric backed by the given Storage. This allows creating tensors with GPUStorage or any other Storage implementation.

func ToCPU ¶

func ToCPU[T Numeric](t *TensorNumeric[T]) *TensorNumeric[T]

ToCPU creates a new tensor with CPUStorage containing the same data as the source tensor. Shape and strides are preserved. The source tensor is not modified.

func ToGPU ¶

func ToGPU[T Numeric](t *TensorNumeric[T]) (*TensorNumeric[T], error)

ToGPU creates a new tensor with GPUStorage on device 0 containing the same data as the source tensor. Shape and strides are preserved. The source tensor is not modified.

func ToGPUDevice ¶

func ToGPUDevice[T Numeric](t *TensorNumeric[T], deviceID int) (*TensorNumeric[T], error)

ToGPUDevice creates a new tensor with GPUStorage on the specified device containing the same data as the source tensor. If the source tensor is already on a GPU, a peer-to-peer D2D copy is used when the devices differ; if on the same device, a D2D copy is performed. If the source is CPU-backed, an H2D copy targets the specified device.

func (*TensorNumeric[T]) At ¶

func (t *TensorNumeric[T]) At(indices ...int) (T, error)

At retrieves the value at the specified indices. It returns an error if the number of indices does not match the tensor's dimensions or if any index is out of bounds.

func (*TensorNumeric[T]) Bytes ¶

func (t *TensorNumeric[T]) Bytes() ([]byte, error)

Bytes returns the underlying data of the tensor as a byte slice.

func (*TensorNumeric[T]) Copy ¶

func (t *TensorNumeric[T]) Copy() *TensorNumeric[T]

Copy creates a deep copy of the tensor.

func (*TensorNumeric[T]) DType ¶

func (t *TensorNumeric[T]) DType() reflect.Type

DType returns the reflect.Type of the tensor's elements.

func (*TensorNumeric[T]) Data ¶

func (t *TensorNumeric[T]) Data() []T

Data returns a slice representing the underlying data of the tensor. For views, this returns only the data visible through the view.

func (*TensorNumeric[T]) Dims ¶

func (t *TensorNumeric[T]) Dims() int

Dims returns the number of dimensions of the tensor.

func (*TensorNumeric[T]) Each ¶

func (t *TensorNumeric[T]) Each(fn func(T))

Each applies a function to each element of the tensor.

func (*TensorNumeric[T]) GetStorage ¶

func (t *TensorNumeric[T]) GetStorage() Storage[T]

GetStorage returns the underlying storage of the tensor.

func (*TensorNumeric[T]) Release ¶

func (t *TensorNumeric[T]) Release()

Release frees any external resources held by this tensor's storage. For CPU tensors this is a no-op. For GPU tensors it frees device memory. After calling Release the tensor must not be used.

func (*TensorNumeric[T]) Reshape ¶

func (t *TensorNumeric[T]) Reshape(newShape []int) (*TensorNumeric[T], error)

Reshape returns a new TensorNumeric with a different shape that shares the same underlying data. The new shape must have the same total number of elements as the original tensor. This operation is a "view" and does not copy the data.

func (*TensorNumeric[T]) Set ¶

func (t *TensorNumeric[T]) Set(value T, indices ...int) error

Set updates the value at the specified indices. It returns an error if the number of indices does not match the tensor's dimensions, if any index is out of bounds, or if the tensor is a read-only view.

func (*TensorNumeric[T]) SetData ¶

func (t *TensorNumeric[T]) SetData(data []T)

SetData sets the underlying data of the tensor.

func (*TensorNumeric[T]) SetShape ¶

func (t *TensorNumeric[T]) SetShape(shape []int)

SetShape sets the tensor's shape.

func (*TensorNumeric[T]) SetStorage ¶

func (t *TensorNumeric[T]) SetStorage(s Storage[T])

SetStorage replaces the underlying storage of the tensor.

func (*TensorNumeric[T]) SetStrides ¶

func (t *TensorNumeric[T]) SetStrides(strides []int)

SetStrides sets the tensor's strides.

func (*TensorNumeric[T]) Shape ¶

func (t *TensorNumeric[T]) Shape() []int

Shape returns a copy of the tensor's shape.

func (*TensorNumeric[T]) ShapeEquals ¶

func (t *TensorNumeric[T]) ShapeEquals(other *TensorNumeric[T]) bool

ShapeEquals returns true if the shapes of two tensors are identical.

func (*TensorNumeric[T]) Size ¶

func (t *TensorNumeric[T]) Size() int

Size returns the total number of elements in the tensor.

func (*TensorNumeric[T]) Slice ¶

func (t *TensorNumeric[T]) Slice(ranges ...[2]int) (*TensorNumeric[T], error)

Slice creates a new TensorNumeric view for the specified range. A slice is defined by a start and end index for each dimension. The returned tensor shares the same underlying data.

func (*TensorNumeric[T]) Strides ¶

func (t *TensorNumeric[T]) Strides() []int

Strides returns a copy of the tensor's strides.

func (*TensorNumeric[T]) String ¶

func (t *TensorNumeric[T]) String() string

String returns a string representation of the tensor.

type TensorString ¶

type TensorString struct {
	// contains filtered or unexported fields
}

TensorString represents an n-dimensional array of strings.

func NewString ¶

func NewString(shape []int, data []string) (*TensorString, error)

NewString creates a new TensorString with the given shape and initializes it with the provided data.

func (*TensorString) DType ¶

func (t *TensorString) DType() reflect.Type

DType returns the reflect.Type of the tensor's elements.

func (*TensorString) Data ¶

func (t *TensorString) Data() []string

Data returns a slice representing the underlying data of the tensor.

func (*TensorString) Shape ¶

func (t *TensorString) Shape() []int

Shape returns a copy of the tensor's shape.

type TernaryStorage ¶ added in v0.11.0

type TernaryStorage struct {
	// contains filtered or unexported fields
}

TernaryStorage packs ternary weights {-1, 0, 1} into 2 bits per value. Each byte holds 4 values. Encoding: 00=-1, 01=0, 10=1.

func NewTernaryStorage ¶ added in v0.11.0

func NewTernaryStorage(size int) *TernaryStorage

NewTernaryStorage creates a TernaryStorage that can hold size ternary values. All values are initialized to zero.

func NewTernaryStorageFrom ¶ added in v0.11.0

func NewTernaryStorageFrom(values []int8) *TernaryStorage

NewTernaryStorageFrom creates a TernaryStorage from a slice of int8 values. Each value must be -1, 0, or 1; otherwise the function panics.

func (*TernaryStorage) DeviceType ¶ added in v0.11.0

func (s *TernaryStorage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*TernaryStorage) Get ¶ added in v0.11.0

func (s *TernaryStorage) Get(i int) int8

Get returns the ternary value at index i as -1, 0, or 1.

func (*TernaryStorage) Len ¶ added in v0.11.0

func (s *TernaryStorage) Len() int

Len returns the number of ternary values stored.

func (*TernaryStorage) RawBytes ¶ added in v0.11.0

func (s *TernaryStorage) RawBytes() []byte

RawBytes returns the underlying packed byte slice.

func (*TernaryStorage) Set ¶ added in v0.11.0

func (s *TernaryStorage) Set(data []float32)

Set replaces the storage contents by quantizing a float32 slice to ternary values. Each value is rounded to the nearest of {-1, 0, 1}. This satisfies the Storage[float32] interface.

func (*TernaryStorage) SetElement ¶ added in v0.12.0

func (s *TernaryStorage) SetElement(i int, val int8)

SetElement stores a ternary value (-1, 0, or 1) at index i. Panics if val is not in {-1, 0, 1} or i is out of range.

func (*TernaryStorage) Slice ¶ added in v0.11.0

func (s *TernaryStorage) Slice() []float32

Slice dequantizes all ternary values to a float32 slice.

type W8A8Storage ¶ added in v0.3.0

type W8A8Storage struct {
	// contains filtered or unexported fields
}

W8A8Storage holds W8A8 symmetric INT8 quantized tensor data on CPU.

func NewW8A8StorageFromBlocks ¶ added in v0.3.0

func NewW8A8StorageFromBlocks(scales []float32, quants []int8, numElements int) (*W8A8Storage, error)

NewW8A8StorageFromBlocks creates W8A8Storage from pre-decoded block data. scales has one entry per group. quants has w8a8GroupSize (32) int8 values per group (flattened). numElements is the number of logical float32 elements.

func QuantizeW8A8 ¶ added in v0.3.0

func QuantizeW8A8(src []float32) *W8A8Storage

QuantizeW8A8 quantizes a float32 slice into W8A8 symmetric INT8 format. The group size is fixed at 32 elements.

func (*W8A8Storage) BlockQuants ¶ added in v0.3.0

func (s *W8A8Storage) BlockQuants(i int) []int8

BlockQuants returns the int8 quantized values for group i.

func (*W8A8Storage) BlockScale ¶ added in v0.3.0

func (s *W8A8Storage) BlockScale(i int) float32

BlockScale returns the float32 scale for group i.

func (*W8A8Storage) ByteSize ¶ added in v0.3.0

func (s *W8A8Storage) ByteSize() int

ByteSize returns the raw byte size of the quantized data. Each group: 4 bytes float32 scale + 32 bytes int8 data = 36 bytes.

func (*W8A8Storage) Dequantize ¶ added in v0.3.0

func (s *W8A8Storage) Dequantize(dst []float32)

Dequantize unpacks W8A8 groups into dst. len(dst) must be >= s.Len().

func (*W8A8Storage) DequantizeBlock ¶ added in v0.3.0

func (s *W8A8Storage) DequantizeBlock(blockIdx int, dst *[w8a8GroupSize]float32)

DequantizeBlock unpacks a single W8A8 group into a 32-element buffer.

func (*W8A8Storage) DeviceType ¶ added in v0.3.0

func (s *W8A8Storage) DeviceType() device.Type

DeviceType returns device.CPU.

func (*W8A8Storage) GPUPtr ¶ added in v0.3.0

func (s *W8A8Storage) GPUPtr() (unsafe.Pointer, int, int)

GPUPtr returns the cached GPU device pointer, byte size, and device ID.

func (*W8A8Storage) GroupSize ¶ added in v0.3.0

func (s *W8A8Storage) GroupSize() int

GroupSize returns the number of elements per group (always 32).

func (*W8A8Storage) Len ¶ added in v0.3.0

func (s *W8A8Storage) Len() int

Len returns the number of logical float32 elements.

func (*W8A8Storage) NumGroups ¶ added in v0.3.0

func (s *W8A8Storage) NumGroups() int

NumGroups returns the number of quantization groups.

func (*W8A8Storage) RawBytes ¶ added in v0.3.0

func (s *W8A8Storage) RawBytes() []byte

RawBytes serializes W8A8 groups as contiguous bytes for GPU upload. Each group is 36 bytes: 4 bytes little-endian float32 scale + 32 bytes int8 data.

func (*W8A8Storage) Set ¶ added in v0.3.0

func (s *W8A8Storage) Set(_ []float32)

Set is not supported on quantized storage (weights are immutable).

func (*W8A8Storage) SetGPUPtr ¶ added in v0.3.0

func (s *W8A8Storage) SetGPUPtr(ptr unsafe.Pointer, byteSize, deviceID int)

SetGPUPtr stores a pre-uploaded GPU device pointer for the raw bytes.

func (*W8A8Storage) Slice ¶ added in v0.3.0

func (s *W8A8Storage) Slice() []float32

Slice returns a dequantized float32 view of the data.

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL