Documentation
¶
Overview ¶
Package numeric provides precision types, arithmetic operations, and generic constraints for the Zerfoo ML framework. It serves as the foundation layer supporting float8, float16, float32, and float64 numeric types with IEEE 754 compliance.
Index ¶
- func Float8TestData() struct{ ... }
- func Float16TestData() struct{ ... }
- func Pack4BitSlice(values []uint8) ([]uint8, error)
- func Pack4BitWeights(low4, high4 uint8) (uint8, error)
- func TestArithmeticOp[T any](t *testing.T, opName string, op func(T, T) T, equal func(T, T) bool, ...)
- func TestLeakyReLUOp[T any](t *testing.T, opName string, op func(T, float64) T, toFloat32 func(T) float32, ...)
- func TestSumOp[T any](t *testing.T, op func([]T) T, toFloat32 func(T) float32, ...)
- func TestUnaryOp[T any](t *testing.T, opName string, op func(T) T, equal func(T, T) bool, ...)
- func Unpack4BitSlice(packed []uint8) []uint8
- func Unpack4BitWeights(packed uint8) (uint8, uint8)
- type Arithmetic
- type ArithmeticTestCase
- type BFloat16Ops
- func (ops BFloat16Ops) Abs(x float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) Add(a, b float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) Div(a, b float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) Exp(x float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) FromFloat32(f float32) float16.BFloat16
- func (ops BFloat16Ops) FromFloat64(f float64) float16.BFloat16
- func (ops BFloat16Ops) GreaterThan(a, b float16.BFloat16) bool
- func (ops BFloat16Ops) IsZero(v float16.BFloat16) bool
- func (ops BFloat16Ops) LeakyReLU(x float16.BFloat16, alpha float64) float16.BFloat16
- func (ops BFloat16Ops) LeakyReLUGrad(x float16.BFloat16, alpha float64) float16.BFloat16
- func (ops BFloat16Ops) Log(x float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) Mul(a, b float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) One() float16.BFloat16
- func (ops BFloat16Ops) Pow(base, exponent float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) ReLU(x float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) ReLUGrad(x float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) Sigmoid(x float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) SigmoidGrad(x float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) Sqrt(x float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) Sub(a, b float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) Sum(s []float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) Tanh(x float16.BFloat16) float16.BFloat16
- func (ops BFloat16Ops) TanhGrad(x float16.BFloat16) float16.BFloat16
- type Float8Ops
- func (ops Float8Ops) Abs(x float8.Float8) float8.Float8
- func (ops Float8Ops) Add(a, b float8.Float8) float8.Float8
- func (ops Float8Ops) Div(a, b float8.Float8) float8.Float8
- func (ops Float8Ops) Exp(x float8.Float8) float8.Float8
- func (ops Float8Ops) FromFloat32(f float32) float8.Float8
- func (ops Float8Ops) FromFloat64(f float64) float8.Float8
- func (ops Float8Ops) GreaterThan(a, b float8.Float8) bool
- func (ops Float8Ops) IsZero(v float8.Float8) bool
- func (ops Float8Ops) LeakyReLU(x float8.Float8, alpha float64) float8.Float8
- func (ops Float8Ops) LeakyReLUGrad(x float8.Float8, alpha float64) float8.Float8
- func (ops Float8Ops) Log(x float8.Float8) float8.Float8
- func (ops Float8Ops) Mul(a, b float8.Float8) float8.Float8
- func (ops Float8Ops) One() float8.Float8
- func (ops Float8Ops) Pow(base, exponent float8.Float8) float8.Float8
- func (ops Float8Ops) ReLU(x float8.Float8) float8.Float8
- func (ops Float8Ops) ReLUGrad(x float8.Float8) float8.Float8
- func (ops Float8Ops) Sigmoid(x float8.Float8) float8.Float8
- func (ops Float8Ops) SigmoidGrad(x float8.Float8) float8.Float8
- func (ops Float8Ops) Sqrt(x float8.Float8) float8.Float8
- func (ops Float8Ops) Sub(a, b float8.Float8) float8.Float8
- func (ops Float8Ops) Sum(s []float8.Float8) float8.Float8
- func (ops Float8Ops) Tanh(x float8.Float8) float8.Float8
- func (ops Float8Ops) TanhGrad(x float8.Float8) float8.Float8
- func (ops Float8Ops) ToFloat32(t float8.Float8) float32
- type Float16Ops
- func (ops Float16Ops) Abs(x float16.Float16) float16.Float16
- func (ops Float16Ops) Add(a, b float16.Float16) float16.Float16
- func (ops Float16Ops) Div(a, b float16.Float16) float16.Float16
- func (ops Float16Ops) Exp(x float16.Float16) float16.Float16
- func (ops Float16Ops) FromFloat32(f float32) float16.Float16
- func (ops Float16Ops) FromFloat64(f float64) float16.Float16
- func (ops Float16Ops) GreaterThan(a, b float16.Float16) bool
- func (ops Float16Ops) IsZero(v float16.Float16) bool
- func (ops Float16Ops) LeakyReLU(x float16.Float16, alpha float64) float16.Float16
- func (ops Float16Ops) LeakyReLUGrad(x float16.Float16, alpha float64) float16.Float16
- func (ops Float16Ops) Log(x float16.Float16) float16.Float16
- func (ops Float16Ops) Mul(a, b float16.Float16) float16.Float16
- func (ops Float16Ops) One() float16.Float16
- func (ops Float16Ops) Pow(base, exponent float16.Float16) float16.Float16
- func (ops Float16Ops) ReLU(x float16.Float16) float16.Float16
- func (ops Float16Ops) ReLUGrad(x float16.Float16) float16.Float16
- func (ops Float16Ops) Sigmoid(x float16.Float16) float16.Float16
- func (ops Float16Ops) SigmoidGrad(x float16.Float16) float16.Float16
- func (ops Float16Ops) Sqrt(x float16.Float16) float16.Float16
- func (ops Float16Ops) Sub(a, b float16.Float16) float16.Float16
- func (ops Float16Ops) Sum(s []float16.Float16) float16.Float16
- func (ops Float16Ops) Tanh(x float16.Float16) float16.Float16
- func (ops Float16Ops) TanhGrad(x float16.Float16) float16.Float16
- func (ops Float16Ops) ToFloat32(t float16.Float16) float32
- type Float32Ops
- func (ops Float32Ops) Abs(x float32) float32
- func (ops Float32Ops) Add(a, b float32) float32
- func (ops Float32Ops) Div(a, b float32) float32
- func (ops Float32Ops) Exp(x float32) float32
- func (ops Float32Ops) FromFloat32(f float32) float32
- func (ops Float32Ops) FromFloat64(f float64) float32
- func (ops Float32Ops) GreaterThan(a, b float32) bool
- func (ops Float32Ops) IsZero(v float32) bool
- func (ops Float32Ops) LeakyReLU(x float32, alpha float64) float32
- func (ops Float32Ops) LeakyReLUGrad(x float32, alpha float64) float32
- func (ops Float32Ops) Log(x float32) float32
- func (ops Float32Ops) Mul(a, b float32) float32
- func (ops Float32Ops) One() float32
- func (ops Float32Ops) Pow(base, exponent float32) float32
- func (ops Float32Ops) ReLU(x float32) float32
- func (ops Float32Ops) ReLUGrad(x float32) float32
- func (ops Float32Ops) Sigmoid(x float32) float32
- func (ops Float32Ops) SigmoidGrad(x float32) float32
- func (ops Float32Ops) Sqrt(x float32) float32
- func (ops Float32Ops) Sub(a, b float32) float32
- func (ops Float32Ops) Sum(s []float32) float32
- func (ops Float32Ops) Tanh(x float32) float32
- func (ops Float32Ops) TanhGrad(x float32) float32
- func (ops Float32Ops) ToFloat32(t float32) float32
- type Float64Ops
- func (ops Float64Ops) Abs(x float64) float64
- func (ops Float64Ops) Add(a, b float64) float64
- func (ops Float64Ops) Div(a, b float64) float64
- func (ops Float64Ops) Exp(x float64) float64
- func (ops Float64Ops) FromFloat32(f float32) float64
- func (ops Float64Ops) FromFloat64(f float64) float64
- func (ops Float64Ops) GreaterThan(a, b float64) bool
- func (ops Float64Ops) IsZero(v float64) bool
- func (ops Float64Ops) LeakyReLU(x, alpha float64) float64
- func (ops Float64Ops) LeakyReLUGrad(x, alpha float64) float64
- func (ops Float64Ops) Log(x float64) float64
- func (ops Float64Ops) Mul(a, b float64) float64
- func (ops Float64Ops) One() float64
- func (ops Float64Ops) Pow(base, exponent float64) float64
- func (ops Float64Ops) ReLU(x float64) float64
- func (ops Float64Ops) ReLUGrad(x float64) float64
- func (ops Float64Ops) Sigmoid(x float64) float64
- func (ops Float64Ops) SigmoidGrad(x float64) float64
- func (ops Float64Ops) Sqrt(x float64) float64
- func (ops Float64Ops) Sub(a, b float64) float64
- func (ops Float64Ops) Sum(s []float64) float64
- func (ops Float64Ops) Tanh(x float64) float64
- func (ops Float64Ops) TanhGrad(x float64) float64
- func (ops Float64Ops) ToFloat32(t float64) float32
- type Int8Ops
- func (ops Int8Ops) Abs(x int8) int8
- func (ops Int8Ops) Add(a, b int8) int8
- func (ops Int8Ops) Div(a, b int8) int8
- func (ops Int8Ops) Exp(x int8) int8
- func (ops Int8Ops) FromFloat32(f float32) int8
- func (ops Int8Ops) FromFloat64(f float64) int8
- func (ops Int8Ops) GreaterThan(a, b int8) bool
- func (ops Int8Ops) IsZero(v int8) bool
- func (ops Int8Ops) LeakyReLU(x int8, alpha float64) int8
- func (ops Int8Ops) LeakyReLUGrad(x int8, alpha float64) int8
- func (ops Int8Ops) Log(x int8) int8
- func (ops Int8Ops) Mul(a, b int8) int8
- func (ops Int8Ops) One() int8
- func (ops Int8Ops) Pow(base, exponent int8) int8
- func (ops Int8Ops) ReLU(x int8) int8
- func (ops Int8Ops) ReLUGrad(x int8) int8
- func (ops Int8Ops) Sigmoid(x int8) int8
- func (ops Int8Ops) SigmoidGrad(x int8) int8
- func (ops Int8Ops) Sqrt(x int8) int8
- func (ops Int8Ops) Sub(a, b int8) int8
- func (ops Int8Ops) Sum(s []int8) int8
- func (ops Int8Ops) Tanh(x int8) int8
- func (ops Int8Ops) TanhGrad(x int8) int8
- type IntOps
- func (IntOps) Abs(x int) int
- func (IntOps) Add(a, b int) int
- func (IntOps) Div(a, b int) int
- func (IntOps) Exp(x int) int
- func (IntOps) FromFloat32(f float32) int
- func (IntOps) FromFloat64(f float64) int
- func (IntOps) GreaterThan(a, b int) bool
- func (IntOps) IsZero(v int) bool
- func (IntOps) LeakyReLU(x int, alpha float64) int
- func (IntOps) LeakyReLUGrad(x int, alpha float64) int
- func (IntOps) Log(x int) int
- func (IntOps) Mul(a, b int) int
- func (IntOps) One() int
- func (IntOps) Pow(base, exponent int) int
- func (IntOps) ReLU(x int) int
- func (IntOps) ReLUGrad(x int) int
- func (IntOps) Sigmoid(x int) int
- func (IntOps) SigmoidGrad(x int) int
- func (IntOps) Sqrt(x int) int
- func (IntOps) Sub(a, b int) int
- func (IntOps) Sum(s []int) int
- func (IntOps) Tanh(x int) int
- func (IntOps) TanhGrad(x int) int
- func (IntOps) ToFloat32(t int) float32
- type LeakyReLUTestCase
- type QuantizationConfig
- func (qc *QuantizationConfig) Dequantize(quantized uint8) float32
- func (qc *QuantizationConfig) Dequantize4BitWeights(packed []uint8) []float32
- func (qc *QuantizationConfig) DequantizeSlice(quantized []uint8) []float32
- func (qc *QuantizationConfig) QuantizationError(original []float32) float64
- func (qc *QuantizationConfig) Quantize(value float32) uint8
- func (qc *QuantizationConfig) QuantizeSlice(values []float32) []uint8
- func (qc *QuantizationConfig) ValidateQuantizationRoundTrip(values []float32, tolerance float64) error
- type SumTestCase
- type Uint8Ops
- func (ops Uint8Ops) Abs(x uint8) uint8
- func (ops Uint8Ops) Add(a, b uint8) uint8
- func (ops Uint8Ops) Div(a, b uint8) uint8
- func (ops Uint8Ops) Exp(x uint8) uint8
- func (ops Uint8Ops) FromFloat32(f float32) uint8
- func (ops Uint8Ops) FromFloat64(f float64) uint8
- func (ops Uint8Ops) GreaterThan(a, b uint8) bool
- func (ops Uint8Ops) IsZero(v uint8) bool
- func (ops Uint8Ops) LeakyReLU(x uint8, alpha float64) uint8
- func (ops Uint8Ops) LeakyReLUGrad(x uint8, alpha float64) uint8
- func (ops Uint8Ops) Log(x uint8) uint8
- func (ops Uint8Ops) Mul(a, b uint8) uint8
- func (ops Uint8Ops) One() uint8
- func (ops Uint8Ops) Pow(base, exponent uint8) uint8
- func (ops Uint8Ops) ReLU(x uint8) uint8
- func (ops Uint8Ops) ReLUGrad(x uint8) uint8
- func (ops Uint8Ops) Sigmoid(x uint8) uint8
- func (ops Uint8Ops) SigmoidGrad(x uint8) uint8
- func (ops Uint8Ops) Sqrt(x uint8) uint8
- func (ops Uint8Ops) Sub(a, b uint8) uint8
- func (ops Uint8Ops) Sum(s []uint8) uint8
- func (ops Uint8Ops) Tanh(x uint8) uint8
- func (ops Uint8Ops) TanhGrad(x uint8) uint8
- type UnaryTestCase
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func Float8TestData ¶
func Float8TestData() struct {
Add []ArithmeticTestCase[float8.Float8]
Mul []ArithmeticTestCase[float8.Float8]
Div []ArithmeticTestCase[float8.Float8]
Tanh []UnaryTestCase[float8.Float8]
Sigmoid []UnaryTestCase[float8.Float8]
LeakyReLU []LeakyReLUTestCase[float8.Float8]
LeakyReLUGrad []LeakyReLUTestCase[float8.Float8]
Sum []SumTestCase[float8.Float8]
}
Float8TestData provides common test data for float8 operations.
func Float16TestData ¶
func Float16TestData() struct {
Add []ArithmeticTestCase[float16.Float16]
Mul []ArithmeticTestCase[float16.Float16]
Div []ArithmeticTestCase[float16.Float16]
Tanh []UnaryTestCase[float16.Float16]
Sigmoid []UnaryTestCase[float16.Float16]
LeakyReLU []LeakyReLUTestCase[float16.Float16]
LeakyReLUGrad []LeakyReLUTestCase[float16.Float16]
Sum []SumTestCase[float16.Float16]
}
Float16TestData provides common test data for float16 operations.
func Pack4BitSlice ¶
Pack4BitSlice packs a slice of 4-bit values into a uint8 slice. The input length must be even; each pair of values is packed into one uint8.
func Pack4BitWeights ¶
Pack4BitWeights packs two 4-bit values into a single uint8. This is used for MatMulNBits, where weights are stored as 4-bit values.
func TestArithmeticOp ¶
func TestArithmeticOp[T any](t *testing.T, opName string, op func(T, T) T, equal func(T, T) bool, tests []ArithmeticTestCase[T])
TestArithmeticOp tests a binary arithmetic operation.
func TestLeakyReLUOp ¶
func TestLeakyReLUOp[T any](t *testing.T, opName string, op func(T, float64) T, toFloat32 func(T) float32, tests []LeakyReLUTestCase[T])
TestLeakyReLUOp tests LeakyReLU operations with epsilon tolerance.
func TestSumOp ¶
func TestSumOp[T any](t *testing.T, op func([]T) T, toFloat32 func(T) float32, tests []SumTestCase[T])
TestSumOp tests sum operations with epsilon tolerance.
func TestUnaryOp ¶
func TestUnaryOp[T any](t *testing.T, opName string, op func(T) T, equal func(T, T) bool, tests []UnaryTestCase[T])
TestUnaryOp tests a unary operation.
func Unpack4BitSlice ¶
Unpack4BitSlice unpacks a slice of uint8 values into 4-bit weights. Each uint8 contains two 4-bit values, so the output length is twice the input length.
func Unpack4BitWeights ¶
Unpack4BitWeights extracts two 4-bit values from a single uint8. It returns (low4, high4), where low4 is bits [0:3] and high4 is bits [4:7].
Types ¶
type Arithmetic ¶
type Arithmetic[T any] interface {
	// Basic binary operations
	Add(a, b T) T
	Sub(a, b T) T
	Mul(a, b T) T
	Div(a, b T) T
	// Activation functions and their derivatives
	Tanh(x T) T
	Sigmoid(x T) T
	ReLU(x T) T
	LeakyReLU(x T, alpha float64) T
	TanhGrad(x T) T    // Derivative of Tanh
	SigmoidGrad(x T) T // Derivative of Sigmoid
	ReLUGrad(x T) T
	LeakyReLUGrad(x T, alpha float64) T
	// Conversion from standard types
	FromFloat32(f float32) T
	FromFloat64(f float64) T
	One() T
	// IsZero checks if a value is zero.
	IsZero(v T) bool
	// Abs returns the absolute value of x.
	Abs(x T) T
	// Sum returns the sum of all elements in the slice.
	Sum(s []T) T
	// Exp returns e**x.
	Exp(x T) T
	// Log returns the natural logarithm of x.
	Log(x T) T
	// Pow returns base**exponent.
	Pow(base, exponent T) T
	// Sqrt returns the square root of x.
	Sqrt(x T) T
	// GreaterThan returns true if a is greater than b.
	GreaterThan(a, b T) bool
}
Arithmetic defines a generic interface for all mathematical operations required by the compute engine. This allows the engine to be completely agnostic to the specific numeric type it is operating on.
type ArithmeticTestCase ¶
type ArithmeticTestCase[T any] struct {
	// contains filtered or unexported fields
}
ArithmeticTestCase represents a test case for arithmetic operations.
type BFloat16Ops ¶
type BFloat16Ops struct{}
BFloat16Ops provides the implementation of the Arithmetic interface for the float16.BFloat16 type.
func (BFloat16Ops) FromFloat32 ¶
func (ops BFloat16Ops) FromFloat32(f float32) float16.BFloat16
func (BFloat16Ops) FromFloat64 ¶
func (ops BFloat16Ops) FromFloat64(f float64) float16.BFloat16
func (BFloat16Ops) GreaterThan ¶
func (ops BFloat16Ops) GreaterThan(a, b float16.BFloat16) bool
func (BFloat16Ops) LeakyReLUGrad ¶
func (BFloat16Ops) One ¶
func (ops BFloat16Ops) One() float16.BFloat16
func (BFloat16Ops) Pow ¶
func (ops BFloat16Ops) Pow(base, exponent float16.BFloat16) float16.BFloat16
func (BFloat16Ops) SigmoidGrad ¶
func (ops BFloat16Ops) SigmoidGrad(x float16.BFloat16) float16.BFloat16
type Float8Ops ¶
type Float8Ops struct{}
Float8Ops provides the implementation of the Arithmetic interface for the float8.Float8 type.
func (Float8Ops) FromFloat32 ¶
FromFloat32 converts a float32 to a float8.Float8.
func (Float8Ops) FromFloat64 ¶
FromFloat64 converts a float64 to a float8.Float8.
func (Float8Ops) GreaterThan ¶
GreaterThan checks if a is greater than b.
func (Float8Ops) LeakyReLUGrad ¶
LeakyReLUGrad computes the gradient of the Leaky Rectified Linear Unit function.
func (Float8Ops) SigmoidGrad ¶
SigmoidGrad computes the gradient of the sigmoid function.
type Float16Ops ¶
type Float16Ops struct{}
Float16Ops provides the implementation of the Arithmetic interface for the float16.Float16 type.
func (Float16Ops) Abs ¶
func (ops Float16Ops) Abs(x float16.Float16) float16.Float16
Abs computes the absolute value of x.
func (Float16Ops) Add ¶
func (ops Float16Ops) Add(a, b float16.Float16) float16.Float16
Add performs element-wise addition.
func (Float16Ops) Div ¶
func (ops Float16Ops) Div(a, b float16.Float16) float16.Float16
Div performs element-wise division.
func (Float16Ops) Exp ¶
func (ops Float16Ops) Exp(x float16.Float16) float16.Float16
Exp computes the exponential of x.
func (Float16Ops) FromFloat32 ¶
func (ops Float16Ops) FromFloat32(f float32) float16.Float16
FromFloat32 converts a float32 to a float16.Float16.
func (Float16Ops) FromFloat64 ¶
func (ops Float16Ops) FromFloat64(f float64) float16.Float16
FromFloat64 converts a float64 to a float16.Float16.
func (Float16Ops) GreaterThan ¶
func (ops Float16Ops) GreaterThan(a, b float16.Float16) bool
GreaterThan checks if a is greater than b.
func (Float16Ops) IsZero ¶
func (ops Float16Ops) IsZero(v float16.Float16) bool
IsZero checks if the given float16.Float16 value is zero.
func (Float16Ops) LeakyReLUGrad ¶
LeakyReLUGrad computes the gradient of the Leaky Rectified Linear Unit function.
func (Float16Ops) Log ¶
func (ops Float16Ops) Log(x float16.Float16) float16.Float16
Log computes the natural logarithm of x.
func (Float16Ops) Mul ¶
func (ops Float16Ops) Mul(a, b float16.Float16) float16.Float16
Mul performs element-wise multiplication.
func (Float16Ops) One ¶
func (ops Float16Ops) One() float16.Float16
One returns a float16.Float16 with value 1.
func (Float16Ops) Pow ¶
func (ops Float16Ops) Pow(base, exponent float16.Float16) float16.Float16
Pow computes base raised to the power of exponent.
func (Float16Ops) ReLU ¶
func (ops Float16Ops) ReLU(x float16.Float16) float16.Float16
ReLU computes the Rectified Linear Unit function.
func (Float16Ops) ReLUGrad ¶
func (ops Float16Ops) ReLUGrad(x float16.Float16) float16.Float16
ReLUGrad computes the gradient of the Rectified Linear Unit function.
func (Float16Ops) Sigmoid ¶
func (ops Float16Ops) Sigmoid(x float16.Float16) float16.Float16
Sigmoid computes the sigmoid function of x.
func (Float16Ops) SigmoidGrad ¶
func (ops Float16Ops) SigmoidGrad(x float16.Float16) float16.Float16
SigmoidGrad computes the gradient of the sigmoid function.
func (Float16Ops) Sqrt ¶
func (ops Float16Ops) Sqrt(x float16.Float16) float16.Float16
Sqrt computes the square root of x.
func (Float16Ops) Sub ¶
func (ops Float16Ops) Sub(a, b float16.Float16) float16.Float16
Sub performs element-wise subtraction.
func (Float16Ops) Sum ¶
func (ops Float16Ops) Sum(s []float16.Float16) float16.Float16
Sum computes the sum of elements in a slice.
func (Float16Ops) Tanh ¶
func (ops Float16Ops) Tanh(x float16.Float16) float16.Float16
Tanh computes the hyperbolic tangent of x.
type Float32Ops ¶
type Float32Ops struct{}
Float32Ops provides the implementation of the Arithmetic interface for the float32 type.
func (Float32Ops) Abs ¶
func (ops Float32Ops) Abs(x float32) float32
Abs computes the absolute value of x.
func (Float32Ops) Add ¶
func (ops Float32Ops) Add(a, b float32) float32
Add performs element-wise addition.
func (Float32Ops) Div ¶
func (ops Float32Ops) Div(a, b float32) float32
Div performs element-wise division.
func (Float32Ops) Exp ¶
func (ops Float32Ops) Exp(x float32) float32
Exp computes the exponential of x.
func (Float32Ops) FromFloat32 ¶
func (ops Float32Ops) FromFloat32(f float32) float32
FromFloat32 converts a float32 to a float32.
func (Float32Ops) FromFloat64 ¶
func (ops Float32Ops) FromFloat64(f float64) float32
FromFloat64 converts a float64 to a float32.
func (Float32Ops) GreaterThan ¶
func (ops Float32Ops) GreaterThan(a, b float32) bool
GreaterThan checks if a is greater than b.
func (Float32Ops) IsZero ¶
func (ops Float32Ops) IsZero(v float32) bool
IsZero checks if the given float32 value is zero.
func (Float32Ops) LeakyReLU ¶
func (ops Float32Ops) LeakyReLU(x float32, alpha float64) float32
LeakyReLU computes the Leaky Rectified Linear Unit function.
func (Float32Ops) LeakyReLUGrad ¶
func (ops Float32Ops) LeakyReLUGrad(x float32, alpha float64) float32
LeakyReLUGrad computes the gradient of the Leaky Rectified Linear Unit function.
func (Float32Ops) Log ¶
func (ops Float32Ops) Log(x float32) float32
Log computes the natural logarithm of x.
func (Float32Ops) Mul ¶
func (ops Float32Ops) Mul(a, b float32) float32
Mul performs element-wise multiplication.
func (Float32Ops) Pow ¶
func (ops Float32Ops) Pow(base, exponent float32) float32
Pow computes base raised to the power of exponent.
func (Float32Ops) ReLU ¶
func (ops Float32Ops) ReLU(x float32) float32
ReLU computes the Rectified Linear Unit function.
func (Float32Ops) ReLUGrad ¶
func (ops Float32Ops) ReLUGrad(x float32) float32
ReLUGrad computes the gradient of the Rectified Linear Unit function.
func (Float32Ops) Sigmoid ¶
func (ops Float32Ops) Sigmoid(x float32) float32
Sigmoid computes the sigmoid function of x.
func (Float32Ops) SigmoidGrad ¶
func (ops Float32Ops) SigmoidGrad(x float32) float32
SigmoidGrad computes the gradient of the sigmoid function.
func (Float32Ops) Sqrt ¶
func (ops Float32Ops) Sqrt(x float32) float32
Sqrt computes the square root of x.
func (Float32Ops) Sub ¶
func (ops Float32Ops) Sub(a, b float32) float32
Sub performs element-wise subtraction.
func (Float32Ops) Sum ¶
func (ops Float32Ops) Sum(s []float32) float32
Sum computes the sum of elements in a slice.
func (Float32Ops) Tanh ¶
func (ops Float32Ops) Tanh(x float32) float32
Tanh computes the hyperbolic tangent of x.
func (Float32Ops) TanhGrad ¶
func (ops Float32Ops) TanhGrad(x float32) float32
TanhGrad computes the gradient of the hyperbolic tangent function.
func (Float32Ops) ToFloat32 ¶
func (ops Float32Ops) ToFloat32(t float32) float32
ToFloat32 converts a float32 to a float32.
type Float64Ops ¶
type Float64Ops struct{}
Float64Ops provides the implementation of the Arithmetic interface for the float64 type.
func (Float64Ops) Abs ¶
func (ops Float64Ops) Abs(x float64) float64
Abs computes the absolute value of x.
func (Float64Ops) Add ¶
func (ops Float64Ops) Add(a, b float64) float64
Add performs element-wise addition.
func (Float64Ops) Div ¶
func (ops Float64Ops) Div(a, b float64) float64
Div performs element-wise division.
func (Float64Ops) Exp ¶
func (ops Float64Ops) Exp(x float64) float64
Exp computes the exponential of x.
func (Float64Ops) FromFloat32 ¶
func (ops Float64Ops) FromFloat32(f float32) float64
FromFloat32 converts a float32 to a float64.
func (Float64Ops) FromFloat64 ¶
func (ops Float64Ops) FromFloat64(f float64) float64
FromFloat64 converts a float64 to a float64.
func (Float64Ops) GreaterThan ¶
func (ops Float64Ops) GreaterThan(a, b float64) bool
GreaterThan checks if a is greater than b.
func (Float64Ops) IsZero ¶
func (ops Float64Ops) IsZero(v float64) bool
IsZero checks if the given float64 value is zero.
func (Float64Ops) LeakyReLU ¶
func (ops Float64Ops) LeakyReLU(x, alpha float64) float64
LeakyReLU computes the Leaky Rectified Linear Unit function.
func (Float64Ops) LeakyReLUGrad ¶
func (ops Float64Ops) LeakyReLUGrad(x, alpha float64) float64
LeakyReLUGrad computes the gradient of the Leaky Rectified Linear Unit function.
func (Float64Ops) Log ¶
func (ops Float64Ops) Log(x float64) float64
Log computes the natural logarithm of x.
func (Float64Ops) Mul ¶
func (ops Float64Ops) Mul(a, b float64) float64
Mul performs element-wise multiplication.
func (Float64Ops) Pow ¶
func (ops Float64Ops) Pow(base, exponent float64) float64
Pow computes base raised to the power of exponent.
func (Float64Ops) ReLU ¶
func (ops Float64Ops) ReLU(x float64) float64
ReLU computes the Rectified Linear Unit function.
func (Float64Ops) ReLUGrad ¶
func (ops Float64Ops) ReLUGrad(x float64) float64
ReLUGrad computes the gradient of the Rectified Linear Unit function.
func (Float64Ops) Sigmoid ¶
func (ops Float64Ops) Sigmoid(x float64) float64
Sigmoid computes the sigmoid function of x.
func (Float64Ops) SigmoidGrad ¶
func (ops Float64Ops) SigmoidGrad(x float64) float64
SigmoidGrad computes the gradient of the sigmoid function.
func (Float64Ops) Sqrt ¶
func (ops Float64Ops) Sqrt(x float64) float64
Sqrt computes the square root of x.
func (Float64Ops) Sub ¶
func (ops Float64Ops) Sub(a, b float64) float64
Sub performs element-wise subtraction.
func (Float64Ops) Sum ¶
func (ops Float64Ops) Sum(s []float64) float64
Sum computes the sum of elements in a slice.
func (Float64Ops) Tanh ¶
func (ops Float64Ops) Tanh(x float64) float64
Tanh computes the hyperbolic tangent of x.
func (Float64Ops) TanhGrad ¶
func (ops Float64Ops) TanhGrad(x float64) float64
TanhGrad computes the gradient of the hyperbolic tangent function.
func (Float64Ops) ToFloat32 ¶
func (ops Float64Ops) ToFloat32(t float64) float32
ToFloat32 converts a float64 to a float32.
type Int8Ops ¶
type Int8Ops struct{}
Int8Ops provides the implementation of the Arithmetic interface for the int8 type.
func (Int8Ops) FromFloat32 ¶
FromFloat32 converts a float32 to an int8.
func (Int8Ops) FromFloat64 ¶
FromFloat64 converts a float64 to an int8.
func (Int8Ops) GreaterThan ¶
GreaterThan checks if a is greater than b.
func (Int8Ops) LeakyReLUGrad ¶
LeakyReLUGrad computes the gradient of the Leaky Rectified Linear Unit function.
func (Int8Ops) SigmoidGrad ¶
SigmoidGrad computes the gradient of the sigmoid function.
type IntOps ¶
type IntOps struct{}
IntOps implements Arithmetic for int.
func (IntOps) FromFloat32 ¶
FromFloat32 converts a float32 to an int.
func (IntOps) FromFloat64 ¶
FromFloat64 converts a float64 to an int.
func (IntOps) GreaterThan ¶
GreaterThan checks if a is greater than b.
func (IntOps) LeakyReLUGrad ¶
LeakyReLUGrad computes the gradient of the Leaky Rectified Linear Unit function.
func (IntOps) SigmoidGrad ¶
SigmoidGrad computes the gradient of the sigmoid function.
type LeakyReLUTestCase ¶
type LeakyReLUTestCase[T any] struct {
	// contains filtered or unexported fields
}
LeakyReLUTestCase represents a test case for LeakyReLU operations.
type QuantizationConfig ¶
type QuantizationConfig struct {
Scale float32
ZeroPoint int64
Symmetric bool // If true, zero_point is ignored and assumed to be 0
}
QuantizationConfig holds the parameters for quantization operations.
func ComputeQuantizationParams ¶
func ComputeQuantizationParams(minVal, maxVal float32, symmetric bool) (*QuantizationConfig, error)
ComputeQuantizationParams computes the optimal scale and zero_point for a given data range. This is useful for dynamic quantization, where the parameters are not predetermined.
func NewQuantizationConfig ¶
func NewQuantizationConfig(scale float32, zeroPoint int64, symmetric bool) (*QuantizationConfig, error)
NewQuantizationConfig creates a quantization configuration with validation.
func (*QuantizationConfig) Dequantize ¶
func (qc *QuantizationConfig) Dequantize(quantized uint8) float32
Dequantize converts a uint8 value back to float32 using linear dequantization. Formula: dequantized = scale * (quantized - zero_point).
func (*QuantizationConfig) Dequantize4BitWeights ¶
func (qc *QuantizationConfig) Dequantize4BitWeights(packed []uint8) []float32
Dequantize4BitWeights combines 4-bit unpacking with dequantization. This is the typical operation for MatMulNBits: unpack 4-bit -> dequantize -> float32.
func (*QuantizationConfig) DequantizeSlice ¶
func (qc *QuantizationConfig) DequantizeSlice(quantized []uint8) []float32
DequantizeSlice dequantizes a slice of uint8 values to float32.
func (*QuantizationConfig) QuantizationError ¶
func (qc *QuantizationConfig) QuantizationError(original []float32) float64
QuantizationError computes the root-mean-square (RMS) quantization error between the original values and their quantized values.
func (*QuantizationConfig) Quantize ¶
func (qc *QuantizationConfig) Quantize(value float32) uint8
Quantize converts a float32 value to uint8 using linear quantization. Formula: quantized = round(value / scale + zero_point).
func (*QuantizationConfig) QuantizeSlice ¶
func (qc *QuantizationConfig) QuantizeSlice(values []float32) []uint8
QuantizeSlice quantizes a slice of float32 values to uint8.
func (*QuantizationConfig) ValidateQuantizationRoundTrip ¶
func (qc *QuantizationConfig) ValidateQuantizationRoundTrip(values []float32, tolerance float64) error
ValidateQuantizationRoundTrip checks that quantize -> dequantize preserves the values within the given tolerance.
type SumTestCase ¶
type SumTestCase[T any] struct {
	// contains filtered or unexported fields
}
SumTestCase represents a test case for sum operations.
type Uint8Ops ¶
type Uint8Ops struct{}
Uint8Ops provides the implementation of the Arithmetic interface for the uint8 type.
func (Uint8Ops) FromFloat32 ¶
FromFloat32 converts a float32 to a uint8.
func (Uint8Ops) FromFloat64 ¶
FromFloat64 converts a float64 to a uint8.
func (Uint8Ops) GreaterThan ¶
GreaterThan checks if a is greater than b.
func (Uint8Ops) LeakyReLUGrad ¶
LeakyReLUGrad computes the gradient of the Leaky Rectified Linear Unit function.
func (Uint8Ops) SigmoidGrad ¶
SigmoidGrad computes the gradient of the sigmoid function.
type UnaryTestCase ¶
type UnaryTestCase[T any] struct {
	// contains filtered or unexported fields
}
UnaryTestCase represents a test case for unary operations.