Documentation
¶
Index ¶
- Variables
- func ValueNumeric[T constraints.Integer | constraints.Float](kv GGUFMetadataKV) T
- func ValuesNumeric[T constraints.Integer | constraints.Float](av GGUFMetadataKVArrayValue) []T
- type GGMLType
- type GGMLTypeTrait
- type GGUFArchitectureMetadata
- type GGUFBitsPerWeightScalar
- type GGUFBytesScalar
- type GGUFEstimate
- type GGUFEstimateOption
- type GGUFFile
- type GGUFFileType
- type GGUFFilename
- type GGUFHeader
- type GGUFKVCacheUsage
- type GGUFLayerTensorInfos
- func (ltis GGUFLayerTensorInfos) Bytes() uint64
- func (ltis GGUFLayerTensorInfos) Cut(names []string) (before, after GGUFLayerTensorInfos, found bool)
- func (ltis GGUFLayerTensorInfos) Elements() uint64
- func (ltis GGUFLayerTensorInfos) Get(name string) (info GGUFTensorInfo, found bool)
- func (ltis GGUFLayerTensorInfos) Index(names []string) (infos map[string]GGUFTensorInfo, found int)
- func (ltis GGUFLayerTensorInfos) Search(nameRegex *regexp.Regexp) (infos []GGUFTensorInfo)
- type GGUFMagic
- type GGUFMemoryUsage
- type GGUFMetadataKV
- func (kv GGUFMetadataKV) ValueArray() GGUFMetadataKVArrayValue
- func (kv GGUFMetadataKV) ValueBool() bool
- func (kv GGUFMetadataKV) ValueFloat32() float32
- func (kv GGUFMetadataKV) ValueFloat64() float64
- func (kv GGUFMetadataKV) ValueInt16() int16
- func (kv GGUFMetadataKV) ValueInt32() int32
- func (kv GGUFMetadataKV) ValueInt64() int64
- func (kv GGUFMetadataKV) ValueInt8() int8
- func (kv GGUFMetadataKV) ValueString() string
- func (kv GGUFMetadataKV) ValueUint16() uint16
- func (kv GGUFMetadataKV) ValueUint32() uint32
- func (kv GGUFMetadataKV) ValueUint64() uint64
- func (kv GGUFMetadataKV) ValueUint8() uint8
- type GGUFMetadataKVArrayValue
- func (av GGUFMetadataKVArrayValue) ValuesArray() []GGUFMetadataKVArrayValue
- func (av GGUFMetadataKVArrayValue) ValuesBool() []bool
- func (av GGUFMetadataKVArrayValue) ValuesFloat32() []float32
- func (av GGUFMetadataKVArrayValue) ValuesFloat64() []float64
- func (av GGUFMetadataKVArrayValue) ValuesInt16() []int16
- func (av GGUFMetadataKVArrayValue) ValuesInt32() []int32
- func (av GGUFMetadataKVArrayValue) ValuesInt64() []int64
- func (av GGUFMetadataKVArrayValue) ValuesInt8() []int8
- func (av GGUFMetadataKVArrayValue) ValuesString() []string
- func (av GGUFMetadataKVArrayValue) ValuesUint16() []uint16
- func (av GGUFMetadataKVArrayValue) ValuesUint32() []uint32
- func (av GGUFMetadataKVArrayValue) ValuesUint64() []uint64
- func (av GGUFMetadataKVArrayValue) ValuesUint8() []uint8
- type GGUFMetadataKVs
- type GGUFMetadataValueType
- type GGUFModelMetadata
- type GGUFNamedTensorInfos
- type GGUFParametersScalar
- type GGUFReadOption
- type GGUFTensorInfo
- func (ti GGUFTensorInfo) Bytes() uint64
- func (ti GGUFTensorInfo) Elements() uint64
- func (ti GGUFTensorInfo) Get(name string) (info GGUFTensorInfo, found bool)
- func (ti GGUFTensorInfo) Index(names []string) (infos map[string]GGUFTensorInfo, found int)
- func (ti GGUFTensorInfo) Search(nameRegex *regexp.Regexp) (infos []GGUFTensorInfo)
- type GGUFTensorInfos
- func (tis GGUFTensorInfos) Bytes() uint64
- func (tis GGUFTensorInfos) Elements() uint64
- func (tis GGUFTensorInfos) Get(name string) (info GGUFTensorInfo, found bool)
- func (tis GGUFTensorInfos) Index(names []string) (infos map[string]GGUFTensorInfo, found int)
- func (tis GGUFTensorInfos) Search(nameRegex *regexp.Regexp) (infos []GGUFTensorInfo)
- type GGUFTokenizerMetadata
- type GGUFVersion
- type IGGUFTensorInfos
Constants ¶
This section is empty.
Variables ¶
var ErrGGUFFileInvalidFormat = errors.New("invalid GGUF format")
var GGUFFilenameRegex = regexp.MustCompile(`^(?P<model_name>[A-Za-z0-9\s-]+)(?:-v(?P<major>\d+)\.(?P<minor>\d+))?-(?:(?P<experts_count>\d+)x)?(?P<parameters>\d+[A-Za-z]?)-(?P<encoding_scheme>[\w_]+)(?:-(?P<shard>\d{5})-of-(?P<shardTotal>\d{5}))?\.gguf$`) // nolint:lll
Functions ¶
func ValueNumeric ¶
func ValueNumeric[T constraints.Integer | constraints.Float](kv GGUFMetadataKV) T
ValueNumeric returns the numeric value of the GGUFMetadataKV, and panics if the value type is not numeric.
ValueNumeric is a generic function, and the type T must be constraints.Integer or constraints.Float.
Compared to the GGUFMetadataKV's Value* functions, ValueNumeric will cast the original value to the target type.
func ValuesNumeric ¶
func ValuesNumeric[T constraints.Integer | constraints.Float](av GGUFMetadataKVArrayValue) []T
ValuesNumeric returns the numeric values of the GGUFMetadataKVArrayValue, and panics if the value type is not numeric.
ValuesNumeric is a generic function, and the type T must be constraints.Integer or constraints.Float.
Compared to the GGUFMetadataKVArrayValue's Values* functions, ValuesNumeric will cast the original values to the target type.
Types ¶
type GGMLType ¶
type GGMLType uint32
GGMLType is a type of GGML tensor, see https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure.
const ( GGMLTypeF32 GGMLType = iota GGMLTypeF16 GGMLTypeQ4_0 GGMLTypeQ4_1 GGMLTypeQ4_2 GGMLTypeQ4_3 GGMLTypeQ5_0 GGMLTypeQ5_1 GGMLTypeQ8_0 GGMLTypeQ8_1 GGMLTypeQ2_K GGMLTypeQ3_K GGMLTypeQ4_K GGMLTypeQ5_K GGMLTypeQ6_K GGMLTypeQ8_K GGMLTypeIQ2_XXS GGMLTypeIQ2_XS GGMLTypeIQ3_XXS GGMLTypeIQ1_S GGMLTypeIQ4_NL GGMLTypeIQ3_S GGMLTypeIQ2_S GGMLTypeIQ4_XS GGMLTypeI8 GGMLTypeI16 GGMLTypeI32 GGMLTypeI64 GGMLTypeF64 GGMLTypeIQ1_M GGMLTypeBF16 )
GGMLType constants.
GGMLTypeQ4_2, GGMLTypeQ4_3 are deprecated.
func (GGMLType) RowSizeOf ¶
RowSizeOf returns the size of the given dimensions according to the GGMLType's GGMLTypeTrait, which is inspired by https://github.com/ggerganov/ggml/blob/0cbb7c0e053f5419cfbebb46fbf4d4ed60182cf5/src/ggml.c#L3142-L3145.
The index into the given dimensions identifies the dimension, i.e. index 0 is the first dimension, index 1 is the second dimension, and so on.
The value of the item is the number of elements in the corresponding dimension.
func (GGMLType) Trait ¶
func (t GGMLType) Trait() (GGMLTypeTrait, bool)
Trait returns the GGMLTypeTrait of the GGMLType.
type GGMLTypeTrait ¶
type GGMLTypeTrait struct {
BlockSize uint64 // Original is int, in order to reduce conversion, here we use uint64.
TypeSize uint64 // Original is uint32, in order to reduce conversion, here we use uint64.
Quantized bool
}
GGMLTypeTrait holds the trait of a GGMLType, see https://github.com/ggerganov/ggml/blob/0cbb7c0e053f5419cfbebb46fbf4d4ed60182cf5/src/ggml.c#L564-L918.
type GGUFArchitectureMetadata ¶
type GGUFArchitectureMetadata struct {
// MaximumContextLength(n_ctx_train) is the maximum context length of the model.
//
// For most architectures, this is the hard limit on the length of the input.
// Architectures, like RWKV,
// that are not reliant on transformer-style attention may be able to handle larger inputs,
// but this is not guaranteed.
MaximumContextLength uint64 `json:"maximumContextLength"`
// EmbeddingLength(n_embd) is the length of the embedding layer.
EmbeddingLength uint64 `json:"embeddingLength"`
// BlockCount(n_layer) is the number of blocks of attention and feed-forward layers,
// i.e. the bulk of the LLM.
// This does not include the input or embedding layers.
BlockCount uint64 `json:"blockCount"`
// FeedForwardLength(n_ff) is the length of the feed-forward layer.
FeedForwardLength uint64 `json:"feedForwardLength,omitempty"`
// ExpertCount(n_expert) is the number of experts in MoE models.
ExpertCount uint32 `json:"expertCount,omitempty"`
// ExpertUsedCount(n_expert_used) is the number of experts used during each token evaluation in MoE models.
ExpertUsedCount uint32 `json:"expertUsedCount,omitempty"`
// AttentionHeadCount(n_head) is the number of attention heads.
AttentionHeadCount uint64 `json:"attentionHeadCount,omitempty"`
// AttentionHeadCountKV(n_head_kv) is the number of attention heads per group used in Grouped-Query-Attention.
//
// If not provided or equal to AttentionHeadCount,
// the model does not use Grouped-Query-Attention.
AttentionHeadCountKV uint64 `json:"attentionHeadCountKV,omitempty"`
// AttentionMaxALiBIBias is the maximum bias to use for ALiBI.
AttentionMaxALiBIBias float32 `json:"attentionMaxALiBIBias,omitempty"`
// AttentionClampKQV describes a value `C`,
// which is used to clamp the values of the `Q`, `K` and `V` tensors between `[-C, C]`.
AttentionClampKQV float32 `json:"attentionClampKQV,omitempty"`
// AttentionLayerNormEpsilon is the epsilon value used in the LayerNorm(Layer Normalization).
AttentionLayerNormEpsilon float32 `json:"attentionLayerNormEpsilon,omitempty"`
// AttentionLayerNormRMSEpsilon is the epsilon value used in the RMSNorm(Root Mean Square Layer Normalization),
// which is a simplification of the original LayerNorm.
AttentionLayerNormRMSEpsilon float32 `json:"attentionLayerNormRMSEpsilon,omitempty"`
// AttentionKeyLength is the size of a key head.
//
// Defaults to `EmbeddingLength / AttentionHeadCount`.
AttentionKeyLength uint32 `json:"attentionKeyLength"`
// AttentionValueLength is the size of a value head.
//
// Defaults to `EmbeddingLength / AttentionHeadCount`.
AttentionValueLength uint32 `json:"attentionValueLength"`
// RoPEDimensionCount is the number of dimensions in the RoPE(Rotary Positional Encoding).
RoPEDimensionCount uint64 `json:"ropeDimensionCount,omitempty"`
// RoPEFrequencyBase is the base frequency of the RoPE.
RoPEFrequencyBase float32 `json:"ropeFrequencyBase,omitempty"`
// RoPEScalingType is the scaling type of the RoPE.
RoPEScalingType string `json:"ropeScalingType,omitempty"`
// RoPEScalingFactor is the scaling factor of the RoPE.
RoPEScalingFactor float32 `json:"ropeScalingFactor,omitempty"`
// RoPEScalingOriginalContextLength is the original context length of the RoPE scaling.
RoPEScalingOriginalContextLength uint64 `json:"ropeScalingOriginalContextLength,omitempty"`
// RoPEScalingFinetuned is true if the RoPE scaling is fine-tuned.
RoPEScalingFinetuned bool `json:"ropeScalingFinetuned,omitempty"`
// SSMConvolutionKernel is the size of the convolution kernel used in the SSM(Selective State Space Model).
SSMConvolutionKernel uint32 `json:"ssmConvolutionKernel,omitempty"`
// SSMInnerSize is the embedding size of the state in SSM.
SSMInnerSize uint32 `json:"ssmInnerSize,omitempty"`
// SSMStateSize is the size of the recurrent state in SSM.
SSMStateSize uint32 `json:"ssmStateSize,omitempty"`
// SSMTimeStepRank is the rank of the time steps in SSM.
SSMTimeStepRank uint32 `json:"ssmTimeStepRank,omitempty"`
// VocabularyLength is the size of the vocabulary.
//
// VocabularyLength is the same as the tokenizer's token size.
VocabularyLength uint64 `json:"vocabularyLength"`
}
GGUFArchitectureMetadata represents the architecture metadata of a GGUF file.
type GGUFBitsPerWeightScalar ¶
type GGUFBitsPerWeightScalar float64
GGUFBitsPerWeightScalar is the scalar for bits per weight.
func (GGUFBitsPerWeightScalar) String ¶
func (s GGUFBitsPerWeightScalar) String() string
type GGUFBytesScalar ¶
type GGUFBytesScalar uint64
GGUFBytesScalar is the scalar for bytes.
func (GGUFBytesScalar) String ¶
func (s GGUFBytesScalar) String() string
type GGUFEstimate ¶
type GGUFEstimate struct {
// Offload is the offloaded layers usage.
Offload *GGUFMemoryUsage `json:"offload,omitempty"`
// Total is the total memory usage.
Total GGUFMemoryUsage `json:"total"`
}
GGUFEstimate represents the estimated result of the GGUF file.
type GGUFEstimateOption ¶
type GGUFEstimateOption func(*_GGUFEstimateOptions)
func WithCacheKeyType ¶
func WithCacheKeyType(t GGMLType) GGUFEstimateOption
WithCacheKeyType sets the cache key type for the estimate.
func WithCacheValueType ¶
func WithCacheValueType(t GGMLType) GGUFEstimateOption
WithCacheValueType sets the cache value type for the estimate.
func WithContextSize ¶
func WithContextSize(size int32) GGUFEstimateOption
WithContextSize sets the context size for the estimate.
func WithOffloadLayers ¶ added in v0.0.2
func WithOffloadLayers(layers uint64) GGUFEstimateOption
WithOffloadLayers sets the number of layers to offload.
type GGUFFile ¶
type GGUFFile struct {
// Header is the header of the GGUF file.
Header GGUFHeader `json:"header"`
// TensorInfos are the tensor infos of the GGUF file,
// the size of TensorInfos is equal to `Header.TensorCount`.
TensorInfos GGUFTensorInfos `json:"tensorInfos"`
// Padding is the padding size of the GGUF file,
// which is used to split Header and TensorInfos from tensor data.
Padding int64 `json:"padding"`
// TensorDataStartOffset is the offset in bytes of the tensor data in this file.
//
// The offset is relative to the start of the file.
TensorDataStartOffset int64 `json:"tensorDataStartOffset"`
// ModelSize is the size of the model when loading.
ModelSize GGUFBytesScalar `json:"modelSize"`
// ModelParameters is the number of the model parameters.
ModelParameters GGUFParametersScalar `json:"modelParameters"`
// ModelBitsPerWeight is the bits per weight of the model,
// which describes how many bits are used to store a weight,
// higher is better.
ModelBitsPerWeight GGUFBitsPerWeightScalar `json:"modelBitsPerWeight"`
}
GGUFFile represents a GGUF file, see https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure.
Compared with the complete GGUF file, this structure lacks the tensor data part.
func ParseGGUFFile ¶
func ParseGGUFFile(path string, opts ...GGUFReadOption) (*GGUFFile, error)
ParseGGUFFile parses a GGUF file from the local given path, and returns the GGUFFile, or an error if any.
func ParseGGUFFileFromHuggingFace ¶
func ParseGGUFFileFromHuggingFace(ctx context.Context, repo, model string, opts ...GGUFReadOption) (*GGUFFile, error)
ParseGGUFFileFromHuggingFace parses a GGUF file from Hugging Face, and returns a GGUFFile, or an error if any.
func ParseGGUFFileRemote ¶
func ParseGGUFFileRemote(ctx context.Context, url string, opts ...GGUFReadOption) (*GGUFFile, error)
ParseGGUFFileRemote parses a GGUF file from a remote URL, and returns a GGUFFile, or an error if any.
func (*GGUFFile) Architecture ¶
func (gf *GGUFFile) Architecture() (ga GGUFArchitectureMetadata)
Architecture returns the architecture metadata of the GGUF file.
func (*GGUFFile) Estimate ¶
func (gf *GGUFFile) Estimate(opts ...GGUFEstimateOption) (ge GGUFEstimate)
Estimate returns the inference usage estimated result of the GGUF file.
func (*GGUFFile) Layers ¶ added in v0.0.2
func (gf *GGUFFile) Layers() GGUFLayerTensorInfos
Layers converts the GGUFTensorInfos to GGUFLayerTensorInfos.
func (*GGUFFile) Model ¶
func (gf *GGUFFile) Model() (gm GGUFModelMetadata)
Model returns the model metadata of the GGUF file.
func (*GGUFFile) Tokenizer ¶
func (gf *GGUFFile) Tokenizer() (gt GGUFTokenizerMetadata)
Tokenizer returns the tokenizer metadata of a GGUF file.
type GGUFFileType ¶
type GGUFFileType uint32
GGUFFileType is a type of GGUF file, see https://github.com/ggerganov/ggml/blob/0cbb7c0e053f5419cfbebb46fbf4d4ed60182cf5/include/ggml/ggml.h#L396-L421.
const ( GGUFFileTypeAllF32 GGUFFileType = iota // F32 GGUFFileTypeMostlyF16 // F16 GGUFFileTypeMostlyQ4_0 // Q4_0 GGUFFileTypeMostlyQ4_1 // Q4_1 GGUFFileTypeMostlyQ4_1_F16 // Q4_1_F16 GGUFFileTypeMostlyQ4_2 // Q4_2 GGUFFileTypeMostlyQ4_3 // Q4_3 GGUFFileTypeMostlyQ8_0 // Q8_0 GGUFFileTypeMostlyQ5_0 // Q5_0 GGUFFileTypeMostlyQ5_1 // Q5_1 GGUFFileTypeMostlyQ2_K // Q2_K GGUFFileTypeMostlyQ3_K // Q3_K/Q3_K_S GGUFFileTypeMostlyQ4_K // Q4_K/Q3_K_M GGUFFileTypeMostlyQ5_K // Q5_K/Q3_K_L GGUFFileTypeMostlyQ6_K // Q6_K/Q4_K_S GGUFFileTypeMostlyIQ2_XXS // IQ2_XXS/Q4_K_M GGUFFileTypeMostlyIQ2_XS // IQ2_XS/Q5_K_S GGUFFileTypeMostlyIQ3_XXS // IQ3_XXS/Q5_K_M GGUFFileTypeMostlyIQ1_S // IQ1_S/Q6_K GGUFFileTypeMostlyIQ4_NL // IQ4_NL GGUFFileTypeMostlyIQ3_S // IQ3_S GGUFFileTypeMostlyIQ2_S // IQ2_S GGUFFileTypeMostlyIQ4_XS // IQ4_XS GGUFFileTypeMostlyIQ1_M // IQ1_M GGUFFileTypeMostlyBF16 // BF16 )
GGUFFileType constants.
GGUFFileTypeMostlyQ4_2, GGUFFileTypeMostlyQ4_3 are deprecated.
GGUFFileTypeMostlyQ4_1_F16 is a special case where the majority of the tensors are Q4_1, but 'token_embd.weight' and 'output.weight' tensors are F16.
func (GGUFFileType) GGMLType ¶
func (t GGUFFileType) GGMLType() GGMLType
GGMLType returns the GGMLType of the GGUFFileType, which is inspired by https://github.com/ggerganov/ggml/blob/a10a8b880c059b3b29356eb9a9f8df72f03cdb6a/src/ggml.c#L2730-L2763.
func (GGUFFileType) String ¶
func (i GGUFFileType) String() string
type GGUFFilename ¶
type GGUFFilename struct {
ModelName string `json:"modelName"`
Major *int `json:"major"`
Minor *int `json:"minor"`
ExpertsCount *int `json:"expertsCount,omitempty"`
Parameters string `json:"parameters"`
EncodingScheme string `json:"encodingScheme"`
Shard *int `json:"shard,omitempty"`
ShardTotal *int `json:"shardTotal,omitempty"`
}
GGUFFilename represents a GGUF filename, see https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#gguf-naming-convention.
func ParseGGUFFilename ¶
func ParseGGUFFilename(name string) *GGUFFilename
ParseGGUFFilename parses the given GGUF filename string, and returns the GGUFFilename, or nil if the filename is invalid.
func (GGUFFilename) IsPreRelease ¶
func (gn GGUFFilename) IsPreRelease() bool
func (GGUFFilename) IsSharding ¶
func (gn GGUFFilename) IsSharding() bool
func (GGUFFilename) String ¶
func (gn GGUFFilename) String() string
type GGUFHeader ¶
type GGUFHeader struct {
// Magic is a magic number that announces that this is a GGUF file.
Magic GGUFMagic `json:"magic"`
// Version is a version of the GGUF file format.
Version GGUFVersion `json:"version"`
// TensorCount is the number of tensors in the file.
TensorCount uint64 `json:"tensorCount"`
// MetadataKVCount is the number of key-value pairs in the metadata.
MetadataKVCount uint64 `json:"metadataKVCount"`
// MetadataKV are the key-value pairs in the metadata.
MetadataKV GGUFMetadataKVs `json:"metadataKV"`
}
GGUFHeader represents the header of a GGUF file.
type GGUFKVCacheUsage ¶ added in v0.0.2
type GGUFKVCacheUsage struct {
// Key is the memory usage of the cached key.
Key GGUFBytesScalar `json:"key"`
// Value is the memory usage of the cached value.
Value GGUFBytesScalar `json:"value"`
}
GGUFKVCacheUsage represents the usage of kv-cache.
func (GGUFKVCacheUsage) Sum ¶ added in v0.0.2
func (c GGUFKVCacheUsage) Sum() GGUFBytesScalar
type GGUFLayerTensorInfos ¶ added in v0.0.2
type GGUFLayerTensorInfos []IGGUFTensorInfos
GGUFLayerTensorInfos represents hierarchical tensor infos of a GGUF file, it can save GGUFNamedTensorInfos, GGUFTensorInfos, and GGUFTensorInfo.
func (GGUFLayerTensorInfos) Bytes ¶ added in v0.0.2
func (ltis GGUFLayerTensorInfos) Bytes() uint64
Bytes returns the number of bytes of the GGUFLayerTensorInfos.
func (GGUFLayerTensorInfos) Cut ¶ added in v0.0.2
func (ltis GGUFLayerTensorInfos) Cut(names []string) (before, after GGUFLayerTensorInfos, found bool)
Cut splits the GGUFLayerTensorInfos into two parts: the first result holds the tensor infos whose names match the given names, and the second result holds the remaining tensor infos. The third result is true if the tensor infos with the given names are found, and false otherwise.
func (GGUFLayerTensorInfos) Elements ¶ added in v0.0.2
func (ltis GGUFLayerTensorInfos) Elements() uint64
Elements returns the number of elements of the GGUFLayerTensorInfos.
func (GGUFLayerTensorInfos) Get ¶ added in v0.0.2
func (ltis GGUFLayerTensorInfos) Get(name string) (info GGUFTensorInfo, found bool)
Get returns the GGUFTensorInfo with the given name, and true if found, and false otherwise.
func (GGUFLayerTensorInfos) Index ¶ added in v0.0.2
func (ltis GGUFLayerTensorInfos) Index(names []string) (infos map[string]GGUFTensorInfo, found int)
Index returns a map of the GGUFTensorInfo items with the given names, and the number of names found.
func (GGUFLayerTensorInfos) Search ¶ added in v0.0.2
func (ltis GGUFLayerTensorInfos) Search(nameRegex *regexp.Regexp) (infos []GGUFTensorInfo)
Search returns a list of GGUFTensorInfo with the names that match the given regex.
type GGUFMagic ¶
type GGUFMagic uint32
GGUFMagic is a magic number of GGUF file, see https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#historical-state-of-affairs.
type GGUFMemoryUsage ¶ added in v0.0.2
type GGUFMemoryUsage struct {
// KVCache is the usage of key-value cache.
KVCache GGUFKVCacheUsage `json:"kvCache"`
// Compute is the usage of transformer layers.
Compute GGUFBytesScalar `json:"compute"`
// IO is the usage of input/output layers.
IO GGUFBytesScalar `json:"io"`
}
GGUFMemoryUsage represents the memory usage of the GGUF file.
func (GGUFMemoryUsage) Sum ¶ added in v0.0.2
func (m GGUFMemoryUsage) Sum() GGUFBytesScalar
type GGUFMetadataKV ¶
type GGUFMetadataKV struct {
// Key is the key of the metadata key-value pair,
// which is no larger than 64 bytes long.
Key string `json:"key"`
// ValueType is the type of the metadata value.
ValueType GGUFMetadataValueType `json:"valueType"`
// Value is the value of the metadata key-value pair.
Value any `json:"value"`
}
GGUFMetadataKV is a key-value pair in the metadata of a GGUF file.
func (GGUFMetadataKV) ValueArray ¶
func (kv GGUFMetadataKV) ValueArray() GGUFMetadataKVArrayValue
func (GGUFMetadataKV) ValueBool ¶
func (kv GGUFMetadataKV) ValueBool() bool
func (GGUFMetadataKV) ValueFloat32 ¶
func (kv GGUFMetadataKV) ValueFloat32() float32
func (GGUFMetadataKV) ValueFloat64 ¶
func (kv GGUFMetadataKV) ValueFloat64() float64
func (GGUFMetadataKV) ValueInt16 ¶
func (kv GGUFMetadataKV) ValueInt16() int16
func (GGUFMetadataKV) ValueInt32 ¶
func (kv GGUFMetadataKV) ValueInt32() int32
func (GGUFMetadataKV) ValueInt64 ¶
func (kv GGUFMetadataKV) ValueInt64() int64
func (GGUFMetadataKV) ValueInt8 ¶
func (kv GGUFMetadataKV) ValueInt8() int8
func (GGUFMetadataKV) ValueString ¶
func (kv GGUFMetadataKV) ValueString() string
func (GGUFMetadataKV) ValueUint16 ¶
func (kv GGUFMetadataKV) ValueUint16() uint16
func (GGUFMetadataKV) ValueUint32 ¶
func (kv GGUFMetadataKV) ValueUint32() uint32
func (GGUFMetadataKV) ValueUint64 ¶
func (kv GGUFMetadataKV) ValueUint64() uint64
func (GGUFMetadataKV) ValueUint8 ¶
func (kv GGUFMetadataKV) ValueUint8() uint8
type GGUFMetadataKVArrayValue ¶
type GGUFMetadataKVArrayValue struct {
// Type is the type of the array item.
Type GGUFMetadataValueType `json:"type"`
// Len is the length of the array.
Len uint64 `json:"len"`
// Array holds all array items.
//
// Array may be empty if skipping.
Array []any `json:"array,omitempty"`
// StartOffset is the offset in bytes of the GGUFMetadataKVArrayValue in the GGUFFile file.
//
// The offset is relative to the start of the file.
StartOffset int64 `json:"startOffset"`
}
GGUFMetadataKVArrayValue is a value of a GGUFMetadataKV with type GGUFMetadataValueTypeArray.
func (GGUFMetadataKVArrayValue) ValuesArray ¶
func (av GGUFMetadataKVArrayValue) ValuesArray() []GGUFMetadataKVArrayValue
func (GGUFMetadataKVArrayValue) ValuesBool ¶
func (av GGUFMetadataKVArrayValue) ValuesBool() []bool
func (GGUFMetadataKVArrayValue) ValuesFloat32 ¶
func (av GGUFMetadataKVArrayValue) ValuesFloat32() []float32
func (GGUFMetadataKVArrayValue) ValuesFloat64 ¶
func (av GGUFMetadataKVArrayValue) ValuesFloat64() []float64
func (GGUFMetadataKVArrayValue) ValuesInt16 ¶
func (av GGUFMetadataKVArrayValue) ValuesInt16() []int16
func (GGUFMetadataKVArrayValue) ValuesInt32 ¶
func (av GGUFMetadataKVArrayValue) ValuesInt32() []int32
func (GGUFMetadataKVArrayValue) ValuesInt64 ¶
func (av GGUFMetadataKVArrayValue) ValuesInt64() []int64
func (GGUFMetadataKVArrayValue) ValuesInt8 ¶
func (av GGUFMetadataKVArrayValue) ValuesInt8() []int8
func (GGUFMetadataKVArrayValue) ValuesString ¶
func (av GGUFMetadataKVArrayValue) ValuesString() []string
func (GGUFMetadataKVArrayValue) ValuesUint16 ¶
func (av GGUFMetadataKVArrayValue) ValuesUint16() []uint16
func (GGUFMetadataKVArrayValue) ValuesUint32 ¶
func (av GGUFMetadataKVArrayValue) ValuesUint32() []uint32
func (GGUFMetadataKVArrayValue) ValuesUint64 ¶
func (av GGUFMetadataKVArrayValue) ValuesUint64() []uint64
func (GGUFMetadataKVArrayValue) ValuesUint8 ¶
func (av GGUFMetadataKVArrayValue) ValuesUint8() []uint8
type GGUFMetadataKVs ¶
type GGUFMetadataKVs []GGUFMetadataKV
GGUFMetadataKVs is a list of GGUFMetadataKV.
func (GGUFMetadataKVs) Get ¶
func (kvs GGUFMetadataKVs) Get(key string) (value GGUFMetadataKV, found bool)
Get returns the GGUFMetadataKV with the given key, and true if found, and false otherwise.
func (GGUFMetadataKVs) Index ¶
func (kvs GGUFMetadataKVs) Index(keys []string) (values map[string]GGUFMetadataKV, found int)
Index returns a map of the GGUFMetadataKV items with the given keys, and the number of keys found.
func (GGUFMetadataKVs) Search ¶
func (kvs GGUFMetadataKVs) Search(keyRegex *regexp.Regexp) (values []GGUFMetadataKV)
Search returns a list of GGUFMetadataKV with the keys that match the given regex.
type GGUFMetadataValueType ¶
type GGUFMetadataValueType uint32
GGUFMetadataValueType is a type of GGUF metadata value, see https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure.
const ( GGUFMetadataValueTypeUint8 GGUFMetadataValueType = iota GGUFMetadataValueTypeInt8 GGUFMetadataValueTypeUint16 GGUFMetadataValueTypeInt16 GGUFMetadataValueTypeUint32 GGUFMetadataValueTypeInt32 GGUFMetadataValueTypeFloat32 GGUFMetadataValueTypeBool GGUFMetadataValueTypeString GGUFMetadataValueTypeArray GGUFMetadataValueTypeUint64 GGUFMetadataValueTypeInt64 GGUFMetadataValueTypeFloat64 )
GGUFMetadataValueType constants.
func (GGUFMetadataValueType) String ¶
func (i GGUFMetadataValueType) String() string
type GGUFModelMetadata ¶
type GGUFModelMetadata struct {
// Architecture describes what architecture this model implements.
//
// All lowercase ASCII, with only [a-z0-9]+ characters allowed.
Architecture string `json:"architecture"`
// QuantizationVersion describes the version of the quantization format.
//
// Not required if the model is not quantized (i.e. no tensors are quantized).
// If any tensors are quantized, this must be present.
// This is separate from the quantization scheme of the tensors themselves,
// the quantization version may change without changing the scheme's name,
// e.g. the quantization scheme is Q5_K, and the QuantizationVersion is 4.
QuantizationVersion uint32 `json:"quantizationVersion,omitempty"`
// Alignment describes the alignment of the GGUF file.
//
// This can vary to allow for different alignment schemes, but it must be a multiple of 8.
// Some writers may not write the alignment.
//
// Default is 32.
Alignment uint32 `json:"alignment"`
// Name of the model.
//
// This should be a human-readable name that can be used to identify the model.
// It should be unique within the community that the model is defined in.
Name string `json:"name"`
// Author of the model.
Author string `json:"author,omitempty"`
// URL to the model's homepage.
//
// This can be a GitHub repo, a paper, etc.
URL string `json:"url,omitempty"`
// Description of the model.
Description string `json:"description,omitempty"`
// License of the model.
//
// This is expressed as a SPDX license expression, e.g. "MIT OR Apache-2.0".
License string `json:"license,omitempty"`
// FileType describes the type of the majority of the tensors in the GGUF file.
FileType GGUFFileType `json:"fileType"`
// LittleEndian is true if the GGUF file is little-endian,
// and false for big-endian.
LittleEndian bool `json:"littleEndian"`
// Size is the size of the GGUF file in bytes.
Size GGUFBytesScalar `json:"size"`
// Parameters is the parameters of the model.
Parameters GGUFParametersScalar `json:"parameters"`
// BitsPerWeight is the bits per weight of the model.
BitsPerWeight GGUFBitsPerWeightScalar `json:"bitsPerWeight"`
}
GGUFModelMetadata represents the model metadata of a GGUF file.
type GGUFNamedTensorInfos ¶ added in v0.0.2
type GGUFNamedTensorInfos struct {
// Name is the name of the namespace.
Name string `json:"name"`
// GGUFLayerTensorInfos can save GGUFNamedTensorInfos, GGUFTensorInfos, or GGUFTensorInfo.
//
// If the item is type of GGUFTensorInfo, it must be the leaf node.
//
// Any branch nodes are type of GGUFNamedTensorInfos or GGUFTensorInfos,
// which can be nested.
//
// Branch nodes are stored as pointers.
GGUFLayerTensorInfos `json:"items,omitempty"`
}
GGUFNamedTensorInfos is the namespace for relevant tensors, which must have a name.
type GGUFParametersScalar ¶
type GGUFParametersScalar uint64
GGUFParametersScalar is the scalar for parameters.
func (GGUFParametersScalar) String ¶
func (s GGUFParametersScalar) String() string
type GGUFReadOption ¶
type GGUFReadOption func(o *_GGUFReadOptions)
func SkipLargeMetadata ¶ added in v0.0.2
func SkipLargeMetadata() GGUFReadOption
SkipLargeMetadata skips reading large GGUFMetadataKV items, which are not necessary for most cases.
func SkipProxy ¶
func SkipProxy() GGUFReadOption
SkipProxy skips the proxy when reading from a remote URL.
func SkipTLSVerification ¶
func SkipTLSVerification() GGUFReadOption
SkipTLSVerification skips the TLS verification when reading from a remote URL.
func UseBufferSize ¶
func UseBufferSize(size int) GGUFReadOption
UseBufferSize sets the buffer size when reading from a remote URL.
func UseProxy ¶
func UseProxy(url *url.URL) GGUFReadOption
UseProxy uses the given url as a proxy when reading from a remote URL.
type GGUFTensorInfo ¶
type GGUFTensorInfo struct {
// Name is the name of the tensor,
// which is no larger than 64 bytes long.
Name string `json:"name"`
// NDimensions is the number of dimensions of the tensor.
NDimensions uint32 `json:"nDimensions"`
// Dimensions is the dimensions of the tensor,
// the length is NDimensions.
Dimensions []uint64 `json:"dimensions"`
// Type is the type of the tensor.
Type GGMLType `json:"type"`
// Offset is the offset in bytes of the tensor's data in this file.
//
// The offset is relative to tensor data, not to the start of the file.
Offset uint64 `json:"offset"`
// StartOffset is the offset in bytes of the GGUFTensorInfo in the GGUFFile file.
//
// The offset is relative to the start of the file.
StartOffset int64 `json:"startOffset"`
}
GGUFTensorInfo represents a tensor info in a GGUF file.
func (GGUFTensorInfo) Bytes ¶
func (ti GGUFTensorInfo) Bytes() uint64
Bytes returns the number of bytes of the GGUFTensorInfo, which is inspired by https://github.com/ggerganov/ggml/blob/a10a8b880c059b3b29356eb9a9f8df72f03cdb6a/src/ggml.c#L2609-L2626.
func (GGUFTensorInfo) Elements ¶
func (ti GGUFTensorInfo) Elements() uint64
Elements returns the number of elements of the GGUFTensorInfo, which is inspired by https://github.com/ggerganov/ggml/blob/a10a8b880c059b3b29356eb9a9f8df72f03cdb6a/src/ggml.c#L2597-L2601.
func (GGUFTensorInfo) Get ¶ added in v0.0.2
func (ti GGUFTensorInfo) Get(name string) (info GGUFTensorInfo, found bool)
Get returns the GGUFTensorInfo with the given name, and true if found, and false otherwise.
func (GGUFTensorInfo) Index ¶ added in v0.0.2
func (ti GGUFTensorInfo) Index(names []string) (infos map[string]GGUFTensorInfo, found int)
Index returns a map of the GGUFTensorInfo items with the given names, and the number of names found.
func (GGUFTensorInfo) Search ¶ added in v0.0.2
func (ti GGUFTensorInfo) Search(nameRegex *regexp.Regexp) (infos []GGUFTensorInfo)
Search returns a list of GGUFTensorInfo with the names that match the given regex.
type GGUFTensorInfos ¶
type GGUFTensorInfos []GGUFTensorInfo
GGUFTensorInfos is a list of GGUFTensorInfo.
func (GGUFTensorInfos) Bytes ¶ added in v0.0.2
func (tis GGUFTensorInfos) Bytes() uint64
Bytes returns the number of bytes of the GGUFTensorInfos.
func (GGUFTensorInfos) Elements ¶ added in v0.0.2
func (tis GGUFTensorInfos) Elements() uint64
Elements returns the number of elements of the GGUFTensorInfos.
func (GGUFTensorInfos) Get ¶
func (tis GGUFTensorInfos) Get(name string) (info GGUFTensorInfo, found bool)
Get returns the GGUFTensorInfo with the given name, and true if found, and false otherwise.
func (GGUFTensorInfos) Index ¶
func (tis GGUFTensorInfos) Index(names []string) (infos map[string]GGUFTensorInfo, found int)
Index returns a map of the GGUFTensorInfo items with the given names, and the number of names found.
func (GGUFTensorInfos) Search ¶
func (tis GGUFTensorInfos) Search(nameRegex *regexp.Regexp) (infos []GGUFTensorInfo)
Search returns a list of GGUFTensorInfo with the names that match the given regex.
type GGUFTokenizerMetadata ¶
type GGUFTokenizerMetadata struct {
// Model is the model of the tokenizer.
Model string `json:"model"`
// TokensLength is the size of tokens.
TokensLength uint64 `json:"tokenLength"`
// AddedTokensLength is the size of added tokens after training.
AddedTokensLength uint64 `json:"addedTokenLength"`
// BOSTokenID is the ID of the beginning of sentence token.
//
// Use -1 if the token is not found.
BOSTokenID int64 `json:"bosTokenID"`
// EOSTokenID is the ID of the end of sentence token.
//
// Use -1 if the token is not found.
EOSTokenID int64 `json:"eosTokenID"`
// UnknownTokenID is the ID of the unknown token.
//
// Use -1 if the token is not found.
UnknownTokenID int64 `json:"unknownTokenID"`
// SeparatorTokenID is the ID of the separator token.
//
// Use -1 if the token is not found.
SeparatorTokenID int64 `json:"separatorTokenID"`
// PaddingTokenID is the ID of the padding token.
//
// Use -1 if the token is not found.
PaddingTokenID int64 `json:"paddingTokenID"`
}
GGUFTokenizerMetadata represents the tokenizer metadata of a GGUF file.
type GGUFVersion ¶
type GGUFVersion uint32
GGUFVersion is a version of GGUF file format, see https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#version-history.
const ( GGUFVersionV1 GGUFVersion = iota + 1 GGUFVersionV2 GGUFVersionV3 )
GGUFVersion constants.
func (GGUFVersion) String ¶
func (i GGUFVersion) String() string
type IGGUFTensorInfos ¶ added in v0.0.2
type IGGUFTensorInfos interface {
// Get returns the GGUFTensorInfo with the given name,
// and true if found, and false otherwise.
Get(name string) (info GGUFTensorInfo, found bool)
// Search returns a list of GGUFTensorInfo with the names that match the given regex.
Search(nameRegex *regexp.Regexp) (infos []GGUFTensorInfo)
// Index returns a map of the GGUFTensorInfo items with the given names,
// and the number of names found.
Index(names []string) (infos map[string]GGUFTensorInfo, found int)
// Elements returns the number of elements of the GGUFTensorInfo.
Elements() uint64
// Bytes returns the number of bytes of the GGUFTensorInfo.
Bytes() uint64
}
IGGUFTensorInfos is an interface for GGUFTensorInfos.
Source Files
¶
- file.go
- file_architecture.go
- file_estimate.go
- file_estimate_option.go
- file_model.go
- file_option.go
- file_tokenizer.go
- filename.go
- gen.go
- zz_generated.ggmltype.stringer.go
- zz_generated.gguffiletype.stringer.go
- zz_generated.ggufmagic.stringer.go
- zz_generated.ggufmetadatavaluetype.stringer.go
- zz_generated.ggufversion.stringer.go