Documentation
¶
Index ¶
- Constants
- func SampleLogits(tensor []float32, temperature float32, topP float32, logitBias map[int]float32) (int, error)
- type CRwkv
- type CRwkvImpl
- func (c *CRwkvImpl) RwkvCloneContext(ctx *RwkvCtx, threads uint32) *RwkvCtx
- func (c *CRwkvImpl) RwkvEval(ctx *RwkvCtx, token uint32, stateIn []float32, stateOut []float32, ...) error
- func (c *CRwkvImpl) RwkvEvalSequence(ctx *RwkvCtx, token uint32, sequenceLen uint64, stateIn []float32, ...) error
- func (c *CRwkvImpl) RwkvFree(ctx *RwkvCtx) error
- func (c *CRwkvImpl) RwkvGetLastError(ctx *RwkvCtx) error
- func (c *CRwkvImpl) RwkvGetLogitsLength(ctx *RwkvCtx) uint64
- func (c *CRwkvImpl) RwkvGetNEmbedding(ctx *RwkvCtx) uint64
- func (c *CRwkvImpl) RwkvGetNLayer(ctx *RwkvCtx) uint64
- func (c *CRwkvImpl) RwkvGetNVocab(ctx *RwkvCtx) uint64
- func (c *CRwkvImpl) RwkvGetPrintErrors(ctx *RwkvCtx) bool
- func (c *CRwkvImpl) RwkvGetStateLength(ctx *RwkvCtx) uint64
- func (c *CRwkvImpl) RwkvGetSystemInfoString() string
- func (c *CRwkvImpl) RwkvGpuOffloadLayers(ctx *RwkvCtx, nGpuLayers uint32) error
- func (c *CRwkvImpl) RwkvInitFromFile(filePath string, threads uint32) *RwkvCtx
- func (c *CRwkvImpl) RwkvInitState(ctx *RwkvCtx, state []float32)
- func (c *CRwkvImpl) RwkvQuantizeModelFile(ctx *RwkvCtx, in, out string, format QuantizedFormat) error
- func (c *CRwkvImpl) RwkvSetPrintErrors(ctx *RwkvCtx, enable bool)
- type GpuType
- type NormalTokenizer
- type QuantizedFormat
- type RwkvCtx
- type RwkvErrors
- type RwkvModel
- type RwkvOptions
- type RwkvState
- func (s *RwkvState) CleanState(prompt ...string) (*RwkvState, error)
- func (s *RwkvState) GetEmbedding(input string, distill bool) ([]float32, error)
- func (s *RwkvState) LoadState(state []float32) error
- func (s *RwkvState) Predict(input string) (string, error)
- func (s *RwkvState) PredictStream(input string, output chan string)
- func (s *RwkvState) SaveState() ([]float32, error)
- type Tokenizer
- type TokenizerType
- type Trie
- type TrieNode
- type WorldTokenizer
Constants ¶
const ( RwkvErrorArgs RwkvErrors = 1 << 8 RwkvErrorFile = 2 << 8 RwkvErrorModel = 3 << 8 RwkvErrorModelParams = 4 << 8 RwkvErrorGraph = 5 << 8 RwkvErrorCtx = 6 << 8 )
Variables ¶
This section is empty.
Functions ¶
Types ¶
type CRwkv ¶
type CRwkv interface { // RwkvSetPrintErrors Sets whether errors are automatically printed to stderr. // If this is set to false, you are responsible for calling rwkv_last_error manually if an operation fails. // - ctx: the context to suppress error messages for. // If NULL, affects model load (rwkv_init_from_file) and quantization (rwkv_quantize_model_file) errors, // as well as the default for new context. // - print_errors: whether error messages should be automatically printed. RwkvSetPrintErrors(ctx *RwkvCtx, enable bool) // RwkvGetPrintErrors Gets whether errors are automatically printed to stderr. // - ctx: the context to retrieve the setting for, or NULL for the global setting. RwkvGetPrintErrors(ctx *RwkvCtx) bool // RwkvGetLastError Retrieves and clears the error flags. // - ctx: the context to retrieve the error for, or NULL for the global error. RwkvGetLastError(ctx *RwkvCtx) error // RwkvInitFromFile Loads the model from a file and prepares it for inference. // Returns NULL on any error. // - model_file_path: path to model file in ggml format. // - n_threads: count of threads to use, must be positive. RwkvInitFromFile(filePath string, threads uint32) *RwkvCtx // RwkvCloneContext Creates a new context from an existing one. // This can allow you to run multiple rwkv_eval's in parallel, without having to load a single model multiple times. // Each rwkv_context can have one eval running at a time. // Every rwkv_context must be freed using rwkv_free. // - ctx: context to be cloned. // - n_threads: count of threads to use, must be positive. RwkvCloneContext(ctx *RwkvCtx, threads uint32) *RwkvCtx // RwkvGpuOffloadLayers Offloads specified layers of context onto GPU using cuBLAS, if it is enabled. // If rwkv.cpp was compiled without cuBLAS support, this function is a no-op. RwkvGpuOffloadLayers(ctx *RwkvCtx, nGpuLayers uint32) error // RwkvEval Evaluates the model for a single token. // Not thread-safe.
For parallel inference, call rwkv_clone_context to create one rwkv_context for each thread. // Returns false on any error. // - token: next token index, in range 0 <= token < n_vocab. // - state_in: FP32 buffer of size rwkv_get_state_len(); or NULL, if this is a first pass. // - state_out: FP32 buffer of size rwkv_get_state_len(). This buffer will be written to if non-NULL. // - logits_out: FP32 buffer of size rwkv_get_logits_len(). This buffer will be written to if non-NULL. RwkvEval(ctx *RwkvCtx, token uint32, stateIn []float32, stateOut []float32, logitsOut []float32) error // RwkvEvalSequence Evaluates the model for a sequence of tokens. // Uses a faster algorithm than rwkv_eval if you do not need the state and logits for every token. Best used with batch sizes of 64 or so. // Has to build a computation graph on the first call for a given sequence, but will use this cached graph for subsequent calls of the same sequence length. // Not thread-safe. For parallel inference, call rwkv_clone_context to create one rwkv_context for each thread. // Returns false on any error. // - tokens: pointer to an array of tokens. If NULL, the graph will be built and cached, but not executed: this can be useful for initialization. // - sequence_len: number of tokens to read from the array. // - state_in: FP32 buffer of size rwkv_get_state_len(), or NULL if this is a first pass. // - state_out: FP32 buffer of size rwkv_get_state_len(). This buffer will be written to if non-NULL. // - logits_out: FP32 buffer of size rwkv_get_logits_len(). This buffer will be written to if non-NULL. RwkvEvalSequence(ctx *RwkvCtx, token uint32, sequenceLen uint64, stateIn []float32, stateOut []float32, logitsOut []float32) error // RwkvGetNVocab Returns the number of tokens in the given model's vocabulary. // Useful for telling 20B_tokenizer models (n_vocab = 50277) apart from World models (n_vocab = 65536). 
RwkvGetNVocab(ctx *RwkvCtx) uint64 // RwkvGetNEmbedding Returns the number of elements in the given model's embedding. // Useful for reading individual fields of a model's hidden state. RwkvGetNEmbedding(ctx *RwkvCtx) uint64 // RwkvGetNLayer Returns the number of layers in the given model. // Useful for always offloading the entire model to GPU. RwkvGetNLayer(ctx *RwkvCtx) uint64 // RwkvGetStateLength Returns the number of float elements in a complete state for the given model. // This is the number of elements you'll need to allocate for a call to rwkv_eval, rwkv_eval_sequence, or rwkv_init_state. RwkvGetStateLength(ctx *RwkvCtx) uint64 // RwkvGetLogitsLength Returns the number of float elements in the logits output of a given model. // This is currently always identical to n_vocab. RwkvGetLogitsLength(ctx *RwkvCtx) uint64 // RwkvInitState Initializes the given state so that passing it to rwkv_eval or rwkv_eval_sequence would be identical to passing NULL. // Useful in cases where tracking the first call to these functions may be annoying or expensive. // State must be initialized for behavior to be defined, passing a zeroed state to rwkv.cpp functions will result in NaNs. // - state: FP32 buffer of size rwkv_get_state_len() to initialize RwkvInitState(ctx *RwkvCtx, state []float32) // RwkvFree Frees all allocated memory and the context. // Does not need to be called on the same thread that created the rwkv_context. RwkvFree(ctx *RwkvCtx) error // RwkvQuantizeModelFile Quantizes FP32 or FP16 model to one of quantized formats. // Returns false on any error. Error messages would be printed to stderr. // - model_file_path_in: path to model file in ggml format, must be either FP32 or FP16. // - model_file_path_out: quantized model will be written here. // - format_name: must be one of available format names below. 
// Available format names: // - Q4_0 // - Q4_1 // - Q5_0 // - Q5_1 // - Q8_0 RwkvQuantizeModelFile(ctx *RwkvCtx, in, out string, format QuantizedFormat) error // RwkvGetSystemInfoString Returns system information string. RwkvGetSystemInfoString() string }
type CRwkvImpl ¶
type CRwkvImpl struct {
// contains filtered or unexported fields
}
func (*CRwkvImpl) RwkvCloneContext ¶
func (*CRwkvImpl) RwkvEvalSequence ¶
func (*CRwkvImpl) RwkvGetLastError ¶
func (*CRwkvImpl) RwkvGetLogitsLength ¶
func (*CRwkvImpl) RwkvGetNEmbedding ¶
func (*CRwkvImpl) RwkvGetNLayer ¶
func (*CRwkvImpl) RwkvGetNVocab ¶
func (*CRwkvImpl) RwkvGetPrintErrors ¶
func (*CRwkvImpl) RwkvGetStateLength ¶
func (*CRwkvImpl) RwkvGetSystemInfoString ¶
func (*CRwkvImpl) RwkvGpuOffloadLayers ¶
func (*CRwkvImpl) RwkvInitFromFile ¶
func (*CRwkvImpl) RwkvInitState ¶
func (*CRwkvImpl) RwkvQuantizeModelFile ¶
func (c *CRwkvImpl) RwkvQuantizeModelFile(ctx *RwkvCtx, in, out string, format QuantizedFormat) error
func (*CRwkvImpl) RwkvSetPrintErrors ¶
type NormalTokenizer ¶
type NormalTokenizer struct {
// contains filtered or unexported fields
}
func NewNormalTokenizer ¶
func NewNormalTokenizer() (*NormalTokenizer, error)
func (*NormalTokenizer) Decode ¶
func (t *NormalTokenizer) Decode(ids []int) string
type QuantizedFormat ¶
type QuantizedFormat string
const ( Q4_0 QuantizedFormat = "Q4_0" Q4_1 QuantizedFormat = "Q4_1" Q5_0 QuantizedFormat = "Q5_0" Q5_1 QuantizedFormat = "Q5_1" Q8_0 QuantizedFormat = "Q8_0" )
type RwkvErrors ¶
type RwkvErrors uint32
const ( RwkvErrorNone RwkvErrors = iota RwkvErrorAlloc RwkvErrorFileOpen RwkvErrorFileStat RwkvErrorFileRead RwkvErrorFileWrite RwkvErrorFileMagic RwkvErrorFileVersion RwkvErrorDataType RwkvErrorUnsupported RwkvErrorShape RwkvErrorDimension RwkvErrorKey RwkvErrorData RwkvErrorParamMissing )
Represents an error encountered during a function call. These are flags, so an actual value might contain multiple errors.
func (RwkvErrors) Error ¶
func (err RwkvErrors) Error() string
type RwkvModel ¶
type RwkvModel struct {
// contains filtered or unexported fields
}
func NewRwkvAutoModel ¶
func NewRwkvAutoModel(options RwkvOptions) (*RwkvModel, error)
func NewRwkvModel ¶
func NewRwkvModel(dylibPath string, options RwkvOptions) (*RwkvModel, error)
func (*RwkvModel) LoadFromFile ¶
func (*RwkvModel) QuantizeModelFile ¶
func (m *RwkvModel) QuantizeModelFile(in, out string, format QuantizedFormat) error
type RwkvOptions ¶
type RwkvState ¶
type RwkvState struct {
// contains filtered or unexported fields
}
func (*RwkvState) CleanState ¶
CleanState clears the old state and sets up a new state for a new chat context.
func (*RwkvState) GetEmbedding ¶
GetEmbedding returns the model embedding. In RWKV the embedding is the hidden state, whose length is n_emb*5*n_layer = 46080 (e.g. n_emb = 768, n_layer = 12). If distill is true, the embedding is reduced to length n_emb = 768.
func (*RwkvState) PredictStream ¶
type Trie ¶
type Trie struct {
Root *TrieNode
}
Trie represents the trie data structure
func (*Trie) FindLongest ¶
FindLongest finds the longest match in the trie for the given key
type TrieNode ¶
type TrieNode struct {
// contains filtered or unexported fields
}
TrieNode represents a node in the trie
type WorldTokenizer ¶
WorldTokenizer represents a tokenizer for encoding and decoding bytes to tokens
func NewWorldTokenizer ¶
func NewWorldTokenizer() (*WorldTokenizer, error)
NewWorldTokenizer initializes a new world tokenizer
func (*WorldTokenizer) Decode ¶
func (wt *WorldTokenizer) Decode(tokens []int) string
Decode decodes tokens to a string
func (*WorldTokenizer) DecodeBytes ¶
func (wt *WorldTokenizer) DecodeBytes(tokens []int) []rune
DecodeBytes decodes tokens to a slice of runes
func (*WorldTokenizer) Encode ¶
func (wt *WorldTokenizer) Encode(src string) ([]int, error)
Encode encodes a string to tokens
func (*WorldTokenizer) EncodeBytes ¶
func (wt *WorldTokenizer) EncodeBytes(src []rune) ([]int, error)
EncodeBytes encodes a slice of runes to tokens