deepseek

package module
v1.0.9 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 24, 2026 License: MIT Imports: 10 Imported by: 0

README

Go DeepSeek

Go Reference Go Report Card

go-deepseek is an unofficial Go client for DeepSeek-compatible APIs. It can be used with DeepSeek, Qwen3/QwQ via DashScope-compatible endpoints, OpenAI-compatible APIs, and local Ollama models.

Features

  • Chat completions
  • Streaming chat completions
  • Function calling / tool calling
  • FIM (Fill-in-Middle) completions
  • Embeddings through Ollama
  • DeepSeek account balance query
  • Configurable BaseUrl, timeout, and custom http.Client

Installation

go get github.com/p9966/go-deepseek

Requires Go 1.23+.

Quick Start

This is the fastest way to call the default DeepSeek API:

package main

import (
	"context"
	"fmt"
	"log"
	"os"

	"github.com/p9966/go-deepseek"
)

func main() {
	// Read the API key from the environment and build a default client
	// (points at https://api.deepseek.com).
	client := deepseek.NewClient(os.Getenv("DEEPSEEK_API_KEY"))

	// Single-turn chat request: one user message, default sampling options.
	req := &deepseek.ChatCompletionRequest{
		Model: deepseek.DeepseekV4Pro,
		Messages: []deepseek.ChatCompletionMessage{{
			Role:    deepseek.ChatMessageRoleUser,
			Content: "Explain why the sky is blue in one paragraph.",
		}},
	}

	resp, err := client.CreateChatCompletion(context.Background(), req)
	if err != nil {
		log.Fatal(err)
	}
	if len(resp.Choices) == 0 {
		log.Fatal("no response choices returned")
	}

	// Print the first candidate's content.
	fmt.Println(resp.Choices[0].Message.Content)
}

Configure Another Compatible Endpoint

The client defaults to https://api.deepseek.com, but you can point it to any compatible endpoint.

Qwen3 / QwQ via DashScope
client := deepseek.NewClient(
	os.Getenv("QWEN3_AUTH_TOKEN"),
	deepseek.WithBaseURL("https://dashscope.aliyuncs.com/compatible-mode/v1"),
)
OpenAI-compatible endpoint
client := deepseek.NewClient(
	os.Getenv("OPENAI_API_KEY"),
	deepseek.WithBaseURL("https://api.openai.com/v1"),
)
Custom timeout or HTTP client
client := deepseek.NewClient(
	os.Getenv("DEEPSEEK_API_KEY"),
	deepseek.WithTimeout(30*time.Second),
)

Or:

client := deepseek.NewClient(
	os.Getenv("DEEPSEEK_API_KEY"),
	deepseek.WithHTTPClient(myHTTPClient),
)

Streaming Chat Completion

Use CreateChatCompletionStream when you want tokens as they arrive:

package main

import (
	"context"
	"errors"
	"fmt"
	"io"
	"log"
	"os"

	"github.com/p9966/go-deepseek"
)

func main() {
	client := deepseek.NewClient(os.Getenv("DEEPSEEK_API_KEY"))

	// Streaming request: tokens are delivered incrementally via Recv.
	req := deepseek.StreamChatCompletionRequest{
		Model: deepseek.DeepseekV4Flash,
		Messages: []deepseek.ChatCompletionMessage{{
			Role:    deepseek.ChatMessageRoleUser,
			Content: "Write a short haiku about Go concurrency.",
		}},
	}

	stream, err := client.CreateChatCompletionStream(context.Background(), req)
	if err != nil {
		log.Fatal(err)
	}
	defer stream.Close()

	// Drain the stream until EOF, printing each content delta as it arrives.
	for {
		chunk, err := stream.Recv()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
		if len(chunk.Choices) > 0 {
			fmt.Print(chunk.Choices[0].Delta.Content)
		}
	}
}

Function Calling

package main

import (
	"context"
	"fmt"
	"log"
	"os"

	"github.com/p9966/go-deepseek"
)

func main() {
	client := deepseek.NewClient(os.Getenv("DEEPSEEK_API_KEY"))

	// Declare one callable tool (get_weather) using a JSON-Schema-style
	// parameter description, then ask a question the model should answer
	// by requesting that tool.
	weatherTool := deepseek.Tools{
		Type: "function",
		Function: deepseek.Function{
			Name:        "get_weather",
			Description: "Get weather for a city",
			Parameters: &deepseek.Parameters{
				Type: "object",
				Properties: map[string]interface{}{
					"location": map[string]interface{}{
						"type":        "string",
						"description": "City name",
					},
				},
				Required: []string{"location"},
			},
		},
	}

	req := deepseek.ChatCompletionRequest{
		Model: deepseek.DeepseekV4Pro,
		Messages: []deepseek.ChatCompletionMessage{{
			Role:    deepseek.ChatMessageRoleUser,
			Content: "What's the weather in Hangzhou?",
		}},
		Tools: []deepseek.Tools{weatherTool},
	}

	resp, err := client.CreateChatCompletion(context.Background(), &req)
	if err != nil {
		log.Fatal(err)
	}
	if len(resp.Choices) == 0 || len(resp.Choices[0].Message.ToolCalls) == 0 {
		log.Fatal("no tool call returned")
	}

	// Show which tool the model asked for and the raw JSON arguments.
	call := resp.Choices[0].Message.ToolCalls[0]
	fmt.Printf("tool=%s args=%s\n", call.Function.Name, call.Function.Arguments)
}

Local Models with Ollama

You can also use local models through Ollama:

package main

import (
	"context"
	"fmt"
	"log"

	"github.com/p9966/go-deepseek"
)

func main() {
	// No auth token is required for a local Ollama server; only the
	// base URL needs to be overridden.
	client := deepseek.NewClient(
		"",
		deepseek.WithBaseURL("http://localhost:11434"),
	)

	req := deepseek.OllamaChatRequest{
		Model: "deepseek-r1:7b",
		Messages: []deepseek.OllamaChatMessage{
			{
				Role:    "user",
				Content: "Hello!",
			},
		},
	}

	resp, err := client.CreateOllamaChatCompletion(context.Background(), &req)
	if err != nil {
		log.Fatal(err)
	}

	// OllamaChatResponse.Message is a pointer; guard against a nil
	// message before dereferencing to avoid a panic on empty responses.
	if resp.Message == nil {
		log.Fatal("no message returned")
	}

	fmt.Println(resp.Message.Content)
}

If you do not have Ollama installed yet:

ollama run deepseek-r1

Example Index

Runnable examples live in examples/:

Scenario Path
Chat completion examples/chat
Stream chat completion examples/chat_stream
Function calling examples/function_calling
FIM completion examples/fin
Balance query examples/balance
Ollama chat examples/ollama_chat
Ollama generate examples/ollama_generate
Ollama embeddings examples/ollama_embed
Ollama function calling examples/ollama_function_calling
Qwen3 stream chat examples/qwen3_chat_stream
Qwen3 function calling examples/qwen3_function_calling
QwQ stream chat examples/qwq_chat_stream
QwQ function calling examples/qwq_function_calling

Notes

  • NewClient(token) uses DeepSeek's default base URL automatically.
  • Plain tokens are sent as Bearer <token> automatically.
  • For compatible providers, set a custom base URL with WithBaseURL(...).
  • For local Ollama usage, no auth token is required.

Development

Run tests with:

go test ./...

License

This project is licensed under the MIT License.

Documentation

Overview

Package deepseek 提供 DeepSeek API 的非官方 Go 客户端。 当前支持与 DeepSeek 的聊天能力进行交互。

Index

Constants

View Source
const (
	ChatMessageRoleSystem    = "system"
	ChatMessageRoleUser      = "user"
	ChatMessageRoleAssistant = "assistant"
)
View Source
const (
	QWEN3_235B_A22B  = "qwen3-235b-a22b"
	QWEN3_32B        = "qwen3-32b"
	QWEB3_30B_A3B    = "qwen3-30b-a3b"
	QWEN3_14B        = "qwen3-14b"
	QWEN3_8B         = "qwen3-8b"
	QWEN3_4B         = "qwen3-4b"
	QWEN3_1_7B       = "qwen3-1.7b"
	QWEN3_0_6B       = "qwen3-0.6b"
	DeepSeekChat     = "deepseek-chat"     //  (将于 2026/07/24 弃用)
	DeepseekReasoner = "deepseek-reasoner" // (将于 2026/07/24 弃用)
	QWen2_5_7b       = "qwen2.5:7b"
	QwQ_plus         = "qwq-plus"
	QwQ_plus_latest  = "qwq-plus-latest"
	QwQ_32b          = "qwq-32b"
	DeepsSeekCode    = "deepseek-code"
	DeepseekV4Flash  = "deepseek-v4-flash"
	DeepseekV4Pro    = "deepseek-v4-pro"
)
View Source
const BalanceSuffix = "/user/balance"

Variables

This section is empty.

Functions

This section is empty.

Types

type APIError added in v1.0.9

type APIError struct {
	StatusCode int
	Message    string
	Body       string
}

func (*APIError) Error added in v1.0.9

func (e *APIError) Error() string

type BalanceInfo added in v1.0.4

type BalanceInfo struct {
	Currency        string `json:"currency"`
	TotalBalance    string `json:"total_balance"`
	GrantedBalance  string `json:"granted_balance"`
	ToppedUpBalance string `json:"topped_up_balance"`
}

type BalanceResponse added in v1.0.4

type BalanceResponse struct {
	IsAvailable  bool          `json:"is_available"`
	BalanceInfos []BalanceInfo `json:"balance_infos"`
}

type ChatCompletionMessage

type ChatCompletionMessage struct {
	Role    string `json:"role"`
	Content string `json:"content"`
}

type ChatCompletionRequest

type ChatCompletionRequest struct {
	Model            string                  `json:"model"`
	Messages         []ChatCompletionMessage `json:"messages"`
	FrequencyPenalty float32                 `json:"frequency_penalty"`
	MaxTokens        int                     `json:"max_tokens,omitempty"`       // 可选:最大输出 token 数,需大于 1
	PresencePenalty  float32                 `json:"presence_penalty,omitempty"` // 可选:出现惩罚,取值范围为 [-2, 2]
	Temperature      float32                 `json:"temperature,omitempty"`      // 可选:采样温度,最大为 2
	TopP             float32                 `json:"top_p,omitempty"`            // 可选:核采样参数,最大为 1
	ResponseFormat   *ResponseFormat         `json:"response_format,omitempty"`  // 可选:自定义响应格式
	Stop             []string                `json:"stop,omitempty"`             // 可选:停止生成的标记
	Tools            []Tools                 `json:"tools,omitempty"`            // 可选:工具列表
	LogProbs         bool                    `json:"logprobs,omitempty"`         // 可选:是否返回对数概率
	TopLogProbs      int                     `json:"top_logprobs,omitempty"`     // 可选:返回对数概率最高的 token 数量,最大为 20
	ToolChoice       string                  `json:"tool_choice,omitempty"`      // 可选:工具选择策略,可为 "auto" 或 "none"
	ExtraBody        ExtraBody               `json:"extra_body,omitempty"`       // 可选:额外请求体,用于思考相关配置
	ReasoningEffort  ReasoningEffortModel    `json:"reasoning_effort,omitempty"` // 可选:思考强度,可为 "high" 或 "max"
}

type ChatCompletionResponse

type ChatCompletionResponse struct {
	ID                string   `json:"id"`                 // 聊天补全结果的唯一标识
	Object            string   `json:"object"`             // 对象类型,通常为 "chat.completion"
	Created           int64    `json:"created"`            // 聊天补全创建时间戳
	Model             string   `json:"model"`              // 生成该结果所使用的模型
	Choices           []Choice `json:"choices"`            // 模型生成的候选结果列表
	Usage             Usage    `json:"usage"`              // token 使用统计信息
	SystemFingerprint string   `json:"system_fingerprint"` // 系统配置指纹
}

type ChatCompletionStream added in v1.0.5

type ChatCompletionStream interface {
	Recv() (*StreamChatCompletionResponse, error)
	Close() error
}

type Choice

type Choice struct {
	Index        int       `json:"index"`              // 当前候选结果在列表中的索引
	Message      Message   `json:"message"`            // 模型生成的消息内容
	LogProbs     *LogProbs `json:"logprobs,omitempty"` // token 的对数概率信息
	FinishReason string    `json:"finish_reason"`      // 生成结束原因
}

type Client

type Client struct {
	AuthToken string
	BaseUrl   string
	// contains filtered or unexported fields
}

func NewClient

func NewClient(token string, opts ...ClientOption) *Client

NewClient 使用给定的 API Key 创建一个新的 DeepSeek 客户端。

func (*Client) CreateChatCompletion

func (c *Client) CreateChatCompletion(ctx context.Context, req *ChatCompletionRequest) (*ChatCompletionResponse, error)

func (*Client) CreateChatCompletionStream added in v1.0.5

func (c *Client) CreateChatCompletionStream(ctx context.Context, req StreamChatCompletionRequest) (ChatCompletionStream, error)

func (*Client) CreateFINCompletion added in v1.0.1

func (c *Client) CreateFINCompletion(ctx context.Context, req *FINCompletionRequest) (*FINCompletionResponse, error)

func (*Client) CreateOllamaChatCompletion added in v1.0.2

func (c *Client) CreateOllamaChatCompletion(ctx context.Context, req *OllamaChatRequest) (*OllamaChatResponse, error)

func (*Client) CreateOllamaEmbed added in v1.0.2

func (c *Client) CreateOllamaEmbed(ctx context.Context, req *OllamaEmbedRequest) (*OllamaEmbedResponse, error)

func (*Client) CreateOllamaGenerate added in v1.0.2

func (c *Client) CreateOllamaGenerate(ctx context.Context, req *OllamaGenerateRequest) (*OllamaGenerateResponse, error)

func (*Client) Do

func (c *Client) Do(req *http.Request) (*http.Response, error)

func (*Client) GetBalance added in v1.0.4

func (c *Client) GetBalance(ctx context.Context) (*BalanceResponse, error)

type ClientOption added in v1.0.9

type ClientOption func(*Client)

func WithBaseURL added in v1.0.9

func WithBaseURL(baseURL string) ClientOption

func WithHTTPClient added in v1.0.9

func WithHTTPClient(httpClient *http.Client) ClientOption

func WithTimeout added in v1.0.9

func WithTimeout(timeout time.Duration) ClientOption

type ExtraBody added in v1.0.9

type ExtraBody struct {
	Thinking ExtraBodyThinking `json:"thinking,omitempty"`
}

type ExtraBodyThinking added in v1.0.9

type ExtraBodyThinking struct {
	Type ExtraBodyThinkingModel `json:"type,omitempty"`
}

type ExtraBodyThinkingModel added in v1.0.9

type ExtraBodyThinkingModel string
const (
	ExtraBodyThinkingModelEnabled  ExtraBodyThinkingModel = "enabled"
	ExtraBodyThinkingModelDisabled ExtraBodyThinkingModel = "disabled"
)

type FINCompletionChoice added in v1.0.1

type FINCompletionChoice struct {
	FinishReason string                      `json:"finish_reason"`
	Index        int                         `json:"index"`
	Logprobs     FINCompletionChoiceLogprobs `json:"logprobs"`
	Text         string                      `json:"text"`
}

type FINCompletionChoiceLogprobs added in v1.0.1

type FINCompletionChoiceLogprobs struct {
	Tokens        []string  `json:"tokens"`
	TokenLogprobs []float64 `json:"token_logprobs"`
}

type FINCompletionRequest added in v1.0.1

type FINCompletionRequest struct {
	Model            string    `json:"model"`                       // 必填:模型 ID
	Prompt           string    `json:"prompt"`                      // 必填:用于生成补全内容的提示词
	Echo             bool      `json:"echo,omitempty"`              // 可选:是否返回输入提示词
	FrequencyPenalty float64   `json:"frequency_penalty,omitempty"` // 可选:控制生成内容重复度,取值范围 [-2, 2]
	Logprobs         int       `json:"logprobs,omitempty"`          // 可选:返回最可能输出 token 的对数概率,同时包含已采样 token 的对数概率,最大值为 20
	MaxTokens        int       `json:"max_tokens,omitempty"`        // 可选:生成内容的最大长度
	PresencePenalty  float64   `json:"presence_penalty,omitempty"`  // 可选:控制生成内容多样性,取值范围 [-2, 2]
	Stop             *[]string `json:"stop,omitempty"`              // 可选:停止生成的字符串或字符串数组
	Stream           bool      `json:"stream,omitempty"`            // 可选:是否以流式方式返回结果
	Suffix           *string   `json:"suffix,omitempty"`            // 可选:被补全内容的后缀
	Temperature      float64   `json:"temperature,omitempty"`       // 可选:采样温度,建议与 top_p 二选一调整,不建议同时修改
	TopP             float64   `json:"top_p,omitempty"`             // 可选:核采样参数,建议与 temperature 二选一调整,不建议同时修改
}

type FINCompletionResponse added in v1.0.1

type FINCompletionResponse struct {
	ID                string                `json:"id"`
	Choices           []FINCompletionChoice `json:"choices"`
	Created           int                   `json:"created"`
	Model             string                `json:"model"`
	SystemFingerprint string                `json:"system_fingerprint"`
	Object            string                `json:"object"`
	Usage             struct {
		CompletionTokens      int                                   `json:"completion_tokens"`
		PromptTokens          int                                   `json:"prompt_tokens"`
		PromptCacheHitTokens  int                                   `json:"prompt_cache_hit_tokens"`
		PromptCacheMissTokens int                                   `json:"prompt_cache_miss_tokens"`
		TotalTokens           int                                   `json:"total_tokens"`
		PromptTokensDetails   FINCompletionUsagePromptTokensDetails `json:"prompt_tokens_details"`
	} `json:"usage"`
}

type FINCompletionUsagePromptTokensDetails added in v1.0.1

type FINCompletionUsagePromptTokensDetails struct {
	CachedTokens int `json:"cached_tokens"`
}

type Function

type Function struct {
	Name        string      `json:"name"`                 // 必填:函数名称
	Description string      `json:"description"`          // 必填:函数描述
	Parameters  *Parameters `json:"parameters,omitempty"` // 可选:参数结构定义
}

type FunctionCall added in v1.0.1

type FunctionCall struct {
	Name      string `json:"name"`      // 要调用的函数名称
	Arguments string `json:"arguments"` // 函数调用参数内容
}

type LogProbs

type LogProbs struct {
	Tokens        []string             `json:"tokens,omitempty"`         // token 列表
	TokenLogProbs []float64            `json:"token_logprobs,omitempty"` // 每个 token 的对数概率
	TopLogProbs   []map[string]float64 `json:"top_logprobs,omitempty"`   // 每个 token 对应的最高对数概率集合
}

type Message

type Message struct {
	Role             string     `json:"role"`                        // 消息发送者角色,例如 "user"、"assistant"
	Content          string     `json:"content"`                     // 消息内容
	ReasoningContent string     `json:"reasoning_content,omitempty"` // 可选:思考过程内容
	ToolCalls        []ToolCall `json:"tool_calls,omitempty"`        // 可选:工具调用列表
}

type OllamaChatMessage added in v1.0.2

type OllamaChatMessage struct {
	Role      string       `json:"role"`
	Content   string       `json:"content"`
	Images    []string     `json:"images,omitempty"`
	ToolCalls []OllamaTool `json:"tool_calls,omitempty"`
}

type OllamaChatRequest added in v1.0.2

type OllamaChatRequest struct {
	Model     string              `json:"model"`
	Messages  []OllamaChatMessage `json:"messages"`             // 聊天消息列表,可用于维持会话上下文
	Tools     []Tools             `json:"tools,omitempty"`      // 可选:本次聊天可用的工具列表
	Format    map[string]any      `json:"format,omitempty"`     // 可选:返回格式,可为 json 或 JSON Schema
	Stream    bool                `json:"stream"`               // 可选:为 false 时返回单个完整响应,而不是流式响应
	Options   *Options            `json:"options,omitempty"`    // 可选:模型附加参数,例如 temperature 等 Modelfile 支持的参数
	KeepAlive int                 `json:"keep_alive,omitempty"` // 可选:请求结束后模型在内存中的保活时长,默认 5 分钟
}

type OllamaChatResponse added in v1.0.2

type OllamaChatResponse struct {
	Model              string             `json:"model"`
	CreatedAt          string             `json:"created_at"`
	Message            *OllamaChatMessage `json:"message"`
	DoneReason         string             `json:"done_reason"`
	Done               bool               `json:"done"`
	TotalDuration      int64              `json:"total_duration"`
	LoadDuration       int64              `json:"load_duration"`
	PromptEvalCount    int                `json:"prompt_eval_count"`
	PromptEvalDuration int64              `json:"prompt_eval_duration"`
	EvalCount          int                `json:"eval_count"`
	EvalDuration       int64              `json:"eval_duration"`
}

type OllamaEmbedRequest added in v1.0.2

type OllamaEmbedRequest struct {
	Model     string   `json:"model"`                // 用于生成向量的模型名称
	Input     any      `json:"input"`                // 需要生成向量的文本,支持单条或多条
	Truncate  bool     `json:"truncate,omitempty"`   // 可选:是否在超出上下文长度时截断输入,默认开启;关闭时若超限将返回错误
	Options   *Options `json:"options,omitempty"`    // 可选:模型附加参数,例如 temperature 等 Modelfile 支持的参数
	KeepAlive int      `json:"keep_alive,omitempty"` // 可选:请求结束后模型在内存中的保活时长,默认 5 分钟
}

type OllamaEmbedResponse added in v1.0.2

type OllamaEmbedResponse struct {
	Model           string      `json:"model"`
	Embeddings      [][]float64 `json:"embeddings"`
	TotalDuration   int64       `json:"total_duration"`
	LoadDuration    int64       `json:"load_duration"`
	PromptEvalCount int         `json:"prompt_eval_count"`
}

type OllamaGenerateRequest added in v1.0.2

type OllamaGenerateRequest struct {
	Model     string   `json:"model"`
	Prompt    string   `json:"prompt"`               // 可选:用于生成响应的提示词
	Stream    bool     `json:"stream"`               // 可选:为 false 时返回单个完整响应,而不是流式响应
	Suffix    string   `json:"suffix,omitempty"`     // 可选:模型响应后的后缀文本
	Images    []string `json:"images,omitempty"`     // 可选:Base64 编码图片列表,适用于 llava 等多模态模型
	Format    any      `json:"format,omitempty"`     // 可选:返回格式,可为 json 或 JSON Schema
	Options   *Options `json:"options,omitempty"`    // 可选:模型附加参数,例如 temperature 等 Modelfile 支持的参数
	System    string   `json:"system,omitempty"`     // 可选:系统消息,会覆盖 Modelfile 中定义的 system
	Template  string   `json:"template,omitempty"`   // 可选:提示模板,会覆盖 Modelfile 中定义的 template
	Raw       bool     `json:"raw,omitempty"`        // 可选:为 true 时不对提示词做额外格式化,适合自行传入完整模板
	KeepAlive int      `json:"keep_alive,omitempty"` // 可选:请求结束后模型在内存中的保活时长,默认 5 分钟
}

type OllamaGenerateResponse added in v1.0.2

type OllamaGenerateResponse struct {
	Model              string `json:"model"`
	CreatedAt          string `json:"created_at"`
	Response           string `json:"response"` // 流式响应时该字段为空,非流式响应时包含完整结果
	Done               bool   `json:"done"`
	Context            []int  `json:"context,omitempty"`              // 本次响应使用的上下文编码,可在下次请求中继续复用以保持会话记忆
	TotalDuration      int64  `json:"total_duration,omitempty"`       // 生成响应的总耗时
	LoadDuration       int64  `json:"load_duration,omitempty"`        // 加载模型耗时,单位为纳秒
	PromptEvalCount    int    `json:"prompt_eval_count,omitempty"`    // 提示词 token 数量
	PromptEvalDuration int64  `json:"prompt_eval_duration,omitempty"` // 评估提示词耗时,单位为纳秒
	EvalCount          int    `json:"eval_count,omitempty"`           // 响应 token 数量
	EvalDuration       int64  `json:"eval_duration,omitempty"`        // 生成响应耗时,单位为纳秒
}

type OllamaTool added in v1.0.3

type OllamaTool struct {
	Function struct {
		Name      string         `json:"name"`
		Arguments map[string]any `json:"arguments"`
	} `json:"function"`
}

type Options added in v1.0.2

type Options struct {
	NumKeep          int      `json:"num_keep,omitempty"`
	Seed             int      `json:"seed,omitempty"`
	NumPredict       int      `json:"num_predict,omitempty"`
	TopK             int      `json:"top_k,omitempty"`
	TopP             float64  `json:"top_p,omitempty"`
	MinP             float64  `json:"min_p,omitempty"`
	TypicalP         float64  `json:"typical_p,omitempty"`
	RepeatLastN      int      `json:"repeat_last_n,omitempty"`
	Temperature      float64  `json:"temperature,omitempty"`
	RepeatPenalty    float64  `json:"repeat_penalty,omitempty"`
	PresencePenalty  float64  `json:"presence_penalty,omitempty"`
	FrequencyPenalty float64  `json:"frequency_penalty,omitempty"`
	Mirostat         int      `json:"mirostat,omitempty"`
	MirostatTau      float64  `json:"mirostat_tau,omitempty"`
	MirostatEta      float64  `json:"mirostat_eta,omitempty"`
	PenalizeNewline  bool     `json:"penalize_newline,omitempty"`
	Stop             []string `json:"stop,omitempty"`
	Numa             bool     `json:"numa,omitempty"`
	NumCtx           int      `json:"num_ctx,omitempty"`
	NumBatch         int      `json:"num_batch,omitempty"`
	NumGpu           int      `json:"num_gpu,omitempty"`
	MainGpu          int      `json:"main_gpu,omitempty"`
	LowVram          bool     `json:"low_vram,omitempty"`
	VocabOnly        bool     `json:"vocab_only,omitempty"`
	UseMmap          bool     `json:"use_mmap,omitempty"`
	UseMlock         bool     `json:"use_mlock,omitempty"`
	NumThread        int      `json:"num_thread,omitempty"`
}

type Parameters

type Parameters struct {
	Type       string                 `json:"type"` // 必填:参数类型,例如 "object"
	Properties map[string]interface{} `json:"properties,omitempty"`
	Required   []string               `json:"required,omitempty"`
}

type ReasoningEffortModel added in v1.0.9

type ReasoningEffortModel string
const (
	ReasoningEffortModelHigh ReasoningEffortModel = "high"
	ReasoningEffortModelMax  ReasoningEffortModel = "max"
)

type ResponseFormat

type ResponseFormat struct {
	Type string `json:"type"`
}

type StreamChatChoiceData added in v1.0.5

type StreamChatChoiceData struct {
	Content          string     `json:"content"`
	ReasoningContent string     `json:"reasoning_content"`
	ToolCalls        []ToolCall `json:"tool_calls"`
}

type StreamChatChoices added in v1.0.5

type StreamChatChoices struct {
	Index        int                  `json:"index"`
	Delta        StreamChatChoiceData `json:"delta"`
	LogProbs     *LogProbs            `json:"logprobs,omitempty"`
	FinishReason string               `json:"finish_reason"`
}

type StreamChatCompletionRequest added in v1.0.5

type StreamChatCompletionRequest struct {
	Stream           bool                    `json:"stream"`
	Model            string                  `json:"model"`
	Messages         []ChatCompletionMessage `json:"messages"`
	FrequencyPenalty float32                 `json:"frequency_penalty"`
	MaxTokens        int                     `json:"max_tokens,omitempty"`
	PresencePenalty  float32                 `json:"presence_penalty,omitempty"`
	Temperature      float32                 `json:"temperature,omitempty"`
	TopP             float32                 `json:"top_p,omitempty"`
	ResponseFormat   *ResponseFormat         `json:"response_format,omitempty"`
	Stop             []string                `json:"stop,omitempty"`
	Tools            []Tools                 `json:"tools,omitempty"`
	LogProbs         bool                    `json:"logprobs,omitempty"`
	TopLogProbs      int                     `json:"top_logprobs,omitempty"`
	EnableThink      bool                    `json:"enable_thinking"`
}

type StreamChatCompletionResponse added in v1.0.5

type StreamChatCompletionResponse struct {
	ID                string              `json:"id"`
	Object            string              `json:"object"`
	Created           int64               `json:"created"`
	Model             string              `json:"model"`
	Choices           []StreamChatChoices `json:"choices"`
	SystemFingerprint string              `json:"system_fingerprint"`
}

type ToolCall added in v1.0.1

type ToolCall struct {
	Index    int          `json:"index"` // 当前工具调用在列表中的索引
	Id       string       `json:"id"`    // 工具调用 ID
	Type     string       `json:"type"`  // 工具调用类型,例如 "function"
	Function FunctionCall `json:"function"`
}

type Tools

type Tools struct {
	Type     string   `json:"type"`
	Function Function `json:"function"`
}

type Usage

type Usage struct {
	PromptTokens          int `json:"prompt_tokens"`            // 提示词使用的 token 数
	CompletionTokens      int `json:"completion_tokens"`        // 补全内容使用的 token 数
	TotalTokens           int `json:"total_tokens"`             // 总 token 数
	PromptCacheHitTokens  int `json:"prompt_cache_hit_tokens"`  // 命中缓存的 token 数
	PromptCacheMissTokens int `json:"prompt_cache_miss_tokens"` // 未命中缓存的 token 数
}

Directories

Path Synopsis
examples
balance command
chat command
chat_stream command
fin command
ollama_chat command
ollama_embed command
ollama_generate command
qwq_chat_stream command

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL