package api

v0.2.0
Warning: this package is not in the latest version of its module.
Published: Mar 12, 2025 License: MIT Imports: 8 Imported by: 0

Documentation


Constants

const ChatCompletionRoute = "/chat/completions"
const ChatCompletionStreamPrefix = "data: "
const DefaultgroqEndpoint = "https://api.groq.com/openai/v1"

Variables

var ErrChatCompletion = errors.New("API.ChatCompletion")

Functions

func NewErrChatCompletion

func NewErrChatCompletion(err error) error
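
NewErrChatCompletion presumably wraps an underlying cause together with the ErrChatCompletion sentinel. A minimal sketch of checking for the sentinel with errors.Is, under that assumption (client, ctx, and request are placeholders; see the usage sketch under SetEndpoint below):

resp, err := client.ChatCompletion(ctx, request)
if err != nil {
	// Assumes errors returned by ChatCompletion wrap the ErrChatCompletion sentinel,
	// so errors.Is can distinguish them from other failures.
	if errors.Is(err, api.ErrChatCompletion) {
		log.Printf("chat completion failed: %v", err)
	}
	return err
}
defer resp.Body.Close()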

Types

type API

type API struct {
	// contains filtered or unexported fields
}

func New

func New(apiKey string) *API

func (*API) ChatCompletion

func (api *API) ChatCompletion(ctx context.Context, request ChatCompletionRequest) (*http.Response, error)

func (*API) SetEndpoint

func (api *API) SetEndpoint(endpoint string)
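
A minimal end-to-end sketch of constructing the client and decoding a non-streaming response. The import paths and the model ID are assumptions for illustration, and building the conversation is left to the models package:

package main

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"os"

	// Import paths are assumed for illustration; substitute the module's real paths.
	"example.com/groq/api"
	"example.com/groq/api/models"
)

func main() {
	client := api.New(os.Getenv("GROQ_API_KEY"))
	client.SetEndpoint(api.DefaultgroqEndpoint) // optional; presumably already the default

	// Build the conversation with the models package; its helpers are not shown here.
	var messages models.AnyMessages

	req := api.ChatCompletionRequest{
		Model:    models.Model("llama-3.3-70b-versatile"), // assumed model ID; assumes Model has a string underlying type
		Messages: messages,
	}

	resp, err := client.ChatCompletion(context.Background(), req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	var completion api.ChatCompletionResponse
	if err := json.NewDecoder(resp.Body).Decode(&completion); err != nil {
		log.Fatal(err)
	}
	fmt.Println(completion.Choices[0].Message.Content)
}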

type ChatCompletionChoice

type ChatCompletionChoice struct {
	// The reason the model stopped generating tokens. This will be `stop` if the model
	// hit a natural stop point or a provided stop sequence, `length` if the maximum
	// number of tokens specified in the request was reached, `tool_calls` if the model
	// called a tool, or `function_call` (deprecated) if the model called a function.
	FinishReason ChatCompletionChoiceFinishReason `json:"finish_reason"`
	// The index of the choice in the list of choices.
	Index int `json:"index"`
	// Log probability information for the choice.
	Logprobs ChatCompletionChoiceLogProbs `json:"logprobs"`
	// A chat completion message generated by the model.
	Message ChatCompletionMessage `json:"message"`
}

type ChatCompletionChoiceFinishReason

type ChatCompletionChoiceFinishReason string
const (
	ChatCompletionChoiceFinishReasonStop         ChatCompletionChoiceFinishReason = "stop"
	ChatCompletionChoiceFinishReasonLength       ChatCompletionChoiceFinishReason = "length"
	ChatCompletionChoiceFinishReasonToolCalls    ChatCompletionChoiceFinishReason = "tool_calls"
	ChatCompletionChoiceFinishReasonFunctionCall ChatCompletionChoiceFinishReason = "function_call"
)

type ChatCompletionChoiceLogProbs

type ChatCompletionChoiceLogProbs struct {
	// A list of message content tokens with log probability information.
	Content []ChatCompletionTokenLogprob `json:"content,omitempty"`
}

type ChatCompletionChunkChoice

type ChatCompletionChunkChoice struct {
	// The reason the model stopped generating tokens. This will be `stop` if the model
	// hit a natural stop point or a provided stop sequence, `length` if the maximum
	// number of tokens specified in the request was reached, `tool_calls` if the model
	// called a tool, or `function_call` (deprecated) if the model called a function.
	FinishReason *ChatCompletionChoiceFinishReason `json:"finish_reason"`
	// Log probability information for the choice.
	Logprobs *ChatCompletionChoiceLogProbs `json:"logprobs"`
	// The index of the choice in the list of choices.
	Index int `json:"index"`
	// A chat completion delta generated by streamed model responses.
	Delta ChatCompletionChunkDelta `json:"delta"`
}

type ChatCompletionChunkDelta

type ChatCompletionChunkDelta struct {
	// The contents of the message.
	Content string `json:"content,omitempty"`
}

type ChatCompletionChunkResponse

type ChatCompletionChunkResponse struct {
	// A unique identifier for the chat completion.
	ID string `json:"id"`
	// A list of chat completion choices. Can be more than one if `n` is greater than 1.
	Choices []ChatCompletionChunkChoice `json:"choices"`
	// The Unix timestamp (in seconds) of when the chat completion was created.
	Created int `json:"created"`
	// The model used for the chat completion.
	Model models.Model `json:"model"`
	// The object type, which is always `chat.completion.chunk`.
	Object string `json:"object"`
	// This fingerprint represents the backend configuration that the model runs with.
	//
	// Can be used in conjunction with the `seed` request parameter to understand when
	// backend changes have been made that might impact determinism.
	SystemFingerprint string `json:"system_fingerprint,omitempty"`

	XGroq *ChatCompletionXGroq `json:"x_groq,omitempty"`
}
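
ChatCompletion returns the raw *http.Response, so consuming a streamed response is left to the caller. A sketch of decoding data-only server-sent events into ChatCompletionChunkResponse values, assuming the line format described for Stream; the [DONE] sentinel and line scanning are transport details, not part of this package's API (the api import path is assumed as in the earlier sketch):

import (
	"bufio"
	"encoding/json"
	"io"
	"strings"
)

// decodeChunks is a sketch: it scans data-only SSE lines from a streaming
// response body, strips ChatCompletionStreamPrefix, and decodes each payload.
func decodeChunks(body io.Reader, onChunk func(api.ChatCompletionChunkResponse)) error {
	scanner := bufio.NewScanner(body)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, api.ChatCompletionStreamPrefix) {
			continue // skip blank lines and any non-data fields
		}
		payload := strings.TrimPrefix(line, api.ChatCompletionStreamPrefix)
		if payload == "[DONE]" {
			return nil // stream terminator
		}
		var chunk api.ChatCompletionChunkResponse
		if err := json.Unmarshal([]byte(payload), &chunk); err != nil {
			return err
		}
		onChunk(chunk)
	}
	return scanner.Err()
}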

type ChatCompletionMessage

type ChatCompletionMessage struct {
	// The contents of the message.
	Content string `json:"content,omitempty"`
	// The role of the author of this message.
	Role models.MessageRole `json:"role,omitempty"`
	// The model's reasoning for a response. Only available for reasoning models when the request parameter
	// ChatCompletionRequest.ReasoningFormat has the value `parsed`.
	Reasoning string `json:"reasoning,omitempty"`
	// The tool calls generated by the model, such as function calls.
	ToolCalls []ChatCompletionToolCall `json:"tool_calls,omitempty"`
}

type ChatCompletionRequest

type ChatCompletionRequest struct {
	// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the
	// text so far, decreasing the model's likelihood to repeat the same line verbatim.
	FrequencyPenalty *FrequencyPenalty `json:"frequency_penalty,omitempty"`

	// This is not yet supported by any of our models. Whether to return log probabilities of the output tokens or not.
	// If true, returns the log probabilities of each output token returned in the content of message.
	LogProbs *LogProbs `json:"logprobs,omitempty"`

	// The maximum number of tokens that can be generated in the chat completion. The total length of input tokens and
	// generated tokens is limited by the model's context length.
	MaxCompletionTokens *MaxCompletionTokens `json:"max_completion_tokens,omitempty"`

	// A list of messages comprising the conversation so far.
	Messages models.AnyMessages `json:"messages"`

	// ID of the model to use. For details on which models are compatible with the Chat API, see available models
	// https://console.api.com/docs/models.
	Model models.Model `json:"model"`

	// How many chat completion choices to generate for each input message. Note that at the moment only n=1 is
	// supported. Other values will result in a 400 response.
	//
	// Defaults to 1.
	SuggestionsCount *SuggestionsCount `json:"suggestions_count,omitempty"`

	// Whether to enable parallel function calling during tool use.
	// Defaults to true.
	ParallelToolCalls *ParallelToolCalls `json:"parallel_tool_calls,omitempty"`

	// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so
	// far, increasing the model's likelihood to talk about new topics.
	PresencePenalty *PresencePenalty `json:"presence_penalty,omitempty"`

	// Specifies how to output reasoning tokens.
	ReasoningFormat ReasoningFormat `json:"reasoning_format,omitempty"`

	// An object specifying the format that the model must output.
	//
	// Setting to { "type": "json_object" } enables JSON mode, which guarantees the message the model generates is
	// valid JSON.
	//
	// Important: when using JSON mode, you must also instruct the model to produce JSON yourself via a system or user
	// message.
	ResponseFormat ResponseFormat `json:"response_format,omitempty"`

	// If specified, our system will make a best effort to sample deterministically, such that repeated requests with
	// the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer
	// to the system_fingerprint response parameter to monitor changes in the backend.
	Seed *Seed `json:"seed,omitempty"`

	// The service tier to use for the request. Defaults to ServiceTierOnDemand.
	//
	// ServiceTierAuto will automatically select the highest tier available within the rate limits of your organization.
	// ServiceTierFlex uses the flex tier, which will succeed or fail quickly.
	ServiceTier ServiceTier `json:"service_tier,omitempty"`

	// Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the
	// stop sequence.
	Stop *Stop `json:"stop,omitempty"`

	// If set, partial message deltas will be sent. Tokens will be sent as data-only server-sent events as they become
	// available, with the stream terminated by a data: [DONE] message.
	Stream *Stream `json:"stream,omitempty"`

	// Options for streaming response. Only set this when you set Stream: true.
	StreamOptions *StreamOptions `json:"stream_options,omitempty"`

	// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random,
	// while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this
	// or top_p but not both.
	Temperature *Temperature `json:"temperature,omitempty"`

	// Controls which (if any) tool is called by the model. ToolChoiceStaticNone means the model will not call
	// any tool and instead generates a message. ToolChoiceStaticAuto means the model can pick between generating a
	// message or calling one or more tools. Required means the model must call one or more tools. Specifying a
	// particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool.
	//
	// ToolChoiceStaticNone is the default when no tools are present. ToolChoiceStaticAuto is the default if tools are present.
	ToolChoice *models.ToolChoice `json:"tool_choice,omitempty"`

	// A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a
	// list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
	Tools []models.Tool `json:"tools,omitempty"`

	// This is not yet supported by any of our models. An integer between 0 and 20 specifying the number of most
	// likely tokens to return at each token position, each with an associated log probability. logprobs must be set
	// to true if this parameter is used.
	TopLogProbs *TopLogProbs `json:"top_logprobs,omitempty"`

	// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of
	// the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass
	// are considered. We generally recommend altering this or temperature but not both.
	TopP *TopP `json:"top_p,omitempty"`

	// A unique identifier representing your end-user, which can help us monitor and detect abuse.
	User string `json:"user,omitempty"`
}

ChatCompletionRequest creates a model response for the given chat conversation.

https://console.api.com/docs/api-reference#chat
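
The optional scalar parameters are pointer types so that unset fields can be omitted, and the package's New* constructors return pointers for inline use. A sketch of populating several of them (the model ID is assumed and messages is a placeholder built with the models package):

req := api.ChatCompletionRequest{
	Model:               models.Model("llama-3.3-70b-versatile"), // assumed model ID
	Messages:            messages,                                // built via the models package
	Temperature:         api.NewTemperature(0.2),
	TopP:                api.NewTopP(0.9),
	MaxCompletionTokens: api.NewMaxCompletionTokens(512),
	Seed:                api.NewSeed(42),
	FrequencyPenalty:    api.NewFrequencyPenalty(0.5),
	Stream:              api.NewStream(true),
	StreamOptions:       &api.StreamOptions{IncludeUsage: true},
	ResponseFormat:      api.ResponseFormatJSON, // JSON mode: also instruct the model to emit JSON in a message
}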

type ChatCompletionResponse

type ChatCompletionResponse struct {
	// A unique identifier for the chat completion.
	ID string `json:"id"`
	// The Unix timestamp (in seconds) of when the chat completion was created.
	Created int `json:"created"`
	// The model used for the chat completion.
	Model models.Model `json:"model"`
	// The object type, which is always `chat.completion`.
	Object string `json:"object"`
	// This fingerprint represents the backend configuration that the model runs with.
	//
	// Can be used in conjunction with the `seed` request parameter to understand when
	// backend changes have been made that might impact determinism.
	SystemFingerprint string `json:"system_fingerprint,omitempty"`
	// A list of chat completion choices. Can be more than one if `n` is greater than 1.
	Choices []ChatCompletionChoice `json:"choices"`
	// Usage statistics for the completion request.
	Usage ChatCompletionUsage `json:"usage"`
}

type ChatCompletionTokenLogprob

type ChatCompletionTokenLogprob struct {
	// The token.
	Token string `json:"token"`
	// A list of integers representing the UTF-8 bytes representation of the token.
	// Useful in instances where characters are represented by multiple tokens and
	// their byte representations must be combined to generate the correct text
	// representation. Can be `null` if there is no bytes representation for the token.
	Bytes []byte `json:"bytes"`
	// The log probability of this token, if it is within the top 20 most likely
	// tokens. Otherwise, the value `-9999.0` is used to signify that the token is very
	// unlikely.
	LogProb float64 `json:"logprob"`
	// List of the most likely tokens and their log probability, at this token
	// position. In rare cases, there may be fewer than the number of requested
	// `top_logprobs` returned.
	TopLogprobs []ChatCompletionTokenTopLogprob `json:"top_logprobs"`
}

type ChatCompletionTokenTopLogprob

type ChatCompletionTokenTopLogprob struct {
	// The token.
	Token string `json:"token"`
	// A list of integers representing the UTF-8 bytes representation of the token.
	// Useful in instances where characters are represented by multiple tokens and
	// their byte representations must be combined to generate the correct text
	// representation. Can be `null` if there is no bytes representation for the token.
	Bytes []byte `json:"bytes"`
	// The log probability of this token, if it is within the top 20 most likely
	// tokens. Otherwise, the value `-9999.0` is used to signify that the token is very
	// unlikely.
	LogProb float64 `json:"logprob"`
}

type ChatCompletionToolCall

type ChatCompletionToolCall struct {
	// The ID of the tool call.
	ID string `json:"id"`
	// The type of the tool. Currently, only ToolTypeFunction is supported.
	Type models.ToolType `json:"type,omitempty"`
	// The function that the model called.
	Function *models.ToolCallFunction `json:"function,omitempty"`
}

type ChatCompletionUsage

type ChatCompletionUsage struct {
	// Number of tokens in the generated completion.
	CompletionTokens int `json:"completion_tokens"`
	// Number of tokens in the prompt.
	PromptTokens int `json:"prompt_tokens"`
	// Total number of tokens used in the request (prompt + completion).
	TotalTokens int `json:"total_tokens"`
	// Time spent generating tokens
	GenerationTime float64 `json:"generation_time"`
	// Time spent processing input tokens
	ProcessingTime float64 `json:"processing_time"`
	// Time the request spent queued
	QueuedTime float64 `json:"queued_time"`
	// Completion time and prompt time combined.
	TotalTime float64 `json:"total_time"`
}

type ChatCompletionXGroq

type ChatCompletionXGroq struct {
	// A groq request ID, which can be used to refer to a specific request when contacting groq support. Only sent
	// with the first chunk.
	ID string `json:"id"`
	// An error string indicating why a stream was stopped early.
	Error string `json:"error"`
	// Usage information for the stream. Only sent in the final chunk.
	Usage ChatCompletionUsage `json:"usage"`
}

type FrequencyPenalty

type FrequencyPenalty float64

FrequencyPenalty is a number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.

func NewFrequencyPenalty

func NewFrequencyPenalty(f float64) *FrequencyPenalty

type LogProbs

type LogProbs bool

LogProbs determines whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.

func NewLogProbs

func NewLogProbs(l bool) *LogProbs

type MaxCompletionTokens

type MaxCompletionTokens int

MaxCompletionTokens is the maximum number of tokens that can be generated in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length.

func NewMaxCompletionTokens

func NewMaxCompletionTokens(m int) *MaxCompletionTokens

type ParallelToolCalls

type ParallelToolCalls bool

ParallelToolCalls enables parallel function calling during tool use.

func NewParallelToolCalls

func NewParallelToolCalls(p bool) *ParallelToolCalls

type PresencePenalty

type PresencePenalty float64

PresencePenalty is a number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.

func NewPresencePenalty

func NewPresencePenalty(p float64) *PresencePenalty

type ReasoningFormat

type ReasoningFormat string

ReasoningFormat specifies how to output reasoning tokens.

type ResponseFormat

type ResponseFormat string
const (
	ResponseFormatText ResponseFormat = "text"
	ResponseFormatJSON ResponseFormat = "json_object"
)

func (ResponseFormat) MarshalJSON

func (responseFormat ResponseFormat) MarshalJSON() ([]byte, error)

func (*ResponseFormat) UnmarshalJSON

func (responseFormat *ResponseFormat) UnmarshalJSON(data []byte) error

type Seed

type Seed int

Seed of the generation. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.

func NewSeed

func NewSeed(s int) *Seed

type ServiceTier

type ServiceTier string
const (
	// ServiceTierOnDemand is the default service tier.
	ServiceTierOnDemand ServiceTier = "on_demand"
	// ServiceTierAuto automatically selects the highest tier available within the rate limits of your organization.
	ServiceTierAuto ServiceTier = "auto"
	// ServiceTierFlex uses the flex tier, which will succeed or fail quickly.
	ServiceTierFlex ServiceTier = "flex"
)

type Stop

type Stop [4]string

func (Stop) MarshalJSON

func (stop Stop) MarshalJSON() ([]byte, error)

func (*Stop) UnmarshalJSON

func (stop *Stop) UnmarshalJSON(data []byte) error
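
Stop is a fixed-size array, so up to four stop sequences can be supplied with a composite literal and unused entries stay empty (presumably omitted by the custom MarshalJSON). A sketch, with req a ChatCompletionRequest:

stop := api.Stop{"\n\n", "Observation:"}
req.Stop = &stop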

type Stream

type Stream bool

Stream allows partial message deltas to be sent. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.

func NewStream

func NewStream(s bool) *Stream

type StreamOptions

type StreamOptions struct {
	// If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk
	// shows the token usage statistics for the entire request, and the choices field will always be an empty array.
	// All other chunks will also include a usage field, but with a null value.
	IncludeUsage bool `json:"include_usage"`
}

type SuggestionsCount

type SuggestionsCount int

SuggestionsCount sets how many chat completion choices to generate for each input message. Note that at the moment only n=1 is supported. Other values will result in a 400 response.

func NewSuggestionsCount

func NewSuggestionsCount(s int) *SuggestionsCount

type Temperature

type Temperature float64

Temperature is the sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.

func NewTemperature

func NewTemperature(t float64) *Temperature

type TopLogProbs

type TopLogProbs int

TopLogProbs is an integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used.

func NewTopLogProbs

func NewTopLogProbs(t int) *TopLogProbs

type TopP

type TopP float64

TopP is an alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.

func NewTopP

func NewTopP(t float64) *TopP

