package api

v0.2.0
Warning: this package is not in the latest version of its module.
Published: Mar 12, 2025 License: MIT Imports: 8 Imported by: 0

Documentation


Constants

const ChatCompletionRoute = "/chat/completions"
const ChatCompletionStreamPrefix = "data: "
const DefaultgroqEndpoint = "https://api.groq.com/openai/v1"

Variables

var ErrChatCompletion = errors.New("API.ChatCompletion")

Functions

func NewErrChatCompletion

func NewErrChatCompletion(err error) error
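
NewErrChatCompletion presumably wraps an underlying cause together with the ErrChatCompletion sentinel. A minimal sketch of checking for the sentinel with errors.Is, under that assumption (client, ctx, and request are placeholders; see the usage sketch under SetEndpoint below):

resp, err := client.ChatCompletion(ctx, request)
if err != nil {
	// Assumes errors returned by ChatCompletion wrap the ErrChatCompletion sentinel,
	// so errors.Is can distinguish them from other failures.
	if errors.Is(err, api.ErrChatCompletion) {
		log.Printf("chat completion failed: %v", err)
	}
	return err
}
defer resp.Body.Close()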

Types

type API

type API struct {
	// contains filtered or unexported fields
}

func New

func New(apiKey string) *API

func (*API) ChatCompletion

func (api *API) ChatCompletion(ctx context.Context, request ChatCompletionRequest) (*http.Response, error)

func (*API) SetEndpoint

func (api *API) SetEndpoint(endpoint string)
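
A minimal end-to-end sketch of constructing the client and decoding a non-streaming response. The import paths and the model ID are assumptions for illustration, and building the conversation is left to the models package:

package main

import (
	"context"
	"encoding/json"
	"fmt"
	"log"
	"os"

	// Import paths are assumed for illustration; substitute the module's real paths.
	"example.com/groq/api"
	"example.com/groq/api/models"
)

func main() {
	client := api.New(os.Getenv("GROQ_API_KEY"))
	client.SetEndpoint(api.DefaultgroqEndpoint) // optional; presumably already the default

	// Build the conversation with the models package; its helpers are not shown here.
	var messages models.AnyMessages

	req := api.ChatCompletionRequest{
		Model:    models.Model("llama-3.3-70b-versatile"), // assumed model ID; assumes Model has a string underlying type
		Messages: messages,
	}

	resp, err := client.ChatCompletion(context.Background(), req)
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	var completion api.ChatCompletionResponse
	if err := json.NewDecoder(resp.Body).Decode(&completion); err != nil {
		log.Fatal(err)
	}
	fmt.Println(completion.Choices[0].Message.Content)
}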

type ChatCompletionChoice

type ChatCompletionChoice struct {
	// The reason the model stopped generating tokens. This will be `stop` if the model
	// hit a natural stop point or a provided stop sequence, `length` if the maximum
	// number of tokens specified in the request was reached, `tool_calls` if the model
	// called a tool, or `function_call` (deprecated) if the model called a function.
	FinishReason ChatCompletionChoiceFinishReason `json:"finish_reason"`
	// The index of the choice in the list of choices.
	Index int `json:"index"`
	// Log probability information for the choice.
	Logprobs ChatCompletionChoiceLogProbs `json:"logprobs"`
	// A chat completion message generated by the model.
	Message ChatCompletionMessage `json:"message"`
}

type ChatCompletionChoiceFinishReason

type ChatCompletionChoiceFinishReason string
const (
	ChatCompletionChoiceFinishReasonStop         ChatCompletionChoiceFinishReason = "stop"
	ChatCompletionChoiceFinishReasonLength       ChatCompletionChoiceFinishReason = "length"
	ChatCompletionChoiceFinishReasonToolCalls    ChatCompletionChoiceFinishReason = "tool_calls"
	ChatCompletionChoiceFinishReasonFunctionCall ChatCompletionChoiceFinishReason = "function_call"
)

type ChatCompletionChoiceLogProbs

type ChatCompletionChoiceLogProbs struct {
	// A list of message content tokens with log probability information.
	Content []ChatCompletionTokenLogprob `json:"content,omitempty"`
}

type ChatCompletionChunkChoice

type ChatCompletionChunkChoice struct {
	// The reason the model stopped generating tokens. This will be `stop` if the model
	// hit a natural stop point or a provided stop sequence, `length` if the maximum
	// number of tokens specified in the request was reached, `tool_calls` if the model
	// called a tool, or `function_call` (deprecated) if the model called a function.
	FinishReason *ChatCompletionChoiceFinishReason `json:"finish_reason"`
	// Log probability information for the choice.
	Logprobs *ChatCompletionChoiceLogProbs `json:"logprobs"`
	// The index of the choice in the list of choices.
	Index int `json:"index"`
	// A chat completion delta generated by streamed model responses.
	Delta ChatCompletionChunkDelta `json:"delta"`
}

type ChatCompletionChunkDelta

type ChatCompletionChunkDelta struct {
	// The contents of the message.
	Content string `json:"content,omitempty"`
}

type ChatCompletionChunkResponse

type ChatCompletionChunkResponse struct {
	// A unique identifier for the chat completion.
	ID string `json:"id"`
	// A list of chat completion choices. Can be more than one if `n` is greater than 1.
	Choices []ChatCompletionChunkChoice `json:"choices"`
	// The Unix timestamp (in seconds) of when the chat completion was created.
	Created int `json:"created"`
	// The model used for the chat completion.
	Model models.Model `json:"model"`
	// The object type, which is always `chat.completion.chunk`.
	Object string `json:"object"`
	// This fingerprint represents the backend configuration that the model runs with.
	//
	// Can be used in conjunction with the `seed` request parameter to understand when
	// backend changes have been made that might impact determinism.
	SystemFingerprint string `json:"system_fingerprint,omitempty"`

	XGroq *ChatCompletionXGroq `json:"x_groq,omitempty"`
}
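
ChatCompletion returns the raw *http.Response, so consuming a streamed response is left to the caller. A sketch of decoding data-only server-sent events into ChatCompletionChunkResponse values, assuming the line format described for Stream; the [DONE] sentinel and line scanning are transport details, not part of this package's API (the api import path is assumed as in the earlier sketch):

import (
	"bufio"
	"encoding/json"
	"io"
	"strings"
)

// decodeChunks is a sketch: it scans data-only SSE lines from a streaming
// response body, strips ChatCompletionStreamPrefix, and decodes each payload.
func decodeChunks(body io.Reader, onChunk func(api.ChatCompletionChunkResponse)) error {
	scanner := bufio.NewScanner(body)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.HasPrefix(line, api.ChatCompletionStreamPrefix) {
			continue // skip blank lines and any non-data fields
		}
		payload := strings.TrimPrefix(line, api.ChatCompletionStreamPrefix)
		if payload == "[DONE]" {
			return nil // stream terminator
		}
		var chunk api.ChatCompletionChunkResponse
		if err := json.Unmarshal([]byte(payload), &chunk); err != nil {
			return err
		}
		onChunk(chunk)
	}
	return scanner.Err()
}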

type ChatCompletionMessage

type ChatCompletionMessage struct {
	// The contents of the message.
	Content string `json:"content,omitempty"`
	// The role of the author of this message.
	Role models.MessageRole `json:"role,omitempty"`
	// The model's reasoning for a response. Only available for reasoning models when the request parameter
	// ChatCompletionRequest.ReasoningFormat has the value `parsed`.
	Reasoning string `json:"reasoning,omitempty"`
	// The tool calls generated by the model, such as function calls.
	ToolCalls []ChatCompletionToolCall `json:"tool_calls,omitempty"`
}

type ChatCompletionRequest

type ChatCompletionRequest struct {
	// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the
	// text so far, decreasing the model's likelihood to repeat the same line verbatim.
	FrequencyPenalty *FrequencyPenalty `json:"frequency_penalty,omitempty"`

	// This is not yet supported by any of our models. Whether to return log probabilities of the output tokens or not.
	// If true, returns the log probabilities of each output token returned in the content of message.
	LogProbs *LogProbs `json:"logprobs,omitempty"`

	// The maximum number of tokens that can be generated in the chat completion. The total length of input tokens and
	// generated tokens is limited by the model's context length.
	MaxCompletionTokens *MaxCompletionTokens `json:"max_completion_tokens,omitempty"`

	// A list of messages comprising the conversation so far.
	Messages models.AnyMessages `json:"messages"`

	// ID of the model to use. For details on which models are compatible with the Chat API, see available models
	// https://console.api.com/docs/models.
	Model models.Model `json:"model"`

	// How many chat completion choices to generate for each input message. Note that at the moment only n=1 is
	// supported. Other values will result in a 400 response.
	//
	// Defaults to 1.
	SuggestionsCount *SuggestionsCount `json:"suggestions_count,omitempty"`

	// Whether to enable parallel function calling during tool use.
	// Defaults to true.
	ParallelToolCalls *ParallelToolCalls `json:"parallel_tool_calls,omitempty"`

	// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so
	// far, increasing the model's likelihood to talk about new topics.
	PresencePenalty *PresencePenalty `json:"presence_penalty,omitempty"`

	// Specifies how to output reasoning tokens.
	ReasoningFormat ReasoningFormat `json:"reasoning_format,omitempty"`

	// An object specifying the format that the model must output.
	//
	// Setting to { "type": "json_object" } enables JSON mode, which guarantees the message the model generates is
	// valid JSON.
	//
	// Important: when using JSON mode, you must also instruct the model to produce JSON yourself via a system or user
	// message.
	ResponseFormat ResponseFormat `json:"response_format,omitempty"`

	// If specified, our system will make a best effort to sample deterministically, such that repeated requests with
	// the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer
	// to the system_fingerprint response parameter to monitor changes in the backend.
	Seed *Seed `json:"seed,omitempty"`

	// The service tier to use for the request. Defaults to ServiceTierOnDemand.
	//
	// ServiceTierAuto will automatically select the highest tier available within the rate limits of your organization.
	// ServiceTierFlex uses the flex tier, which will succeed or fail quickly.
	ServiceTier ServiceTier `json:"service_tier,omitempty"`

	// Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the
	// stop sequence.
	Stop *Stop `json:"stop,omitempty"`

	// If set, partial message deltas will be sent. Tokens will be sent as data-only server-sent events as they become
	// available, with the stream terminated by a data: [DONE] message.
	Stream *Stream `json:"stream,omitempty"`

	// Options for streaming response. Only set this when you set Stream: true.
	StreamOptions *StreamOptions `json:"stream_options,omitempty"`

	// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random,
	// while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this
	// or top_p but not both.
	Temperature *Temperature `json:"temperature,omitempty"`

	// Controls which (if any) tool is called by the model. ToolChoiceStaticNone means the model will not call
	// any tool and instead generates a message. ToolChoiceStaticAuto means the model can pick between generating a
	// message or calling one or more tools. Required means the model must call one or more tools. Specifying a
	// particular tool via {"type": "function", "function": {"name": "my_function"}} forces the model to call that tool.
	//
	// ToolChoiceStaticNone is the default when no tools are present. ToolChoiceStaticAuto is the default if tools are present.
	ToolChoice *models.ToolChoice `json:"tool_choice,omitempty"`

	// A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a
	// list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
	Tools []models.Tool `json:"tools,omitempty"`

	// This is not yet supported by any of our models. An integer between 0 and 20 specifying the number of most
	// likely tokens to return at each token position, each with an associated log probability. logprobs must be set
	// to true if this parameter is used.
	TopLogProbs *TopLogProbs `json:"top_logprobs,omitempty"`

	// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of
	// the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass
	// are considered. We generally recommend altering this or temperature but not both.
	TopP *TopP `json:"top_p,omitempty"`

	// A unique identifier representing your end-user, which can help us monitor and detect abuse.
	User string `json:"user,omitempty"`
}

ChatCompletionRequest creates a model response for the given chat conversation.

https://console.api.com/docs/api-reference#chat
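
The optional scalar parameters are pointer types so that unset fields can be omitted, and the package's New* constructors return pointers for inline use. A sketch of populating several of them (the model ID is assumed and messages is a placeholder built with the models package):

req := api.ChatCompletionRequest{
	Model:               models.Model("llama-3.3-70b-versatile"), // assumed model ID
	Messages:            messages,                                // built via the models package
	Temperature:         api.NewTemperature(0.2),
	TopP:                api.NewTopP(0.9),
	MaxCompletionTokens: api.NewMaxCompletionTokens(512),
	Seed:                api.NewSeed(42),
	FrequencyPenalty:    api.NewFrequencyPenalty(0.5),
	Stream:              api.NewStream(true),
	StreamOptions:       &api.StreamOptions{IncludeUsage: true},
	ResponseFormat:      api.ResponseFormatJSON, // JSON mode: also instruct the model to emit JSON in a message
}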

type ChatCompletionResponse

type ChatCompletionResponse struct {
	// A unique identifier for the chat completion.
	ID string `json:"id"`
	// The Unix timestamp (in seconds) of when the chat completion was created.
	Created int `json:"created"`
	// The model used for the chat completion.
	Model models.Model `json:"model"`
	// The object type, which is always `chat.completion`.
	Object string `json:"object"`
	// This fingerprint represents the backend configuration that the model runs with.
	//
	// Can be used in conjunction with the `seed` request parameter to understand when
	// backend changes have been made that might impact determinism.
	SystemFingerprint string `json:"system_fingerprint,omitempty"`
	// A list of chat completion choices. Can be more than one if `n` is greater than 1.
	Choices []ChatCompletionChoice `json:"choices"`
	// Usage statistics for the completion request.
	Usage ChatCompletionUsage `json:"usage"`
}

type ChatCompletionTokenLogprob

type ChatCompletionTokenLogprob struct {
	// The token.
	Token string `json:"token"`
	// A list of integers representing the UTF-8 bytes representation of the token.
	// Useful in instances where characters are represented by multiple tokens and
	// their byte representations must be combined to generate the correct text
	// representation. Can be `null` if there is no bytes representation for the token.
	Bytes []byte `json:"bytes"`
	// The log probability of this token, if it is within the top 20 most likely
	// tokens. Otherwise, the value `-9999.0` is used to signify that the token is very
	// unlikely.
	LogProb float64 `json:"logprob"`
	// List of the most likely tokens and their log probability, at this token
	// position. In rare cases, there may be fewer than the number of requested
	// `top_logprobs` returned.
	TopLogprobs []ChatCompletionTokenTopLogprob `json:"top_logprobs"`
}

type ChatCompletionTokenTopLogprob

type ChatCompletionTokenTopLogprob struct {
	// The token.
	Token string `json:"token"`
	// A list of integers representing the UTF-8 bytes representation of the token.
	// Useful in instances where characters are represented by multiple tokens and
	// their byte representations must be combined to generate the correct text
	// representation. Can be `null` if there is no bytes representation for the token.
	Bytes []byte `json:"bytes"`
	// The log probability of this token, if it is within the top 20 most likely
	// tokens. Otherwise, the value `-9999.0` is used to signify that the token is very
	// unlikely.
	LogProb float64 `json:"logprob"`
}

type ChatCompletionToolCall

type ChatCompletionToolCall struct {
	// The ID of the tool call.
	ID string `json:"id"`
	// The type of the tool. Currently, only ToolTypeFunction is supported.
	Type models.ToolType `json:"type,omitempty"`
	// The function that the model called.
	Function *models.ToolCallFunction `json:"function,omitempty"`
}

type ChatCompletionUsage

type ChatCompletionUsage struct {
	// Number of tokens in the generated completion.
	CompletionTokens int `json:"completion_tokens"`
	// Number of tokens in the prompt.
	PromptTokens int `json:"prompt_tokens"`
	// Total number of tokens used in the request (prompt + completion).
	TotalTokens int `json:"total_tokens"`
	// Time spent generating tokens
	GenerationTime float64 `json:"generation_time"`
	// Time spent processing input tokens
	ProcessingTime float64 `json:"processing_time"`
	// Time the request spent queued
	QueuedTime float64 `json:"queued_time"`
	// Completion time and prompt time combined.
	TotalTime float64 `json:"total_time"`
}

type ChatCompletionXGroq

type ChatCompletionXGroq struct {
	// A groq request ID, which can be used to refer to a specific request when contacting groq support. Only sent
	// with the first chunk.
	ID string `json:"id"`
	// An error string indicating why a stream was stopped early.
	Error string `json:"error"`
	// Usage information for the stream. Only sent in the final chunk.
	Usage ChatCompletionUsage `json:"usage"`
}

type FrequencyPenalty

type FrequencyPenalty float64

FrequencyPenalty is a number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.

func NewFrequencyPenalty

func NewFrequencyPenalty(f float64) *FrequencyPenalty

type LogProbs

type LogProbs bool

LogProbs determines whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.

func NewLogProbs

func NewLogProbs(l bool) *LogProbs

type MaxCompletionTokens

type MaxCompletionTokens int

MaxCompletionTokens is the maximum number of tokens that can be generated in the chat completion. The total length of input tokens and generated tokens is limited by the model's context length.

func NewMaxCompletionTokens

func NewMaxCompletionTokens(m int) *MaxCompletionTokens

type ParallelToolCalls

type ParallelToolCalls bool

ParallelToolCalls enables parallel function calling during tool use.

func NewParallelToolCalls

func NewParallelToolCalls(p bool) *ParallelToolCalls

type PresencePenalty

type PresencePenalty float64

PresencePenalty is a number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.

func NewPresencePenalty

func NewPresencePenalty(p float64) *PresencePenalty

type ReasoningFormat

type ReasoningFormat string

ReasoningFormat specifies how to output reasoning tokens.

type ResponseFormat

type ResponseFormat string
const (
	ResponseFormatText ResponseFormat = "text"
	ResponseFormatJSON ResponseFormat = "json_object"
)

func (ResponseFormat) MarshalJSON

func (responseFormat ResponseFormat) MarshalJSON() ([]byte, error)

func (*ResponseFormat) UnmarshalJSON

func (responseFormat *ResponseFormat) UnmarshalJSON(data []byte) error

type Seed

type Seed int

Seed of the generation. If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed, and you should refer to the system_fingerprint response parameter to monitor changes in the backend.

func NewSeed

func NewSeed(s int) *Seed

type ServiceTier

type ServiceTier string
const (
	// ServiceTierOnDemand is the default service tier.
	ServiceTierOnDemand ServiceTier = "on_demand"
	// ServiceTierAuto automatically selects the highest tier available within the rate limits of your organization.
	ServiceTierAuto ServiceTier = "auto"
	// ServiceTierFlex uses the flex tier, which will succeed or fail quickly.
	ServiceTierFlex ServiceTier = "flex"
)

type Stop

type Stop [4]string

func (Stop) MarshalJSON

func (stop Stop) MarshalJSON() ([]byte, error)

func (*Stop) UnmarshalJSON

func (stop *Stop) UnmarshalJSON(data []byte) error
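
Stop is a fixed-size array, so up to four stop sequences can be supplied with a composite literal and unused entries stay empty (presumably omitted by the custom MarshalJSON). A sketch, with req a ChatCompletionRequest:

stop := api.Stop{"\n\n", "Observation:"}
req.Stop = &stop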

type Stream

type Stream bool

Stream allows partial message deltas to be sent. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.

func NewStream

func NewStream(s bool) *Stream

type StreamOptions

type StreamOptions struct {
	// If set, an additional chunk will be streamed before the data: [DONE] message. The usage field on this chunk
	// shows the token usage statistics for the entire request, and the choices field will always be an empty array.
	// All other chunks will also include a usage field, but with a null value.
	IncludeUsage bool `json:"include_usage"`
}

type SuggestionsCount

type SuggestionsCount int

SuggestionsCount sets how many chat completion choices to generate for each input message. Note that at the moment only n=1 is supported. Other values will result in a 400 response.

func NewSuggestionsCount

func NewSuggestionsCount(s int) *SuggestionsCount

type Temperature

type Temperature float64

Temperature is the sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or top_p but not both.

func NewTemperature

func NewTemperature(t float64) *Temperature

type TopLogProbs

type TopLogProbs int

TopLogProbs is an integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used.

func NewTopLogProbs

func NewTopLogProbs(t int) *TopLogProbs

type TopP

type TopP float64

TopP is an alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or temperature but not both.

func NewTopP

func NewTopP(t float64) *TopP

