gemini

package

v1.1.6 Latest Latest Go to latest Published: Dec 23, 2025 License: Apache-2.0 Imports: 19 Imported by: 0

Details

Valid go.mod file
Redistributable license
Tagged version
Stable version
Learn more about best practices

Repository

github.com/AltairaLabs/PromptKit

Links

Open Source Insights

Documentation ¶

Overview ¶

Package gemini provides Gemini Live API streaming support.

IMPORTANT: Response Modality Limitation

The Gemini Live API does NOT support requesting both TEXT and AUDIO response modalities simultaneously. Attempting to set ResponseModalities to ["TEXT", "AUDIO"] will result in a WebSocket error:

websocket: close 1007 (invalid payload data): Request contains an invalid argument.

Valid configurations:

["TEXT"] - Text responses only (default)
["AUDIO"] - Audio responses only

If you need both text and audio, you must choose one primary modality. For audio responses with transcription, the API may provide output transcription separately via the OutputTranscription field.

Index ¶

Constants
Variables
func ClassifyError(apiErr *APIError) error
type APIError
- func (e *APIError) Error() string
- func (e *APIError) IsAuthError() bool
- func (e *APIError) IsPolicyViolation() bool
- func (e *APIError) IsRetryable() bool
type AudioEncoder
- func NewAudioEncoder() *AudioEncoder
- func NewAudioEncoderWithChunkSize(chunkSize int) (*AudioEncoder, error)
- func (e *AudioEncoder) AssembleChunks(chunks []*types.MediaChunk) ([]byte, error)
- func (e *AudioEncoder) ConvertInt16ToPCM(samples []int16) []byte
- func (e *AudioEncoder) ConvertPCMToInt16(pcmData []byte) ([]int16, error)
- func (e *AudioEncoder) CreateChunks(ctx context.Context, pcmData []byte) ([]*types.MediaChunk, error)
- func (e *AudioEncoder) DecodePCM(base64Data string) ([]byte, error)
- func (e *AudioEncoder) EncodePCM(pcmData []byte) (string, error)
- func (e *AudioEncoder) GenerateSineWave(frequency float64, durationMs int, amplitude float64) []byte
- func (e *AudioEncoder) GetChunkDurationMs(chunkSize int) float64
- func (e *AudioEncoder) GetChunkSize() int
- func (e *AudioEncoder) GetSampleRate() int
- func (e *AudioEncoder) ReadChunks(ctx context.Context, reader io.Reader) (chunkStream <-chan *types.MediaChunk, errStream <-chan error)
- func (e *AudioEncoder) ValidateConfig(config *types.StreamingMediaConfig) error
type EmbeddingOption
- func WithGeminiEmbeddingAPIKey(key string) EmbeddingOption
- func WithGeminiEmbeddingBaseURL(url string) EmbeddingOption
- func WithGeminiEmbeddingHTTPClient(client *http.Client) EmbeddingOption
- func WithGeminiEmbeddingModel(model string) EmbeddingOption
type EmbeddingProvider
- func NewEmbeddingProvider(opts ...EmbeddingOption) (*EmbeddingProvider, error)
- func (p *EmbeddingProvider) Embed(ctx context.Context, req providers.EmbeddingRequest) (providers.EmbeddingResponse, error)
- func (p *EmbeddingProvider) EstimateCost(tokens int) float64
type ErrorResponse
type FunctionCall
type InlineData
type ModelTurn
type Part
type PromptFeedback
- func (f *PromptFeedback) GetBlockReason() string
- func (f *PromptFeedback) IsBlocked() bool
type Provider
- func NewProvider(id, model, baseURL string, defaults providers.ProviderDefaults, ...) *Provider
- func (p *Provider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo
- func (p *Provider) CreateStreamSession(ctx context.Context, req *providers.StreamingInputConfig) (providers.StreamInputSession, error)
- func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities
- func (p *Provider) GetStreamingCapabilities() providers.StreamingCapabilities
- func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
- func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
- func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
- func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
- func (p *Provider) SupportsStreamInput() []string
type RecoveryStrategy
- func DetermineRecoveryStrategy(err error) RecoveryStrategy
type SafetyRating
type ServerContent
type ServerMessage
- func (s *ServerMessage) UnmarshalJSON(data []byte) error
type SetupComplete
type StreamSession
- func NewStreamSession(ctx context.Context, wsURL, apiKey string, config *StreamSessionConfig) (*StreamSession, error)
- func (s *StreamSession) Close() error
- func (s *StreamSession) CompleteTurn(ctx context.Context) error
- func (s *StreamSession) Done() <-chan struct{}
- func (s *StreamSession) EndInput()
- func (s *StreamSession) Error() error
- func (s *StreamSession) Response() <-chan providers.StreamChunk
- func (s *StreamSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error
- func (s *StreamSession) SendSystemContext(ctx context.Context, text string) error
- func (s *StreamSession) SendText(ctx context.Context, text string) error
- func (s *StreamSession) SendToolResponse(ctx context.Context, toolCallID, result string) error
- func (s *StreamSession) SendToolResponses(ctx context.Context, responses []providers.ToolResponse) error
type StreamSessionConfig
type ToolCallMsg
type ToolDefinition
type ToolProvider
- func NewToolProvider(id, model, baseURL string, defaults providers.ProviderDefaults, ...) *ToolProvider
- func (p *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (interface{}, error)
- func (p *ToolProvider) CreateStreamSession(ctx context.Context, req *providers.StreamingInputConfig) (providers.StreamInputSession, error)
- func (p *ToolProvider) GetStreamingCapabilities() providers.StreamingCapabilities
- func (p *ToolProvider) PredictStreamWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, ...) (<-chan providers.StreamChunk, error)
- func (p *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, ...) (providers.PredictionResponse, []types.MessageToolCall, error)
- func (p *ToolProvider) SupportsStreamInput() []string
type Transcription
type UsageMetadata
type VADConfig
type WebSocketManager
- func NewWebSocketManager(url, apiKey string) *WebSocketManager
- func (wm *WebSocketManager) Close() error
- func (wm *WebSocketManager) Connect(ctx context.Context) error
- func (wm *WebSocketManager) ConnectWithRetry(ctx context.Context) error
- func (wm *WebSocketManager) IsConnected() bool
- func (wm *WebSocketManager) Receive(ctx context.Context, v interface{}) error
- func (wm *WebSocketManager) Send(msg interface{}) error
- func (wm *WebSocketManager) SendPing() error
- func (wm *WebSocketManager) StartHeartbeat(ctx context.Context, interval time.Duration)

Constants ¶

View Source

const (

	// DefaultChunkDuration is 100ms of audio
	DefaultChunkDuration = 100 // milliseconds
	// DefaultChunkSize is the number of bytes for 100ms at 16kHz 16-bit mono
	// 16000 Hz * 0.1 sec * 2 bytes/sample = 3200 bytes
	DefaultChunkSize = (geminiSampleRate * DefaultChunkDuration / 1000) * bytesPerSample
)

View Source

const (
	// DefaultGeminiEmbeddingModel is the default model for embeddings
	DefaultGeminiEmbeddingModel = "text-embedding-004"

	// EmbeddingModel004 is the current recommended model
	EmbeddingModel004 = "text-embedding-004"

	// EmbeddingModel001 is the legacy embedding model
	EmbeddingModel001 = "embedding-001"
)

Embedding model constants

View Source

const (
	ErrNotConnected  = "not connected"
	ErrManagerClosed = "manager is closed"
)

Common error messages

View Source

const (
	ErrSessionClosed = "session is closed"
)

Common error messages

Variables ¶

View Source

var (
	// ErrInvalidSampleRate indicates an unsupported sample rate
	ErrInvalidSampleRate = errors.New("invalid sample rate: must be 16000 Hz")
	// ErrInvalidChannels indicates an unsupported channel count
	ErrInvalidChannels = errors.New("invalid channels: must be mono (1 channel)")
	// ErrInvalidBitDepth indicates an unsupported bit depth
	ErrInvalidBitDepth = errors.New("invalid bit depth: must be 16 bits")
	// ErrInvalidChunkSize indicates chunk size is not aligned
	ErrInvalidChunkSize = errors.New("invalid chunk size: must be multiple of sample size")
	// ErrEmptyAudioData indicates no audio data provided
	ErrEmptyAudioData = errors.New("empty audio data")
)

View Source

var (
	// ErrInvalidAudioFormat indicates audio format doesn't meet Gemini requirements
	ErrInvalidAudioFormat = errors.New("invalid audio format")

	// ErrRateLimitExceeded indicates too many requests
	ErrRateLimitExceeded = errors.New("rate limit exceeded")

	// ErrAuthenticationFailed indicates invalid API key
	ErrAuthenticationFailed = errors.New("authentication failed")

	// ErrServiceUnavailable indicates temporary service issue
	ErrServiceUnavailable = errors.New("service unavailable")

	// ErrPolicyViolation indicates content policy violation
	ErrPolicyViolation = errors.New("policy violation")

	// ErrInvalidRequest indicates malformed request
	ErrInvalidRequest = errors.New("invalid request")
)

Common errors for Gemini streaming

Functions ¶

func ClassifyError ¶

func ClassifyError(apiErr *APIError) error

ClassifyError converts an API error code to a standard error

Types ¶

type APIError ¶ added in v1.1.3

type APIError struct {
	Code    int    `json:"code"`
	Message string `json:"message"`
	Status  string `json:"status"`
}

APIError represents an error returned by the Gemini API.

func (*APIError) Error ¶ added in v1.1.3

func (e *APIError) Error() string

Error implements the error interface

func (*APIError) IsAuthError ¶ added in v1.1.3

func (e *APIError) IsAuthError() bool

IsAuthError returns true if the error is authentication-related

func (*APIError) IsPolicyViolation ¶ added in v1.1.3

func (e *APIError) IsPolicyViolation() bool

IsPolicyViolation returns true if the error is a content policy violation

func (*APIError) IsRetryable ¶ added in v1.1.3

func (e *APIError) IsRetryable() bool

IsRetryable returns true if the error can be retried

type AudioEncoder ¶

type AudioEncoder struct {
	// contains filtered or unexported fields
}

AudioEncoder handles PCM Linear16 audio encoding for Gemini Live API

func NewAudioEncoder ¶

func NewAudioEncoder() *AudioEncoder

NewAudioEncoder creates a new audio encoder with Gemini Live API specifications

func NewAudioEncoderWithChunkSize ¶

func NewAudioEncoderWithChunkSize(chunkSize int) (*AudioEncoder, error)

NewAudioEncoderWithChunkSize creates an encoder with custom chunk size

func (*AudioEncoder) AssembleChunks ¶

func (e *AudioEncoder) AssembleChunks(chunks []*types.MediaChunk) ([]byte, error)

AssembleChunks reassembles MediaChunks back into continuous PCM data.

func (*AudioEncoder) ConvertInt16ToPCM ¶

func (e *AudioEncoder) ConvertInt16ToPCM(samples []int16) []byte

ConvertInt16ToPCM converts []int16 samples to PCM bytes (little-endian)

func (*AudioEncoder) ConvertPCMToInt16 ¶

func (e *AudioEncoder) ConvertPCMToInt16(pcmData []byte) ([]int16, error)

ConvertPCMToInt16 converts PCM bytes to []int16 samples (little-endian)

func (*AudioEncoder) CreateChunks ¶

func (e *AudioEncoder) CreateChunks(ctx context.Context, pcmData []byte) ([]*types.MediaChunk, error)

CreateChunks splits PCM audio data into appropriately sized chunks

func (*AudioEncoder) DecodePCM ¶

func (e *AudioEncoder) DecodePCM(base64Data string) ([]byte, error)

DecodePCM decodes base64-encoded audio data back to raw PCM

func (*AudioEncoder) EncodePCM ¶

func (e *AudioEncoder) EncodePCM(pcmData []byte) (string, error)

EncodePCM encodes raw PCM audio data to base64 for WebSocket transmission

func (*AudioEncoder) GenerateSineWave ¶

func (e *AudioEncoder) GenerateSineWave(frequency float64, durationMs int, amplitude float64) []byte

GenerateSineWave generates PCM audio for a sine wave (useful for testing)

func (*AudioEncoder) GetChunkDurationMs ¶

func (e *AudioEncoder) GetChunkDurationMs(chunkSize int) float64

GetChunkDurationMs calculates the duration of a chunk in milliseconds

func (*AudioEncoder) GetChunkSize ¶

func (e *AudioEncoder) GetChunkSize() int

GetChunkSize returns the configured chunk size in bytes

func (*AudioEncoder) GetSampleRate ¶

func (e *AudioEncoder) GetSampleRate() int

GetSampleRate returns the configured sample rate

func (*AudioEncoder) ReadChunks ¶

func (e *AudioEncoder) ReadChunks(ctx context.Context, reader io.Reader) (chunkStream <-chan *types.MediaChunk, errStream <-chan error)

ReadChunks reads audio from an io.Reader and creates chunks on-the-fly

func (*AudioEncoder) ValidateConfig ¶

func (e *AudioEncoder) ValidateConfig(config *types.StreamingMediaConfig) error

ValidateConfig validates audio configuration against Gemini requirements

type EmbeddingOption ¶ added in v1.1.6

type EmbeddingOption func(*EmbeddingProvider)

EmbeddingOption configures the EmbeddingProvider.

func WithGeminiEmbeddingAPIKey ¶ added in v1.1.6

func WithGeminiEmbeddingAPIKey(key string) EmbeddingOption

WithGeminiEmbeddingAPIKey sets the API key explicitly.

func WithGeminiEmbeddingBaseURL ¶ added in v1.1.6

func WithGeminiEmbeddingBaseURL(url string) EmbeddingOption

WithGeminiEmbeddingBaseURL sets a custom base URL.

func WithGeminiEmbeddingHTTPClient ¶ added in v1.1.6

func WithGeminiEmbeddingHTTPClient(client *http.Client) EmbeddingOption

WithGeminiEmbeddingHTTPClient sets a custom HTTP client.

func WithGeminiEmbeddingModel ¶ added in v1.1.6

func WithGeminiEmbeddingModel(model string) EmbeddingOption

WithGeminiEmbeddingModel sets the embedding model.

type EmbeddingProvider ¶ added in v1.1.6

type EmbeddingProvider struct {
	*providers.BaseEmbeddingProvider
}

EmbeddingProvider implements embedding generation via Gemini API.

func NewEmbeddingProvider ¶ added in v1.1.6

func NewEmbeddingProvider(opts ...EmbeddingOption) (*EmbeddingProvider, error)

NewEmbeddingProvider creates a Gemini embedding provider.

func (*EmbeddingProvider) Embed ¶ added in v1.1.6

func (p *EmbeddingProvider) Embed(
	ctx context.Context,
	req providers.EmbeddingRequest,
) (providers.EmbeddingResponse, error)

Embed generates embeddings for the given texts.

func (*EmbeddingProvider) EstimateCost ¶ added in v1.1.6

func (p *EmbeddingProvider) EstimateCost(tokens int) float64

EstimateCost estimates the cost of embedding the given number of tokens. Note: Gemini embeddings are currently covered by the free tier.

type ErrorResponse ¶

type ErrorResponse struct {
	Error *APIError `json:"error"`
}

ErrorResponse wraps an APIError in a message format

type FunctionCall ¶ added in v1.1.6

type FunctionCall struct {
	Name string                 `json:"name,omitempty"`
	ID   string                 `json:"id,omitempty"`
	Args map[string]interface{} `json:"args,omitempty"`
}

FunctionCall represents a function call

type ModelTurn ¶

type ModelTurn struct {
	Parts []Part `json:"parts,omitempty"`
}

ModelTurn represents a model response turn

type Part ¶

type Part struct {
	Text       string      `json:"text,omitempty"`
	InlineData *InlineData `json:"inlineData,omitempty"` // camelCase!
}

Part represents a content part (text or inline data)

type PromptFeedback ¶

type PromptFeedback struct {
	SafetyRatings []SafetyRating `json:"safetyRatings,omitempty"`
	BlockReason   string         `json:"blockReason,omitempty"`
}

PromptFeedback contains safety ratings and block reason

func (*PromptFeedback) GetBlockReason ¶

func (f *PromptFeedback) GetBlockReason() string

GetBlockReason returns a human-readable block reason

func (*PromptFeedback) IsBlocked ¶

func (f *PromptFeedback) IsBlocked() bool

IsBlocked returns true if content was blocked by safety filters

type Provider ¶ added in v1.1.3

type Provider struct {
	providers.BaseProvider
	Model    string
	BaseURL  string
	ApiKey   string
	Defaults providers.ProviderDefaults
}

Provider implements the Provider interface for Google Gemini

func NewProvider ¶ added in v1.1.3

func NewProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *Provider

NewProvider creates a new Gemini provider

func (*Provider) CalculateCost ¶ added in v1.1.3

func (p *Provider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo

CalculateCost calculates detailed cost breakdown including optional cached tokens

func (*Provider) CreateStreamSession ¶ added in v1.1.3

func (p *Provider) CreateStreamSession(
	ctx context.Context,
	req *providers.StreamingInputConfig,
) (providers.StreamInputSession, error)

CreateStreamSession creates a new bidirectional streaming session with Gemini Live API

Response Modalities: By default, the session is configured to return TEXT responses only. To request audio responses, pass "response_modalities" in the request metadata:

req := &providers.StreamingInputConfig{
    Config: config,
    Metadata: map[string]interface{}{
        "response_modalities": []string{"AUDIO"}, // Audio only (TEXT+AUDIO not supported)
    },
}

Audio responses will be delivered in the StreamChunk.Metadata["audio_data"] field as base64-encoded PCM.

func (*Provider) GetMultimodalCapabilities ¶ added in v1.1.3

func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities

GetMultimodalCapabilities returns Gemini's multimodal support capabilities

func (*Provider) GetStreamingCapabilities ¶ added in v1.1.3

func (p *Provider) GetStreamingCapabilities() providers.StreamingCapabilities

GetStreamingCapabilities returns detailed information about Gemini's streaming support

func (*Provider) Predict ¶ added in v1.1.3

func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

Predict sends a predict request to Gemini

func (*Provider) PredictMultimodal ¶ added in v1.1.3

func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)

PredictMultimodal performs a predict request with multimodal content

func (*Provider) PredictMultimodalStream ¶ added in v1.1.3

func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictMultimodalStream performs a streaming predict request with multimodal content

func (*Provider) PredictStream ¶ added in v1.1.3

func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)

PredictStream streams a predict response from Gemini

func (*Provider) SupportsStreamInput ¶ added in v1.1.3

func (p *Provider) SupportsStreamInput() []string

SupportsStreamInput returns the media types supported for streaming input

type RecoveryStrategy ¶

type RecoveryStrategy int

RecoveryStrategy defines how to handle different error types

const (
	// RecoveryRetry indicates the operation should be retried
	RecoveryRetry RecoveryStrategy = iota

	// RecoveryFailFast indicates the operation should fail immediately
	RecoveryFailFast

	// RecoveryGracefulDegradation indicates fallback to a simpler mode
	RecoveryGracefulDegradation

	// RecoveryWaitAndRetry indicates retry after a delay
	RecoveryWaitAndRetry
)

func DetermineRecoveryStrategy ¶

func DetermineRecoveryStrategy(err error) RecoveryStrategy

DetermineRecoveryStrategy determines how to handle an error

type SafetyRating ¶

type SafetyRating struct {
	Category    string `json:"category"`
	Probability string `json:"probability"`
}

SafetyRating represents content safety assessment

type ServerContent ¶

type ServerContent struct {
	ModelTurn           *ModelTurn     `json:"modelTurn,omitempty"`
	TurnComplete        bool           `json:"turnComplete,omitempty"`
	GenerationComplete  bool           `json:"generationComplete,omitempty"`
	Interrupted         bool           `json:"interrupted,omitempty"`
	InputTranscription  *Transcription `json:"inputTranscription,omitempty"`  // User speech transcription
	OutputTranscription *Transcription `json:"outputTranscription,omitempty"` // Model speech transcription
}

ServerContent represents the server content (BidiGenerateContentServerContent)

type ServerMessage ¶

type ServerMessage struct {
	SetupComplete *SetupComplete `json:"setupComplete,omitempty"`
	ServerContent *ServerContent `json:"serverContent,omitempty"`
	ToolCall      *ToolCallMsg   `json:"toolCall,omitempty"`
	UsageMetadata *UsageMetadata `json:"usageMetadata,omitempty"`
}

ServerMessage represents a message from the Gemini server (BidiGenerateContentServerMessage)

func (*ServerMessage) UnmarshalJSON ¶

func (s *ServerMessage) UnmarshalJSON(data []byte) error

UnmarshalJSON unmarshals ServerMessage from JSON with custom handling.

type SetupComplete ¶

type SetupComplete struct{}

SetupComplete indicates setup is complete (empty object per docs)

type StreamSession ¶ added in v1.1.3

type StreamSession struct {
	// contains filtered or unexported fields
}

StreamSession implements StreamInputSession for Gemini Live API with automatic reconnection on unexpected connection drops.

func NewStreamSession ¶ added in v1.1.3

func NewStreamSession(ctx context.Context, wsURL, apiKey string, config *StreamSessionConfig) (*StreamSession, error)

NewStreamSession creates a new streaming session

func (*StreamSession) Close ¶ added in v1.1.3

func (s *StreamSession) Close() error

Close closes the session

func (*StreamSession) CompleteTurn ¶ added in v1.1.3

func (s *StreamSession) CompleteTurn(ctx context.Context) error

CompleteTurn signals that the current turn is complete

func (*StreamSession) Done ¶ added in v1.1.3

func (s *StreamSession) Done() <-chan struct{}

Done returns a channel that's closed when the session ends

func (*StreamSession) EndInput ¶ added in v1.1.6

func (s *StreamSession) EndInput()

EndInput implements the EndInputter interface expected by DuplexProviderStage. It signals that the user's input turn is complete and the model should respond.

Behavior depends on VAD configuration:

- If VAD is disabled: sends an activityEnd signal for explicit turn control
- If VAD is enabled: sends silence frames to trigger VAD end-of-speech detection

func (*StreamSession) Error ¶ added in v1.1.3

func (s *StreamSession) Error() error

Error returns the error that caused the session to close

func (*StreamSession) Response ¶ added in v1.1.3

func (s *StreamSession) Response() <-chan providers.StreamChunk

Response returns the channel for receiving responses

func (*StreamSession) SendChunk ¶ added in v1.1.3

func (s *StreamSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error

SendChunk sends a media chunk to the server. When VAD is disabled (manual turn control), automatically sends activityStart before the first audio chunk of a turn.

func (*StreamSession) SendSystemContext ¶ added in v1.1.6

func (s *StreamSession) SendSystemContext(ctx context.Context, text string) error

SendSystemContext sends a text message as context without completing the turn. Use this for system prompts that should provide context but not trigger a response. The audio/text that follows will be processed with this context in mind.

func (*StreamSession) SendText ¶ added in v1.1.3

func (s *StreamSession) SendText(ctx context.Context, text string) error

SendText sends a text message to the server and marks the turn as complete

func (*StreamSession) SendToolResponse ¶ added in v1.1.6

func (s *StreamSession) SendToolResponse(ctx context.Context, toolCallID, result string) error

SendToolResponse sends a single tool execution result back to Gemini. The toolCallID must match the ID from the FunctionCall. The result should be a JSON-serializable string (typically JSON).

func (*StreamSession) SendToolResponses ¶ added in v1.1.6

func (s *StreamSession) SendToolResponses(ctx context.Context, responses []providers.ToolResponse) error

SendToolResponses sends multiple tool execution results back to Gemini. This is used when the model makes parallel tool calls. After receiving the tool responses, Gemini will continue generating.

type StreamSessionConfig ¶

type StreamSessionConfig struct {
	Model              string   // Model name (will be prefixed with "models/" automatically)
	ResponseModalities []string // "TEXT" or "AUDIO" - NOT both! See package doc for details.
	SystemInstruction  string   // System prompt/instruction for the model
	InputCostPer1K     float64  // Cost per 1K input tokens (for USD calculation)
	OutputCostPer1K    float64  // Cost per 1K output tokens (for USD calculation)

	// VAD configures Voice Activity Detection settings.
	// If nil, Gemini uses its default VAD settings.
	VAD *VADConfig

	// Tools defines the function declarations available to the model.
	// When tools are configured, the model will return structured tool calls
	// instead of speaking them as text. Tool definitions should match the
	// OpenAPI schema subset supported by Gemini.
	Tools []ToolDefinition

	// AutoReconnect enables automatic reconnection on unexpected connection drops.
	// When enabled, the session will attempt to reconnect and continue receiving
	// responses. Note: conversation context may be lost on reconnection.
	AutoReconnect     bool
	MaxReconnectTries int // Maximum reconnection attempts (default: 3)
}

StreamSessionConfig configures a streaming session

type ToolCallMsg ¶ added in v1.1.6

type ToolCallMsg struct {
	FunctionCalls []FunctionCall `json:"functionCalls,omitempty"`
}

ToolCallMsg represents a tool call from the model

type ToolDefinition ¶ added in v1.1.6

type ToolDefinition struct {
	Name        string                 `json:"name"`
	Description string                 `json:"description,omitempty"`
	Parameters  map[string]interface{} `json:"parameters,omitempty"` // JSON Schema for parameters
}

ToolDefinition represents a function/tool that the model can call. This follows the Gemini function calling schema.

type ToolProvider ¶ added in v1.1.3

type ToolProvider struct {
	*Provider
	// contains filtered or unexported fields
}

ToolProvider extends Provider with tool support

func NewToolProvider ¶ added in v1.1.3

func NewToolProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *ToolProvider

NewToolProvider creates a new Gemini provider with tool support

func (*ToolProvider) BuildTooling ¶ added in v1.1.3

func (p *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (interface{}, error)

BuildTooling converts tool descriptors to Gemini format

func (*ToolProvider) CreateStreamSession ¶ added in v1.1.6

func (p *ToolProvider) CreateStreamSession(
	ctx context.Context,
	req *providers.StreamingInputConfig,
) (providers.StreamInputSession, error)

CreateStreamSession forwards to the embedded Provider's CreateStreamSession. This enables duplex streaming with tool support.

func (*ToolProvider) GetStreamingCapabilities ¶ added in v1.1.6

func (p *ToolProvider) GetStreamingCapabilities() providers.StreamingCapabilities

GetStreamingCapabilities forwards to the embedded Provider's GetStreamingCapabilities.

func (*ToolProvider) PredictStreamWithTools ¶ added in v1.1.5

func (p *ToolProvider) PredictStreamWithTools(
	ctx context.Context,
	req providers.PredictionRequest,
	tools interface{},
	toolChoice string,
) (<-chan providers.StreamChunk, error)

PredictStreamWithTools performs a streaming predict request with tool support

func (*ToolProvider) PredictWithTools ¶ added in v1.1.3

func (p *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)

PredictWithTools performs a predict request with tool support

func (*ToolProvider) SupportsStreamInput ¶ added in v1.1.6

func (p *ToolProvider) SupportsStreamInput() []string

SupportsStreamInput forwards to the embedded Provider's SupportsStreamInput.

type Transcription ¶ added in v1.1.6

type Transcription struct {
	Text string `json:"text,omitempty"`
}

Transcription represents audio transcription (BidiGenerateContentTranscription)

type UsageMetadata ¶ added in v1.1.6

type UsageMetadata struct {
	PromptTokenCount   int `json:"promptTokenCount,omitempty"`
	ResponseTokenCount int `json:"responseTokenCount,omitempty"`
	TotalTokenCount    int `json:"totalTokenCount,omitempty"`
}

UsageMetadata contains token usage information

type VADConfig ¶ added in v1.1.6

type VADConfig struct {
	// Disabled turns off automatic VAD (manual turn control only)
	Disabled bool
	// StartOfSpeechSensitivity controls how sensitive the VAD is to detecting speech start.
	// Valid values: "UNSPECIFIED", "LOW", "MEDIUM", "HIGH"
	StartOfSpeechSensitivity string
	// EndOfSpeechSensitivity controls how sensitive the VAD is to detecting silence.
	// Valid values: "UNSPECIFIED", "LOW", "MEDIUM", "HIGH"
	// Lower sensitivity = longer silence needed to trigger end of speech
	EndOfSpeechSensitivity string
	// PrefixPaddingMs is extra padding in milliseconds before speech detection
	PrefixPaddingMs int
	// SilenceThresholdMs is the duration of silence (in ms) to trigger end of speech.
	// This maps to Gemini's "suffixPaddingMs" parameter.
	// Default is typically ~500ms. Increase for TTS audio with natural pauses.
	SilenceThresholdMs int
}

VADConfig configures Voice Activity Detection settings for Gemini Live API. These settings control when Gemini detects the end of speech and starts responding.

type WebSocketManager ¶

type WebSocketManager struct {
	// contains filtered or unexported fields
}

WebSocketManager manages a WebSocket connection with reconnection logic.

func NewWebSocketManager ¶

func NewWebSocketManager(url, apiKey string) *WebSocketManager

NewWebSocketManager creates a new WebSocket manager

func (*WebSocketManager) Close ¶

func (wm *WebSocketManager) Close() error

Close gracefully closes the WebSocket connection

func (*WebSocketManager) Connect ¶

func (wm *WebSocketManager) Connect(ctx context.Context) error

Connect establishes a WebSocket connection to the Gemini Live API

func (*WebSocketManager) ConnectWithRetry ¶

func (wm *WebSocketManager) ConnectWithRetry(ctx context.Context) error

ConnectWithRetry connects with exponential backoff retry logic

func (*WebSocketManager) IsConnected ¶

func (wm *WebSocketManager) IsConnected() bool

IsConnected returns true if the WebSocket is connected

func (*WebSocketManager) Receive ¶

func (wm *WebSocketManager) Receive(ctx context.Context, v interface{}) error

Receive reads a message from the WebSocket

func (*WebSocketManager) Send ¶

func (wm *WebSocketManager) Send(msg interface{}) error

Send sends a message through the WebSocket

func (*WebSocketManager) SendPing ¶

func (wm *WebSocketManager) SendPing() error

SendPing sends a WebSocket ping to keep the connection alive

func (*WebSocketManager) StartHeartbeat ¶

func (wm *WebSocketManager) StartHeartbeat(ctx context.Context, interval time.Duration)

StartHeartbeat starts a goroutine that sends periodic pings

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL

Documentation ¶

Overview ¶

Index ¶

Constants ¶

Variables ¶

Functions ¶

func ClassifyError ¶

Types ¶

type APIError ¶ added in v1.1.3

func (*APIError) Error ¶ added in v1.1.3

func (*APIError) IsAuthError ¶ added in v1.1.3

func (*APIError) IsPolicyViolation ¶ added in v1.1.3

func (*APIError) IsRetryable ¶ added in v1.1.3

type AudioEncoder ¶

func NewAudioEncoder ¶

func NewAudioEncoderWithChunkSize ¶

func (*AudioEncoder) AssembleChunks ¶

func (*AudioEncoder) ConvertInt16ToPCM ¶

func (*AudioEncoder) ConvertPCMToInt16 ¶

func (*AudioEncoder) CreateChunks ¶

func (*AudioEncoder) DecodePCM ¶

func (*AudioEncoder) EncodePCM ¶

func (*AudioEncoder) GenerateSineWave ¶

func (*AudioEncoder) GetChunkDurationMs ¶

func (*AudioEncoder) GetChunkSize ¶

func (*AudioEncoder) GetSampleRate ¶

func (*AudioEncoder) ReadChunks ¶

func (*AudioEncoder) ValidateConfig ¶

type EmbeddingOption ¶ added in v1.1.6

func WithGeminiEmbeddingAPIKey ¶ added in v1.1.6

func WithGeminiEmbeddingBaseURL ¶ added in v1.1.6

func WithGeminiEmbeddingHTTPClient ¶ added in v1.1.6

func WithGeminiEmbeddingModel ¶ added in v1.1.6

type EmbeddingProvider ¶ added in v1.1.6

func NewEmbeddingProvider ¶ added in v1.1.6

func (*EmbeddingProvider) Embed ¶ added in v1.1.6

func (*EmbeddingProvider) EstimateCost ¶ added in v1.1.6

type ErrorResponse ¶

type FunctionCall ¶ added in v1.1.6

type InlineData ¶

type ModelTurn ¶

type Part ¶

type PromptFeedback ¶

func (*PromptFeedback) GetBlockReason ¶

func (*PromptFeedback) IsBlocked ¶

type Provider ¶ added in v1.1.3

func NewProvider ¶ added in v1.1.3

func (*Provider) CalculateCost ¶ added in v1.1.3

func (*Provider) CreateStreamSession ¶ added in v1.1.3

func (*Provider) GetMultimodalCapabilities ¶ added in v1.1.3

func (*Provider) GetStreamingCapabilities ¶ added in v1.1.3

func (*Provider) Predict ¶ added in v1.1.3

func (*Provider) PredictMultimodal ¶ added in v1.1.3

func (*Provider) PredictMultimodalStream ¶ added in v1.1.3

func (*Provider) PredictStream ¶ added in v1.1.3

func (*Provider) SupportsStreamInput ¶ added in v1.1.3

type RecoveryStrategy ¶

func DetermineRecoveryStrategy ¶

type SafetyRating ¶

type ServerContent ¶

type ServerMessage ¶

func (*ServerMessage) UnmarshalJSON ¶

type SetupComplete ¶

type StreamSession ¶ added in v1.1.3

func NewStreamSession ¶ added in v1.1.3

func (*StreamSession) Close ¶ added in v1.1.3

func (*StreamSession) CompleteTurn ¶ added in v1.1.3

func (*StreamSession) Done ¶ added in v1.1.3

func (*StreamSession) EndInput ¶ added in v1.1.6

func (*StreamSession) Error ¶ added in v1.1.3

func (*StreamSession) Response ¶ added in v1.1.3

func (*StreamSession) SendChunk ¶ added in v1.1.3

func (*StreamSession) SendSystemContext ¶ added in v1.1.6

func (*StreamSession) SendText ¶ added in v1.1.3

func (*StreamSession) SendToolResponse ¶ added in v1.1.6

func (*StreamSession) SendToolResponses ¶ added in v1.1.6

type StreamSessionConfig ¶

type ToolCallMsg ¶ added in v1.1.6

type ToolDefinition ¶ added in v1.1.6

type ToolProvider ¶ added in v1.1.3