Documentation
¶
Overview ¶
Package gemini provides Gemini Live API streaming support.
IMPORTANT: Response Modality Limitation
The Gemini Live API does NOT support requesting both TEXT and AUDIO response modalities simultaneously. Attempting to set ResponseModalities to ["TEXT", "AUDIO"] will result in a WebSocket error:
websocket: close 1007 (invalid payload data): Request contains an invalid argument.
Valid configurations:
- ["TEXT"] - Text responses only (default)
- ["AUDIO"] - Audio responses only
If you need both text and audio, you must choose one primary modality. For audio responses with transcription, the API may provide output transcription separately via the OutputTranscription field.
Index ¶
- Constants
- Variables
- func ClassifyError(apiErr *APIError) error
- type APIError
- type AudioEncoder
- func (e *AudioEncoder) AssembleChunks(chunks []*types.MediaChunk) ([]byte, error)
- func (e *AudioEncoder) ConvertInt16ToPCM(samples []int16) []byte
- func (e *AudioEncoder) ConvertPCMToInt16(pcmData []byte) ([]int16, error)
- func (e *AudioEncoder) CreateChunks(ctx context.Context, pcmData []byte) ([]*types.MediaChunk, error)
- func (e *AudioEncoder) DecodePCM(base64Data string) ([]byte, error)
- func (e *AudioEncoder) EncodePCM(pcmData []byte) (string, error)
- func (e *AudioEncoder) GenerateSineWave(frequency float64, durationMs int, amplitude float64) []byte
- func (e *AudioEncoder) GetChunkDurationMs(chunkSize int) float64
- func (e *AudioEncoder) GetChunkSize() int
- func (e *AudioEncoder) GetSampleRate() int
- func (e *AudioEncoder) ReadChunks(ctx context.Context, reader io.Reader) (chunkStream <-chan *types.MediaChunk, errStream <-chan error)
- func (e *AudioEncoder) ValidateConfig(config *types.StreamingMediaConfig) error
- type EmbeddingOption
- type EmbeddingProvider
- type ErrorResponse
- type FunctionCall
- type InlineData
- type ModelTurn
- type Part
- type PromptFeedback
- type Provider
- func (p *Provider) CalculateCost(tokensIn, tokensOut, cachedTokens int) types.CostInfo
- func (p *Provider) CreateStreamSession(ctx context.Context, req *providers.StreamingInputConfig) (providers.StreamInputSession, error)
- func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities
- func (p *Provider) GetStreamingCapabilities() providers.StreamingCapabilities
- func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
- func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
- func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
- func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
- func (p *Provider) SupportsStreamInput() []string
- type RecoveryStrategy
- type SafetyRating
- type ServerContent
- type ServerMessage
- type SetupComplete
- type StreamSession
- func (s *StreamSession) Close() error
- func (s *StreamSession) CompleteTurn(ctx context.Context) error
- func (s *StreamSession) Done() <-chan struct{}
- func (s *StreamSession) EndInput()
- func (s *StreamSession) Error() error
- func (s *StreamSession) Response() <-chan providers.StreamChunk
- func (s *StreamSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error
- func (s *StreamSession) SendSystemContext(ctx context.Context, text string) error
- func (s *StreamSession) SendText(ctx context.Context, text string) error
- func (s *StreamSession) SendToolResponse(ctx context.Context, toolCallID, result string) error
- func (s *StreamSession) SendToolResponses(ctx context.Context, responses []providers.ToolResponse) error
- type StreamSessionConfig
- type ToolCallMsg
- type ToolDefinition
- type ToolProvider
- func (p *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (interface{}, error)
- func (p *ToolProvider) CreateStreamSession(ctx context.Context, req *providers.StreamingInputConfig) (providers.StreamInputSession, error)
- func (p *ToolProvider) GetStreamingCapabilities() providers.StreamingCapabilities
- func (p *ToolProvider) PredictStreamWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, ...) (<-chan providers.StreamChunk, error)
- func (p *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, ...) (providers.PredictionResponse, []types.MessageToolCall, error)
- func (p *ToolProvider) SupportsStreamInput() []string
- type Transcription
- type UsageMetadata
- type VADConfig
- type WebSocketManager
- func (wm *WebSocketManager) Close() error
- func (wm *WebSocketManager) Connect(ctx context.Context) error
- func (wm *WebSocketManager) ConnectWithRetry(ctx context.Context) error
- func (wm *WebSocketManager) IsConnected() bool
- func (wm *WebSocketManager) Receive(ctx context.Context, v interface{}) error
- func (wm *WebSocketManager) Send(msg interface{}) error
- func (wm *WebSocketManager) SendPing() error
- func (wm *WebSocketManager) StartHeartbeat(ctx context.Context, interval time.Duration)
Constants ¶
const ( // DefaultChunkDuration is 100ms of audio DefaultChunkDuration = 100 // milliseconds // DefaultChunkSize is the number of bytes for 100ms at 16kHz 16-bit mono // 16000 Hz * 0.1 sec * 2 bytes/sample = 3200 bytes DefaultChunkSize = (geminiSampleRate * DefaultChunkDuration / 1000) * bytesPerSample )
const ( // DefaultGeminiEmbeddingModel is the default model for embeddings DefaultGeminiEmbeddingModel = "text-embedding-004" // EmbeddingModel004 is the current recommended model EmbeddingModel004 = "text-embedding-004" // EmbeddingModel001 is the legacy embedding model EmbeddingModel001 = "embedding-001" )
Embedding model constants
const ( ErrNotConnected = "not connected" ErrManagerClosed = "manager is closed" )
Common error messages
const (
ErrSessionClosed = "session is closed"
)
Common error messages
Variables ¶
var ( // ErrInvalidSampleRate indicates an unsupported sample rate ErrInvalidSampleRate = errors.New("invalid sample rate: must be 16000 Hz") // ErrInvalidChannels indicates an unsupported channel count ErrInvalidChannels = errors.New("invalid channels: must be mono (1 channel)") // ErrInvalidBitDepth indicates an unsupported bit depth ErrInvalidBitDepth = errors.New("invalid bit depth: must be 16 bits") // ErrInvalidChunkSize indicates chunk size is not aligned ErrInvalidChunkSize = errors.New("invalid chunk size: must be multiple of sample size") // ErrEmptyAudioData indicates no audio data provided ErrEmptyAudioData = errors.New("empty audio data") )
var ( // ErrInvalidAudioFormat indicates audio format doesn't meet Gemini requirements ErrInvalidAudioFormat = errors.New("invalid audio format") // ErrRateLimitExceeded indicates too many requests ErrRateLimitExceeded = errors.New("rate limit exceeded") // ErrAuthenticationFailed indicates invalid API key ErrAuthenticationFailed = errors.New("authentication failed") ErrServiceUnavailable = errors.New("service unavailable") // ErrPolicyViolation indicates content policy violation ErrPolicyViolation = errors.New("policy violation") // ErrInvalidRequest indicates malformed request ErrInvalidRequest = errors.New("invalid request") )
Common errors for Gemini streaming
Functions ¶
func ClassifyError ¶
ClassifyError converts an API error code to a standard error
Types ¶
type APIError ¶ added in v1.1.3
type APIError struct {
Code int `json:"code"`
Message string `json:"message"`
Status string `json:"status"`
}
APIError represents an error returned by the Gemini API
func (*APIError) IsAuthError ¶ added in v1.1.3
IsAuthError returns true if the error is authentication-related
func (*APIError) IsPolicyViolation ¶ added in v1.1.3
IsPolicyViolation returns true if the error is a content policy violation
func (*APIError) IsRetryable ¶ added in v1.1.3
IsRetryable returns true if the error can be retried
type AudioEncoder ¶
type AudioEncoder struct {
// contains filtered or unexported fields
}
AudioEncoder handles PCM Linear16 audio encoding for Gemini Live API
func NewAudioEncoder ¶
func NewAudioEncoder() *AudioEncoder
NewAudioEncoder creates a new audio encoder with Gemini Live API specifications
func NewAudioEncoderWithChunkSize ¶
func NewAudioEncoderWithChunkSize(chunkSize int) (*AudioEncoder, error)
NewAudioEncoderWithChunkSize creates an encoder with custom chunk size
func (*AudioEncoder) AssembleChunks ¶
func (e *AudioEncoder) AssembleChunks(chunks []*types.MediaChunk) ([]byte, error)
AssembleChunks reassembles MediaChunks back into continuous PCM data.
func (*AudioEncoder) ConvertInt16ToPCM ¶
func (e *AudioEncoder) ConvertInt16ToPCM(samples []int16) []byte
ConvertInt16ToPCM converts []int16 samples to PCM bytes (little-endian)
func (*AudioEncoder) ConvertPCMToInt16 ¶
func (e *AudioEncoder) ConvertPCMToInt16(pcmData []byte) ([]int16, error)
ConvertPCMToInt16 converts PCM bytes to []int16 samples (little-endian)
func (*AudioEncoder) CreateChunks ¶
func (e *AudioEncoder) CreateChunks(ctx context.Context, pcmData []byte) ([]*types.MediaChunk, error)
CreateChunks splits PCM audio data into appropriately sized chunks
func (*AudioEncoder) DecodePCM ¶
func (e *AudioEncoder) DecodePCM(base64Data string) ([]byte, error)
DecodePCM decodes base64-encoded audio data back to raw PCM
func (*AudioEncoder) EncodePCM ¶
func (e *AudioEncoder) EncodePCM(pcmData []byte) (string, error)
EncodePCM encodes raw PCM audio data to base64 for WebSocket transmission
func (*AudioEncoder) GenerateSineWave ¶
func (e *AudioEncoder) GenerateSineWave(frequency float64, durationMs int, amplitude float64) []byte
GenerateSineWave generates PCM audio for a sine wave (useful for testing)
func (*AudioEncoder) GetChunkDurationMs ¶
func (e *AudioEncoder) GetChunkDurationMs(chunkSize int) float64
GetChunkDurationMs calculates the duration of a chunk in milliseconds
func (*AudioEncoder) GetChunkSize ¶
func (e *AudioEncoder) GetChunkSize() int
GetChunkSize returns the configured chunk size in bytes
func (*AudioEncoder) GetSampleRate ¶
func (e *AudioEncoder) GetSampleRate() int
GetSampleRate returns the configured sample rate
func (*AudioEncoder) ReadChunks ¶
func (e *AudioEncoder) ReadChunks(ctx context.Context, reader io.Reader) (chunkStream <-chan *types.MediaChunk, errStream <-chan error)
ReadChunks reads audio from an io.Reader and creates chunks on-the-fly
func (*AudioEncoder) ValidateConfig ¶
func (e *AudioEncoder) ValidateConfig(config *types.StreamingMediaConfig) error
ValidateConfig validates audio configuration against Gemini requirements
type EmbeddingOption ¶ added in v1.1.6
type EmbeddingOption func(*EmbeddingProvider)
EmbeddingOption configures the EmbeddingProvider.
func WithGeminiEmbeddingAPIKey ¶ added in v1.1.6
func WithGeminiEmbeddingAPIKey(key string) EmbeddingOption
WithGeminiEmbeddingAPIKey sets the API key explicitly.
func WithGeminiEmbeddingBaseURL ¶ added in v1.1.6
func WithGeminiEmbeddingBaseURL(url string) EmbeddingOption
WithGeminiEmbeddingBaseURL sets a custom base URL.
func WithGeminiEmbeddingHTTPClient ¶ added in v1.1.6
func WithGeminiEmbeddingHTTPClient(client *http.Client) EmbeddingOption
WithGeminiEmbeddingHTTPClient sets a custom HTTP client.
func WithGeminiEmbeddingModel ¶ added in v1.1.6
func WithGeminiEmbeddingModel(model string) EmbeddingOption
WithGeminiEmbeddingModel sets the embedding model.
type EmbeddingProvider ¶ added in v1.1.6
type EmbeddingProvider struct {
*providers.BaseEmbeddingProvider
}
EmbeddingProvider implements embedding generation via Gemini API.
func NewEmbeddingProvider ¶ added in v1.1.6
func NewEmbeddingProvider(opts ...EmbeddingOption) (*EmbeddingProvider, error)
NewEmbeddingProvider creates a Gemini embedding provider.
func (*EmbeddingProvider) Embed ¶ added in v1.1.6
func (p *EmbeddingProvider) Embed( ctx context.Context, req providers.EmbeddingRequest, ) (providers.EmbeddingResponse, error)
Embed generates embeddings for the given texts.
func (*EmbeddingProvider) EstimateCost ¶ added in v1.1.6
func (p *EmbeddingProvider) EstimateCost(tokens int) float64
EstimateCost estimates the cost for embedding the given number of tokens. Note: Gemini embeddings are currently free tier.
type ErrorResponse ¶
type ErrorResponse struct {
Error *APIError `json:"error"`
}
ErrorResponse wraps an APIError in a message format
type FunctionCall ¶ added in v1.1.6
type FunctionCall struct {
Name string `json:"name,omitempty"`
ID string `json:"id,omitempty"`
Args map[string]interface{} `json:"args,omitempty"`
}
FunctionCall represents a function call
type InlineData ¶
type InlineData struct {
MimeType string `json:"mimeType,omitempty"` // camelCase!
Data string `json:"data,omitempty"` // Base64 encoded
}
InlineData represents inline media data
type ModelTurn ¶
type ModelTurn struct {
Parts []Part `json:"parts,omitempty"`
}
ModelTurn represents a model response turn
type Part ¶
type Part struct {
Text string `json:"text,omitempty"`
InlineData *InlineData `json:"inlineData,omitempty"` // camelCase!
}
Part represents a content part (text or inline data)
type PromptFeedback ¶
type PromptFeedback struct {
SafetyRatings []SafetyRating `json:"safetyRatings,omitempty"`
BlockReason string `json:"blockReason,omitempty"`
}
PromptFeedback contains safety ratings and block reason
func (*PromptFeedback) GetBlockReason ¶
func (f *PromptFeedback) GetBlockReason() string
GetBlockReason returns a human-readable block reason
func (*PromptFeedback) IsBlocked ¶
func (f *PromptFeedback) IsBlocked() bool
IsBlocked returns true if content was blocked by safety filters
type Provider ¶ added in v1.1.3
type Provider struct {
providers.BaseProvider
Model string
BaseURL string
ApiKey string
Defaults providers.ProviderDefaults
}
Provider implements the Provider interface for Google Gemini
func NewProvider ¶ added in v1.1.3
func NewProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *Provider
NewProvider creates a new Gemini provider
func (*Provider) CalculateCost ¶ added in v1.1.3
CalculateCost calculates detailed cost breakdown including optional cached tokens
func (*Provider) CreateStreamSession ¶ added in v1.1.3
func (p *Provider) CreateStreamSession( ctx context.Context, req *providers.StreamingInputConfig, ) (providers.StreamInputSession, error)
CreateStreamSession creates a new bidirectional streaming session with Gemini Live API
Response Modalities: By default, the session is configured to return TEXT responses only. To request audio responses, pass "response_modalities" in the request metadata:
req := &providers.StreamingInputConfig{
Config: config,
Metadata: map[string]interface{}{
"response_modalities": []string{"AUDIO"}, // Audio only (TEXT+AUDIO not supported)
},
}
Audio responses will be delivered in the StreamChunk.Metadata["audio_data"] field as base64-encoded PCM.
func (*Provider) GetMultimodalCapabilities ¶ added in v1.1.3
func (p *Provider) GetMultimodalCapabilities() providers.MultimodalCapabilities
GetMultimodalCapabilities returns Gemini's multimodal support capabilities
func (*Provider) GetStreamingCapabilities ¶ added in v1.1.3
func (p *Provider) GetStreamingCapabilities() providers.StreamingCapabilities
GetStreamingCapabilities returns detailed information about Gemini's streaming support
func (*Provider) Predict ¶ added in v1.1.3
func (p *Provider) Predict(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
Predict sends a predict request to Gemini
func (*Provider) PredictMultimodal ¶ added in v1.1.3
func (p *Provider) PredictMultimodal(ctx context.Context, req providers.PredictionRequest) (providers.PredictionResponse, error)
PredictMultimodal performs a predict request with multimodal content
func (*Provider) PredictMultimodalStream ¶ added in v1.1.3
func (p *Provider) PredictMultimodalStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
PredictMultimodalStream performs a streaming predict request with multimodal content
func (*Provider) PredictStream ¶ added in v1.1.3
func (p *Provider) PredictStream(ctx context.Context, req providers.PredictionRequest) (<-chan providers.StreamChunk, error)
PredictStream streams a predict response from Gemini
func (*Provider) SupportsStreamInput ¶ added in v1.1.3
SupportsStreamInput returns the media types supported for streaming input
type RecoveryStrategy ¶
type RecoveryStrategy int
RecoveryStrategy defines how to handle different error types
const ( // RecoveryRetry indicates the operation should be retried RecoveryRetry RecoveryStrategy = iota // RecoveryFailFast indicates the operation should fail immediately RecoveryFailFast // RecoveryGracefulDegradation indicates fallback to a simpler mode RecoveryGracefulDegradation // RecoveryWaitAndRetry indicates retry after a delay RecoveryWaitAndRetry )
func DetermineRecoveryStrategy ¶
func DetermineRecoveryStrategy(err error) RecoveryStrategy
DetermineRecoveryStrategy determines how to handle an error
type SafetyRating ¶
type SafetyRating struct {
Category string `json:"category"`
Probability string `json:"probability"`
}
SafetyRating represents content safety assessment
type ServerContent ¶
type ServerContent struct {
ModelTurn *ModelTurn `json:"modelTurn,omitempty"`
TurnComplete bool `json:"turnComplete,omitempty"`
GenerationComplete bool `json:"generationComplete,omitempty"`
Interrupted bool `json:"interrupted,omitempty"`
InputTranscription *Transcription `json:"inputTranscription,omitempty"` // User speech transcription
OutputTranscription *Transcription `json:"outputTranscription,omitempty"` // Model speech transcription
}
ServerContent represents the server content (BidiGenerateContentServerContent)
type ServerMessage ¶
type ServerMessage struct {
SetupComplete *SetupComplete `json:"setupComplete,omitempty"`
ServerContent *ServerContent `json:"serverContent,omitempty"`
ToolCall *ToolCallMsg `json:"toolCall,omitempty"`
UsageMetadata *UsageMetadata `json:"usageMetadata,omitempty"`
}
ServerMessage represents a message from the Gemini server (BidiGenerateContentServerMessage)
func (*ServerMessage) UnmarshalJSON ¶
func (s *ServerMessage) UnmarshalJSON(data []byte) error
UnmarshalJSON unmarshals ServerMessage from JSON with custom handling.
type SetupComplete ¶
type SetupComplete struct{}
SetupComplete indicates setup is complete (empty object per docs)
type StreamSession ¶ added in v1.1.3
type StreamSession struct {
// contains filtered or unexported fields
}
StreamSession implements StreamInputSession for Gemini Live API with automatic reconnection on unexpected connection drops.
func NewStreamSession ¶ added in v1.1.3
func NewStreamSession(ctx context.Context, wsURL, apiKey string, config *StreamSessionConfig) (*StreamSession, error)
NewStreamSession creates a new streaming session
func (*StreamSession) Close ¶ added in v1.1.3
func (s *StreamSession) Close() error
Close closes the session
func (*StreamSession) CompleteTurn ¶ added in v1.1.3
func (s *StreamSession) CompleteTurn(ctx context.Context) error
CompleteTurn signals that the current turn is complete
func (*StreamSession) Done ¶ added in v1.1.3
func (s *StreamSession) Done() <-chan struct{}
Done returns a channel that's closed when the session ends
func (*StreamSession) EndInput ¶ added in v1.1.6
func (s *StreamSession) EndInput()
EndInput implements the EndInputter interface expected by DuplexProviderStage. It signals that the user's input turn is complete and the model should respond.
Behavior depends on VAD configuration:
- If VAD is disabled: sends an activityEnd signal for explicit turn control
- If VAD is enabled: sends silence frames to trigger VAD end-of-speech detection
func (*StreamSession) Error ¶ added in v1.1.3
func (s *StreamSession) Error() error
Error returns the error that caused the session to close
func (*StreamSession) Response ¶ added in v1.1.3
func (s *StreamSession) Response() <-chan providers.StreamChunk
Response returns the channel for receiving responses
func (*StreamSession) SendChunk ¶ added in v1.1.3
func (s *StreamSession) SendChunk(ctx context.Context, chunk *types.MediaChunk) error
SendChunk sends a media chunk to the server. When VAD is disabled (manual turn control), automatically sends activityStart before the first audio chunk of a turn.
func (*StreamSession) SendSystemContext ¶ added in v1.1.6
func (s *StreamSession) SendSystemContext(ctx context.Context, text string) error
SendSystemContext sends a text message as context without completing the turn. Use this for system prompts that should provide context but not trigger a response. The audio/text that follows will be processed with this context in mind.
func (*StreamSession) SendText ¶ added in v1.1.3
func (s *StreamSession) SendText(ctx context.Context, text string) error
SendText sends a text message to the server and marks the turn as complete
func (*StreamSession) SendToolResponse ¶ added in v1.1.6
func (s *StreamSession) SendToolResponse(ctx context.Context, toolCallID, result string) error
SendToolResponse sends a single tool execution result back to Gemini. The toolCallID must match the ID from the FunctionCall. The result should be a JSON-serializable string (typically JSON).
func (*StreamSession) SendToolResponses ¶ added in v1.1.6
func (s *StreamSession) SendToolResponses(ctx context.Context, responses []providers.ToolResponse) error
SendToolResponses sends multiple tool execution results back to Gemini. This is used when the model makes parallel tool calls. After receiving the tool responses, Gemini will continue generating.
type StreamSessionConfig ¶
type StreamSessionConfig struct {
Model string // Model name (will be prefixed with "models/" automatically)
ResponseModalities []string // "TEXT" or "AUDIO" - NOT both! See package doc for details.
SystemInstruction string // System prompt/instruction for the model
InputCostPer1K float64 // Cost per 1K input tokens (for USD calculation)
OutputCostPer1K float64 // Cost per 1K output tokens (for USD calculation)
// VAD configures Voice Activity Detection settings.
// If nil, Gemini uses its default VAD settings.
VAD *VADConfig
// Tools defines the function declarations available to the model.
// When tools are configured, the model will return structured tool calls
// instead of speaking them as text. Tool definitions should match the
// OpenAPI schema subset supported by Gemini.
Tools []ToolDefinition
// AutoReconnect enables automatic reconnection on unexpected connection drops.
// When enabled, the session will attempt to reconnect and continue receiving
// responses. Note: conversation context may be lost on reconnection.
AutoReconnect bool
MaxReconnectTries int // Maximum reconnection attempts (default: 3)
}
StreamSessionConfig configures a streaming session
type ToolCallMsg ¶ added in v1.1.6
type ToolCallMsg struct {
FunctionCalls []FunctionCall `json:"functionCalls,omitempty"`
}
ToolCallMsg represents a tool call from the model
type ToolDefinition ¶ added in v1.1.6
type ToolDefinition struct {
Name string `json:"name"`
Description string `json:"description,omitempty"`
Parameters map[string]interface{} `json:"parameters,omitempty"` // JSON Schema for parameters
}
ToolDefinition represents a function/tool that the model can call. This follows the Gemini function calling schema.
type ToolProvider ¶ added in v1.1.3
type ToolProvider struct {
*Provider
// contains filtered or unexported fields
}
ToolProvider extends Provider with tool support
func NewToolProvider ¶ added in v1.1.3
func NewToolProvider(id, model, baseURL string, defaults providers.ProviderDefaults, includeRawOutput bool) *ToolProvider
NewToolProvider creates a new Gemini provider with tool support
func (*ToolProvider) BuildTooling ¶ added in v1.1.3
func (p *ToolProvider) BuildTooling(descriptors []*providers.ToolDescriptor) (interface{}, error)
BuildTooling converts tool descriptors to Gemini format
func (*ToolProvider) CreateStreamSession ¶ added in v1.1.6
func (p *ToolProvider) CreateStreamSession( ctx context.Context, req *providers.StreamingInputConfig, ) (providers.StreamInputSession, error)
CreateStreamSession forwards to the embedded Provider's CreateStreamSession. This enables duplex streaming with tool support.
func (*ToolProvider) GetStreamingCapabilities ¶ added in v1.1.6
func (p *ToolProvider) GetStreamingCapabilities() providers.StreamingCapabilities
GetStreamingCapabilities forwards to the embedded Provider's GetStreamingCapabilities.
func (*ToolProvider) PredictStreamWithTools ¶ added in v1.1.5
func (p *ToolProvider) PredictStreamWithTools( ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string, ) (<-chan providers.StreamChunk, error)
PredictStreamWithTools performs a streaming predict request with tool support
func (*ToolProvider) PredictWithTools ¶ added in v1.1.3
func (p *ToolProvider) PredictWithTools(ctx context.Context, req providers.PredictionRequest, tools interface{}, toolChoice string) (providers.PredictionResponse, []types.MessageToolCall, error)
PredictWithTools performs a predict request with tool support
func (*ToolProvider) SupportsStreamInput ¶ added in v1.1.6
func (p *ToolProvider) SupportsStreamInput() []string
SupportsStreamInput forwards to the embedded Provider's SupportsStreamInput.
type Transcription ¶ added in v1.1.6
type Transcription struct {
Text string `json:"text,omitempty"`
}
Transcription represents audio transcription (BidiGenerateContentTranscription)
type UsageMetadata ¶ added in v1.1.6
type UsageMetadata struct {
PromptTokenCount int `json:"promptTokenCount,omitempty"`
ResponseTokenCount int `json:"responseTokenCount,omitempty"`
TotalTokenCount int `json:"totalTokenCount,omitempty"`
}
UsageMetadata contains token usage information
type VADConfig ¶ added in v1.1.6
type VADConfig struct {
// Disabled turns off automatic VAD (manual turn control only)
Disabled bool
// StartOfSpeechSensitivity controls how sensitive the VAD is to detecting speech start.
// Valid values: "UNSPECIFIED", "LOW", "MEDIUM", "HIGH"
StartOfSpeechSensitivity string
// EndOfSpeechSensitivity controls how sensitive the VAD is to detecting silence.
// Valid values: "UNSPECIFIED", "LOW", "MEDIUM", "HIGH"
// Lower sensitivity = longer silence needed to trigger end of speech
EndOfSpeechSensitivity string
// PrefixPaddingMs is extra padding in milliseconds before speech detection
PrefixPaddingMs int
// SilenceThresholdMs is the duration of silence (in ms) to trigger end of speech.
// This maps to Gemini's "suffixPaddingMs" parameter.
// Default is typically ~500ms. Increase for TTS audio with natural pauses.
SilenceThresholdMs int
}
VADConfig configures Voice Activity Detection settings for Gemini Live API. These settings control when Gemini detects the end of speech and starts responding.
type WebSocketManager ¶
type WebSocketManager struct {
// contains filtered or unexported fields
}
WebSocketManager manages a WebSocket connection with reconnection logic.
func NewWebSocketManager ¶
func NewWebSocketManager(url, apiKey string) *WebSocketManager
NewWebSocketManager creates a new WebSocket manager
func (*WebSocketManager) Close ¶
func (wm *WebSocketManager) Close() error
Close gracefully closes the WebSocket connection
func (*WebSocketManager) Connect ¶
func (wm *WebSocketManager) Connect(ctx context.Context) error
Connect establishes a WebSocket connection to the Gemini Live API
func (*WebSocketManager) ConnectWithRetry ¶
func (wm *WebSocketManager) ConnectWithRetry(ctx context.Context) error
ConnectWithRetry connects with exponential backoff retry logic
func (*WebSocketManager) IsConnected ¶
func (wm *WebSocketManager) IsConnected() bool
IsConnected returns true if the WebSocket is connected
func (*WebSocketManager) Receive ¶
func (wm *WebSocketManager) Receive(ctx context.Context, v interface{}) error
Receive reads a message from the WebSocket
func (*WebSocketManager) Send ¶
func (wm *WebSocketManager) Send(msg interface{}) error
Send sends a message through the WebSocket
func (*WebSocketManager) SendPing ¶
func (wm *WebSocketManager) SendPing() error
SendPing sends a WebSocket ping to keep the connection alive
func (*WebSocketManager) StartHeartbeat ¶
func (wm *WebSocketManager) StartHeartbeat(ctx context.Context, interval time.Duration)
StartHeartbeat starts a goroutine that sends periodic pings