Documentation
¶
Overview ¶
Package elevenlabs provides a Go client for the ElevenLabs API.
The client wraps the ogen-generated API client with a higher-level interface that handles authentication and provides convenient methods for common operations.
Index ¶
- Constants
- Variables
- func IsForbiddenError(err error) bool
- func IsNotFoundError(err error) bool
- func IsRateLimitError(err error) bool
- func IsUnauthorizedError(err error) bool
- func PCMBytesToWAV(pcm []byte, sampleRate int) ([]byte, error)
- func PCMToWAV(pcmData io.Reader, sampleRate int) ([]byte, error)
- func ParsePCMSampleRate(format string) (int, error)
- type APIError
- type AlignmentCharacter
- type AlignmentWord
- type AudioIsolationRequest
- type AudioIsolationService
- func (s *AudioIsolationService) Isolate(ctx context.Context, req *AudioIsolationRequest) (io.Reader, error)
- func (s *AudioIsolationService) IsolateFile(ctx context.Context, audio io.Reader, filename string) (io.Reader, error)
- func (s *AudioIsolationService) IsolateStream(ctx context.Context, req *AudioIsolationRequest) (io.Reader, error)
- type Chapter
- type ChapterSnapshot
- type Client
- func (c *Client) API() *api.Client
- func (c *Client) AudioIsolation() *AudioIsolationService
- func (c *Client) Dubbing() *DubbingService
- func (c *Client) ForcedAlignment() *ForcedAlignmentService
- func (c *Client) History() *HistoryService
- func (c *Client) Models() *ModelsService
- func (c *Client) Music() *MusicService
- func (c *Client) PhoneNumbers() *PhoneNumberService
- func (c *Client) Projects() *ProjectsService
- func (c *Client) Pronunciation() *PronunciationService
- func (c *Client) SoundEffects() *SoundEffectsService
- func (c *Client) SpeechToSpeech() *SpeechToSpeechService
- func (c *Client) SpeechToText() *SpeechToTextService
- func (c *Client) TextToDialogue() *TextToDialogueService
- func (c *Client) TextToSpeech() *TextToSpeechService
- func (c *Client) Twilio() *TwilioService
- func (c *Client) User() *UserService
- func (c *Client) VoiceDesign() *VoiceDesignService
- func (c *Client) Voices() *VoicesService
- func (c *Client) WebSocketSTT() *WebSocketSTTService
- func (c *Client) WebSocketTTS() *WebSocketTTSService
- type CompositionPlan
- type CompositionPlanRequest
- type CreateProjectRequest
- type CreatePronunciationDictionaryRequest
- type DialogueInput
- type DialogueRequest
- type DialogueResponse
- type DubbingProject
- type DubbingRequest
- type DubbingResponse
- type DubbingService
- func (s *DubbingService) CreateFromURL(ctx context.Context, req *DubbingRequest) (*DubbingResponse, error)
- func (s *DubbingService) Delete(ctx context.Context, dubbingID string) error
- func (s *DubbingService) Get(ctx context.Context, dubbingID string) (*DubbingProject, error)
- func (s *DubbingService) GetDubbedFile(ctx context.Context, dubbingID, languageCode string) (io.Reader, error)
- type ForcedAlignmentRequest
- type ForcedAlignmentResponse
- type ForcedAlignmentService
- type HistoryItem
- type HistoryListOptions
- type HistoryListResponse
- type HistoryService
- func (s *HistoryService) Delete(ctx context.Context, historyItemID string) error
- func (s *HistoryService) Get(ctx context.Context, historyItemID string) (*HistoryItem, error)
- func (s *HistoryService) GetAudio(ctx context.Context, historyItemID string) (io.Reader, error)
- func (s *HistoryService) List(ctx context.Context, opts *HistoryListOptions) (*HistoryListResponse, error)
- type Language
- type ListPhoneNumbersResponse
- type Model
- type ModelsService
- type MusicDetailedRequest
- type MusicDetailedResponse
- type MusicRequest
- type MusicResponse
- type MusicService
- func (s *MusicService) Generate(ctx context.Context, req *MusicRequest) (*MusicResponse, error)
- func (s *MusicService) GenerateDetailed(ctx context.Context, req *MusicDetailedRequest) (*MusicDetailedResponse, error)
- func (s *MusicService) GenerateInstrumental(ctx context.Context, prompt string, durationMs int) (io.Reader, error)
- func (s *MusicService) GeneratePlan(ctx context.Context, req *CompositionPlanRequest) (*CompositionPlan, error)
- func (s *MusicService) GenerateStream(ctx context.Context, req *MusicRequest) (*MusicResponse, error)
- func (s *MusicService) SeparateStems(ctx context.Context, req *StemSeparationRequest) (io.Reader, error)
- func (s *MusicService) SeparateStemsFile(ctx context.Context, filePath string) (io.Reader, error)
- func (s *MusicService) Simple(ctx context.Context, prompt string) (io.Reader, error)
- type Option
- type PhoneNumber
- type PhoneNumberService
- func (s *PhoneNumberService) Delete(ctx context.Context, phoneNumberID string) error
- func (s *PhoneNumberService) Get(ctx context.Context, phoneNumberID string) (*PhoneNumber, error)
- func (s *PhoneNumberService) List(ctx context.Context) ([]PhoneNumber, error)
- func (s *PhoneNumberService) Update(ctx context.Context, phoneNumberID string, req *UpdatePhoneNumberRequest) (*PhoneNumber, error)
- type Project
- type ProjectSnapshot
- type ProjectsService
- func (s *ProjectsService) Convert(ctx context.Context, projectID string) error
- func (s *ProjectsService) ConvertChapter(ctx context.Context, projectID, chapterID string) error
- func (s *ProjectsService) Create(ctx context.Context, req *CreateProjectRequest) (*Project, error)
- func (s *ProjectsService) Delete(ctx context.Context, projectID string) error
- func (s *ProjectsService) DeleteChapter(ctx context.Context, projectID, chapterID string) error
- func (s *ProjectsService) DownloadSnapshotArchive(ctx context.Context, projectID, snapshotID string) (io.Reader, error)
- func (s *ProjectsService) List(ctx context.Context) ([]*Project, error)
- func (s *ProjectsService) ListChapterSnapshots(ctx context.Context, projectID, chapterID string) ([]*ChapterSnapshot, error)
- func (s *ProjectsService) ListChapters(ctx context.Context, projectID string) ([]*Chapter, error)
- func (s *ProjectsService) ListSnapshots(ctx context.Context, projectID string) ([]*ProjectSnapshot, error)
- func (s *ProjectsService) StreamChapterAudio(ctx context.Context, projectID, chapterID, snapshotID string) (io.Reader, error)
- func (s *ProjectsService) Update(ctx context.Context, projectID string, req *UpdateProjectRequest) error
- type PronunciationDictionary
- type PronunciationDictionaryListOptions
- type PronunciationDictionaryListResponse
- type PronunciationRule
- type PronunciationRules
- func (rules PronunciationRules) Graphemes() []string
- func (rules PronunciationRules) SavePLS(filename, language string) error
- func (rules PronunciationRules) String() string
- func (rules PronunciationRules) ToPLS(language string) ([]byte, error)
- func (rules PronunciationRules) ToPLSString(language string) (string, error)
- type PronunciationService
- func (s *PronunciationService) Archive(ctx context.Context, dictionaryID string) error
- func (s *PronunciationService) Create(ctx context.Context, req *CreatePronunciationDictionaryRequest) (*PronunciationDictionary, error)
- func (s *PronunciationService) CreateFromJSON(ctx context.Context, name, jsonFilePath string) (*PronunciationDictionary, error)
- func (s *PronunciationService) CreateFromMap(ctx context.Context, name string, rules map[string]string) (*PronunciationDictionary, error)
- func (s *PronunciationService) DownloadLatestPLS(ctx context.Context, dictionaryID string) (io.Reader, error)
- func (s *PronunciationService) Get(ctx context.Context, dictionaryID string) (*PronunciationDictionary, error)
- func (s *PronunciationService) GetVersionPLS(ctx context.Context, dictionaryID, versionID string) (io.Reader, error)
- func (s *PronunciationService) List(ctx context.Context, opts *PronunciationDictionaryListOptions) (*PronunciationDictionaryListResponse, error)
- func (s *PronunciationService) RemoveRules(ctx context.Context, dictionaryID string, ruleStrings []string) error
- func (s *PronunciationService) Rename(ctx context.Context, dictionaryID, newName string) error
- type SIPOutboundCallRequest
- type SIPOutboundCallResponse
- type STTTranscript
- type STTWord
- type SaveVoiceRequest
- type SongSection
- type SoundEffectRequest
- type SoundEffectResponse
- type SoundEffectsService
- func (s *SoundEffectsService) Generate(ctx context.Context, req *SoundEffectRequest) (*SoundEffectResponse, error)
- func (s *SoundEffectsService) GenerateLoop(ctx context.Context, description string, durationSeconds float64) (io.Reader, error)
- func (s *SoundEffectsService) Simple(ctx context.Context, description string) (io.Reader, error)
- type SpeechToSpeechRequest
- type SpeechToSpeechResponse
- type SpeechToSpeechService
- func (s *SpeechToSpeechService) Convert(ctx context.Context, req *SpeechToSpeechRequest) (*SpeechToSpeechResponse, error)
- func (s *SpeechToSpeechService) ConvertStream(ctx context.Context, req *SpeechToSpeechRequest) (*SpeechToSpeechResponse, error)
- func (s *SpeechToSpeechService) Simple(ctx context.Context, voiceID string, audio io.Reader) (io.Reader, error)
- type SpeechToTextService
- func (s *SpeechToTextService) Transcribe(ctx context.Context, req *TranscriptionRequest) (*TranscriptionResponse, error)
- func (s *SpeechToTextService) TranscribeURL(ctx context.Context, url string) (*TranscriptionResponse, error)
- func (s *SpeechToTextService) TranscribeWithDiarization(ctx context.Context, url string) (*TranscriptionResponse, error)
- type StemSeparationRequest
- type Subscription
- type TTSAlignment
- type TTSRequest
- type TTSResponse
- type TextToDialogueService
- func (s *TextToDialogueService) Generate(ctx context.Context, req *DialogueRequest) (io.Reader, error)
- func (s *TextToDialogueService) GenerateStream(ctx context.Context, req *DialogueRequest) (io.Reader, error)
- func (s *TextToDialogueService) GenerateWithTimestamps(ctx context.Context, req *DialogueRequest) (*DialogueResponse, error)
- func (s *TextToDialogueService) Simple(ctx context.Context, inputs []DialogueInput) (io.Reader, error)
- type TextToSpeechService
- func (s *TextToSpeechService) Generate(ctx context.Context, req *TTSRequest) (*TTSResponse, error)
- func (s *TextToSpeechService) GenerateToWriter(ctx context.Context, req *TTSRequest, w io.Writer) error
- func (s *TextToSpeechService) Simple(ctx context.Context, voiceID, text string) (io.Reader, error)
- type TranscriptionRequest
- type TranscriptionResponse
- type TranscriptionUtterance
- type TranscriptionWord
- type TwilioOutboundCallRequest
- type TwilioOutboundCallResponse
- type TwilioRegisterCallRequest
- type TwilioRegisterCallResponse
- type TwilioService
- func (s *TwilioService) OutboundCall(ctx context.Context, req *TwilioOutboundCallRequest) (*TwilioOutboundCallResponse, error)
- func (s *TwilioService) RegisterCall(ctx context.Context, req *TwilioRegisterCallRequest) (*TwilioRegisterCallResponse, error)
- func (s *TwilioService) SIPOutboundCall(ctx context.Context, req *SIPOutboundCallRequest) (*SIPOutboundCallResponse, error)
- type UpdatePhoneNumberRequest
- type UpdateProjectRequest
- type User
- type UserService
- type ValidationError
- type Voice
- type VoiceAccent
- type VoiceAge
- type VoiceDesignRequest
- type VoiceDesignResponse
- type VoiceDesignService
- func (s *VoiceDesignService) GeneratePreview(ctx context.Context, req *VoiceDesignRequest) (*VoiceDesignResponse, error)
- func (s *VoiceDesignService) SaveVoice(ctx context.Context, req *SaveVoiceRequest) (*Voice, error)
- func (s *VoiceDesignService) Simple(ctx context.Context, gender VoiceGender, age VoiceAge, accent VoiceAccent, ...) (*VoiceDesignResponse, error)
- type VoiceGender
- type VoiceSegment
- type VoiceSettings
- func DefaultVoiceSettings() *VoiceSettings
- func VoiceSettingsForAudiobook() *VoiceSettings
- func VoiceSettingsForCoursera() *VoiceSettings
- func VoiceSettingsForEdX() *VoiceSettings
- func VoiceSettingsForInstagram() *VoiceSettings
- func VoiceSettingsForPodcast() *VoiceSettings
- func VoiceSettingsForTikTok() *VoiceSettings
- func VoiceSettingsForUdemy() *VoiceSettings
- func VoiceSettingsForYouTube() *VoiceSettings
- type VoicesService
- func (s *VoicesService) Delete(ctx context.Context, voiceID string) error
- func (s *VoicesService) Get(ctx context.Context, voiceID string) (*Voice, error)
- func (s *VoicesService) GetDefaultSettings(ctx context.Context) (*VoiceSettings, error)
- func (s *VoicesService) GetSettings(ctx context.Context, voiceID string) (*VoiceSettings, error)
- func (s *VoicesService) List(ctx context.Context) ([]*Voice, error)
- type WebSocketSTTConnection
- func (wsc *WebSocketSTTConnection) Close() error
- func (wsc *WebSocketSTTConnection) Commit() error
- func (wsc *WebSocketSTTConnection) Errors() <-chan error
- func (wsc *WebSocketSTTConnection) SendAudio(audio []byte) error
- func (wsc *WebSocketSTTConnection) SendAudioWithCommit(audio []byte, commit bool) error
- func (wsc *WebSocketSTTConnection) SessionID() string
- func (wsc *WebSocketSTTConnection) StreamAudio(ctx context.Context, audioStream <-chan []byte) (<-chan *STTTranscript, <-chan error)
- func (wsc *WebSocketSTTConnection) Transcripts() <-chan *STTTranscript
- type WebSocketSTTOptions
- type WebSocketSTTService
- type WebSocketTTSConnection
- func (wsc *WebSocketTTSConnection) Alignments() <-chan *TTSAlignment
- func (wsc *WebSocketTTSConnection) Audio() <-chan []byte
- func (wsc *WebSocketTTSConnection) Close() error
- func (wsc *WebSocketTTSConnection) Done() <-chan struct{}
- func (wsc *WebSocketTTSConnection) Errors() <-chan error
- func (wsc *WebSocketTTSConnection) Flush() error
- func (wsc *WebSocketTTSConnection) SendText(text string) error
- func (wsc *WebSocketTTSConnection) SendTextWithContext(text, contextID string) error
- func (wsc *WebSocketTTSConnection) StreamText(ctx context.Context, textStream <-chan string) (<-chan []byte, <-chan error)
- func (wsc *WebSocketTTSConnection) TriggerGeneration() error
- type WebSocketTTSOptions
- type WebSocketTTSService
Constants ¶
const DefaultBaseURL = "https://api.elevenlabs.io"
DefaultBaseURL is the default ElevenLabs API base URL.
const DefaultModelID = "eleven_multilingual_v2"
DefaultModelID is the recommended model for text-to-speech.
const Version = "0.3.0"
Version is the SDK version.
Variables ¶
var ( // ErrNoAPIKey is returned when no API key is provided. ErrNoAPIKey = errors.New("elevenlabs: API key is required") // ErrEmptyText is returned when text is empty. ErrEmptyText = errors.New("elevenlabs: text cannot be empty") // ErrEmptyVoiceID is returned when voice ID is empty. ErrEmptyVoiceID = errors.New("elevenlabs: voice ID is required") // ErrInvalidStability is returned when stability is out of range. ErrInvalidStability = errors.New("elevenlabs: stability must be between 0.0 and 1.0") // ErrInvalidSimilarityBoost is returned when similarity_boost is out of range. ErrInvalidSimilarityBoost = errors.New("elevenlabs: similarity_boost must be between 0.0 and 1.0") // ErrInvalidStyle is returned when style is out of range. ErrInvalidStyle = errors.New("elevenlabs: style must be between 0.0 and 1.0") // ErrInvalidSpeed is returned when speed is out of range. ErrInvalidSpeed = errors.New("elevenlabs: speed must be between 0.25 and 4.0") )
Common errors
var ValidOutputFormats = map[string]bool{ "mp3_22050_32": true, "mp3_24000_48": true, "mp3_44100_32": true, "mp3_44100_64": true, "mp3_44100_96": true, "mp3_44100_128": true, "mp3_44100_192": true, "pcm_8000": true, "pcm_16000": true, "pcm_22050": true, "pcm_24000": true, "pcm_32000": true, "pcm_44100": true, "pcm_48000": true, "ulaw_8000": true, "alaw_8000": true, "opus_48000_32": true, "opus_48000_64": true, "opus_48000_96": true, "opus_48000_128": true, "opus_48000_192": true, }
ValidOutputFormats lists the valid audio output formats. For highest quality, use pcm_48000 (lossless) or mp3_44100_192.
Functions ¶
func IsForbiddenError ¶ added in v0.4.0
IsForbiddenError returns true if the error is a 403 Forbidden error.
func IsNotFoundError ¶
IsNotFoundError returns true if the error is a 404 Not Found error.
func IsRateLimitError ¶
IsRateLimitError returns true if the error is a 429 Too Many Requests error.
func IsUnauthorizedError ¶
IsUnauthorizedError returns true if the error is a 401 Unauthorized error.
func PCMBytesToWAV ¶ added in v0.4.0
PCMBytesToWAV wraps raw PCM bytes in a WAV header.
func PCMToWAV ¶ added in v0.4.0
PCMToWAV wraps raw PCM audio data in a WAV header. ElevenLabs PCM is 16-bit signed little-endian mono.
Usage:
	resp, _ := client.TextToSpeech().Generate(ctx, &elevenlabs.TTSRequest{
		VoiceID:      voiceID,
		Text:         "Hello",
		OutputFormat: "pcm_44100",
	})
	wavData, _ := elevenlabs.PCMToWAV(resp.Audio, 44100)
func ParsePCMSampleRate ¶ added in v0.4.0
ParsePCMSampleRate extracts the sample rate from a PCM format string. Example: "pcm_44100" returns 44100.
Types ¶
type APIError ¶
APIError represents an error returned by the ElevenLabs API.
func ParseAPIError ¶ added in v0.4.0
ParseAPIError extracts API error details from an error returned by the SDK. It handles ogen's UnexpectedStatusCodeError and parses the response body to extract the ElevenLabs error message.
Usage:
	resp, err := client.TextToSpeech().Generate(ctx, req)
	if err != nil {
		if apiErr := elevenlabs.ParseAPIError(err); apiErr != nil {
			fmt.Printf("Status: %d, Message: %s\n", apiErr.StatusCode, apiErr.Message)
		}
		log.Fatal(err)
	}
type AlignmentCharacter ¶
type AlignmentCharacter struct {
// Text is the character text.
Text string
// Start is the start time in seconds.
Start float64
// End is the end time in seconds.
End float64
}
AlignmentCharacter represents a character with timing information.
type AlignmentWord ¶
type AlignmentWord struct {
// Text is the word text.
Text string
// Start is the start time in seconds.
Start float64
// End is the end time in seconds.
End float64
// Loss is the confidence score for this word.
Loss float64
}
AlignmentWord represents a word with timing information.
type AudioIsolationRequest ¶
type AudioIsolationRequest struct {
// Audio is the audio file to process (required).
Audio io.Reader
// Filename is the name of the file (required).
Filename string
}
AudioIsolationRequest contains options for audio isolation.
type AudioIsolationService ¶
type AudioIsolationService struct {
// contains filtered or unexported fields
}
AudioIsolationService handles audio isolation (vocal/speech extraction).
func (*AudioIsolationService) Isolate ¶
func (s *AudioIsolationService) Isolate(ctx context.Context, req *AudioIsolationRequest) (io.Reader, error)
Isolate extracts vocals/speech from audio, removing background noise. Returns an io.Reader containing the isolated audio.
func (*AudioIsolationService) IsolateFile ¶
func (s *AudioIsolationService) IsolateFile(ctx context.Context, audio io.Reader, filename string) (io.Reader, error)
IsolateFile is a convenience method to isolate vocals from an audio file.
func (*AudioIsolationService) IsolateStream ¶
func (s *AudioIsolationService) IsolateStream(ctx context.Context, req *AudioIsolationRequest) (io.Reader, error)
IsolateStream extracts vocals/speech from audio with streaming output. Returns an io.Reader for streaming the isolated audio.
type Chapter ¶
type Chapter struct {
// ChapterID is the unique identifier.
ChapterID string
// Name is the chapter name.
Name string
// ConversionProgress is the conversion progress percentage.
ConversionProgress float64
// State is the current state.
State string
// LastConversionError is the last conversion error if any.
LastConversionError string
}
Chapter represents a chapter within a project.
type ChapterSnapshot ¶
type ChapterSnapshot struct {
// ChapterSnapshotID is the unique identifier.
ChapterSnapshotID string
// ProjectID is the parent project ID.
ProjectID string
// ChapterID is the chapter ID.
ChapterID string
// Name is the snapshot name.
Name string
// CreatedAt is when the snapshot was created.
CreatedAt time.Time
}
ChapterSnapshot represents a snapshot of a chapter.
type Client ¶
type Client struct {
// contains filtered or unexported fields
}
Client is the main ElevenLabs client for interacting with the API.
func (*Client) API ¶
API returns the underlying ogen-generated API client for advanced usage. Use this when you need access to API endpoints not covered by the high-level wrapper methods.
func (*Client) AudioIsolation ¶
func (c *Client) AudioIsolation() *AudioIsolationService
AudioIsolation returns the audio isolation service.
func (*Client) Dubbing ¶
func (c *Client) Dubbing() *DubbingService
Dubbing returns the dubbing service.
func (*Client) ForcedAlignment ¶
func (c *Client) ForcedAlignment() *ForcedAlignmentService
ForcedAlignment returns the forced alignment service.
func (*Client) History ¶
func (c *Client) History() *HistoryService
History returns the history service.
func (*Client) Music ¶
func (c *Client) Music() *MusicService
Music returns the music composition service.
func (*Client) PhoneNumbers ¶
func (c *Client) PhoneNumbers() *PhoneNumberService
PhoneNumbers returns the phone number management service.
func (*Client) Projects ¶
func (c *Client) Projects() *ProjectsService
Projects returns the projects (Studio) service.
func (*Client) Pronunciation ¶
func (c *Client) Pronunciation() *PronunciationService
Pronunciation returns the pronunciation dictionary service.
func (*Client) SoundEffects ¶
func (c *Client) SoundEffects() *SoundEffectsService
SoundEffects returns the sound effects service.
func (*Client) SpeechToSpeech ¶
func (c *Client) SpeechToSpeech() *SpeechToSpeechService
SpeechToSpeech returns the speech-to-speech voice conversion service.
func (*Client) SpeechToText ¶
func (c *Client) SpeechToText() *SpeechToTextService
SpeechToText returns the speech-to-text transcription service.
func (*Client) TextToDialogue ¶
func (c *Client) TextToDialogue() *TextToDialogueService
TextToDialogue returns the text-to-dialogue service.
func (*Client) TextToSpeech ¶
func (c *Client) TextToSpeech() *TextToSpeechService
TextToSpeech returns the text-to-speech service.
func (*Client) Twilio ¶
func (c *Client) Twilio() *TwilioService
Twilio returns the Twilio phone integration service.
func (*Client) VoiceDesign ¶
func (c *Client) VoiceDesign() *VoiceDesignService
VoiceDesign returns the voice design/generation service.
func (*Client) WebSocketSTT ¶
func (c *Client) WebSocketSTT() *WebSocketSTTService
WebSocketSTT returns the WebSocket speech-to-text service for real-time transcription.
func (*Client) WebSocketTTS ¶
func (c *Client) WebSocketTTS() *WebSocketTTSService
WebSocketTTS returns the WebSocket text-to-speech service for real-time streaming.
type CompositionPlan ¶
type CompositionPlan struct {
// PositiveGlobalStyles are styles that should be present throughout the song.
PositiveGlobalStyles []string
// NegativeGlobalStyles are styles that should NOT be present in the song.
NegativeGlobalStyles []string
// Sections defines the structure of the song with individual sections.
Sections []SongSection
}
CompositionPlan represents a detailed music composition plan. This can be used with GenerateDetailed for fine-grained control over music generation.
type CompositionPlanRequest ¶
type CompositionPlanRequest struct {
// Prompt is the text description of the music to plan.
Prompt string
// DurationMs is the target duration in milliseconds (3000-600000).
DurationMs int
// SourcePlan is an optional existing plan to use as a starting point.
SourcePlan *CompositionPlan
}
CompositionPlanRequest contains options for generating a composition plan.
type CreateProjectRequest ¶
type CreateProjectRequest struct {
// Name is the project name (required).
Name string
// Description is an optional description.
Description string
// Author is an optional author name.
Author string
// Language is the two-letter language code (ISO 639-1).
Language string
// DefaultModelID is the model to use for TTS.
DefaultModelID string
// DefaultParagraphVoiceID is the default voice for paragraphs.
DefaultParagraphVoiceID string
// DefaultTitleVoiceID is the default voice for titles.
DefaultTitleVoiceID string
// FromURL is a URL to extract content from.
FromURL string
// ContentType is the content type (e.g., "Novel", "Short Story").
ContentType string
// Genres is a list of genres.
Genres []string
// QualityPreset is the output quality: "standard", "high", "ultra", "ultra lossless".
QualityPreset string
// AutoConvert automatically converts the project to audio.
AutoConvert bool
}
CreateProjectRequest contains options for creating a project.
func (*CreateProjectRequest) Validate ¶
func (r *CreateProjectRequest) Validate() error
Validate validates the create request.
type CreatePronunciationDictionaryRequest ¶
type CreatePronunciationDictionaryRequest struct {
// Name is the name of the dictionary (required).
Name string
// Description is an optional description.
Description string
// PLSContent is the PLS (Pronunciation Lexicon Specification) XML content.
// Use this to provide pronunciation rules directly.
// You can generate this from PronunciationRules using ToPLSString().
PLSContent string
// Rules is a convenient alternative to PLSContent.
// If provided, it will be converted to PLS format automatically.
// If both PLSContent and Rules are provided, PLSContent takes precedence.
Rules PronunciationRules
// Language is the language code for the rules (default: "en-US").
// Only used when Rules is provided.
Language string
}
CreatePronunciationDictionaryRequest contains options for creating a pronunciation dictionary.
type DialogueInput ¶
type DialogueInput struct {
// Text is the text to be spoken.
Text string
// VoiceID is the ID of the voice to use.
VoiceID string
}
DialogueInput represents a single dialogue turn with text and voice.
type DialogueRequest ¶
type DialogueRequest struct {
// Inputs is a list of dialogue turns with text and voice pairs.
Inputs []DialogueInput
// ModelID is the model to use (default: eleven_multilingual_v2).
ModelID string
// LanguageCode is the ISO 639-1 language code (e.g., "en").
LanguageCode string
// Seed is the seed for deterministic generation (0-4294967295).
Seed int
}
DialogueRequest contains options for dialogue generation.
type DialogueResponse ¶
type DialogueResponse struct {
// AudioBase64 is the base64-encoded audio data.
AudioBase64 string
// VoiceSegments contains timing info for each voice segment.
VoiceSegments []VoiceSegment
}
DialogueResponse contains the dialogue generation result with timestamps.
type DubbingProject ¶
type DubbingProject struct {
// DubbingID is the unique identifier.
DubbingID string
// Name is the project name.
Name string
// Status is the current status (dubbed, dubbing, failed, cloning).
Status string
// TargetLanguages are the target languages for dubbing.
TargetLanguages []string
// SourceLanguage is the source language.
SourceLanguage string
// Error contains any error message if the project failed.
Error string
// CreatedAt is when the project was created.
CreatedAt time.Time
}
DubbingProject represents a dubbing project.
func (*DubbingProject) IsComplete ¶
func (p *DubbingProject) IsComplete() bool
IsComplete checks if a dubbing project is complete.
func (*DubbingProject) IsFailed ¶
func (p *DubbingProject) IsFailed() bool
IsFailed checks if a dubbing project has failed.
func (*DubbingProject) IsProcessing ¶
func (p *DubbingProject) IsProcessing() bool
IsProcessing checks if a dubbing project is still processing.
type DubbingRequest ¶
type DubbingRequest struct {
// Name is the name of the dubbing project.
Name string
// SourceURL is the URL of the source media (alternative to file upload).
SourceURL string
// File is the source media file (alternative to SourceURL).
File io.Reader
// SourceLanguage is the source language code (ISO 639-1).
SourceLanguage string
// TargetLanguage is the target language code (ISO 639-1).
TargetLanguage string
// NumSpeakers is the number of speakers (0 for auto-detection).
NumSpeakers int
// Watermark enables watermark (for free tier).
Watermark bool
// StartTime is the start time in seconds for dubbing.
StartTime int
// EndTime is the end time in seconds for dubbing.
EndTime int
// HighestResolution requests highest resolution output.
HighestResolution bool
// DropBackgroundAudio removes background audio.
DropBackgroundAudio bool
}
DubbingRequest contains options for creating a dubbing project.
type DubbingResponse ¶
type DubbingResponse struct {
// DubbingID is the ID of the created project.
DubbingID string
// ExpectedDurationSeconds is the expected duration.
ExpectedDurationSeconds float64
}
DubbingResponse contains the result of creating a dubbing project.
type DubbingService ¶
type DubbingService struct {
// contains filtered or unexported fields
}
DubbingService handles dubbing operations.
func (*DubbingService) CreateFromURL ¶
func (s *DubbingService) CreateFromURL(ctx context.Context, req *DubbingRequest) (*DubbingResponse, error)
CreateFromURL creates a dubbing project from a URL source.
func (*DubbingService) Delete ¶
func (s *DubbingService) Delete(ctx context.Context, dubbingID string) error
Delete deletes a dubbing project by ID.
func (*DubbingService) Get ¶
func (s *DubbingService) Get(ctx context.Context, dubbingID string) (*DubbingProject, error)
Get returns the metadata of a dubbing project by ID.
func (*DubbingService) GetDubbedFile ¶
func (s *DubbingService) GetDubbedFile(ctx context.Context, dubbingID, languageCode string) (io.Reader, error)
GetDubbedFile returns the dubbed audio/video file for a specific language.
type ForcedAlignmentRequest ¶
type ForcedAlignmentRequest struct {
// File is the audio file to align (required).
File io.Reader
// Filename is the name of the file (required).
Filename string
// Text is the text to align with the audio (required).
Text string
}
ForcedAlignmentRequest contains options for forced alignment.
type ForcedAlignmentResponse ¶
type ForcedAlignmentResponse struct {
// Words contains word-level timing information.
Words []AlignmentWord
// Characters contains character-level timing information.
Characters []AlignmentCharacter
// Loss is the average alignment confidence score.
Loss float64
}
ForcedAlignmentResponse contains the alignment result.
type ForcedAlignmentService ¶
type ForcedAlignmentService struct {
// contains filtered or unexported fields
}
ForcedAlignmentService handles forced alignment between audio and text.
func (*ForcedAlignmentService) Align ¶
func (s *ForcedAlignmentService) Align(ctx context.Context, req *ForcedAlignmentRequest) (*ForcedAlignmentResponse, error)
Align performs forced alignment between audio and text. This is useful for generating word-level timestamps for captions and subtitles.
func (*ForcedAlignmentService) AlignFile ¶
func (s *ForcedAlignmentService) AlignFile(ctx context.Context, file io.Reader, filename, text string) (*ForcedAlignmentResponse, error)
AlignFile is a convenience method to align audio from a file reader with text.
type HistoryItem ¶
type HistoryItem struct {
// HistoryItemID is the unique identifier.
HistoryItemID string
// VoiceID is the ID of the voice used.
VoiceID string
// VoiceName is the name of the voice used.
VoiceName string
// VoiceCategory is the category of the voice.
VoiceCategory string
// ModelID is the ID of the model used.
ModelID string
// Text is the text that was converted to speech.
Text string
// State is the state of the history item.
State string
// Source is the source of the generation.
Source string
// ContentType is the content type of the audio.
ContentType string
// CharactersUsed is the number of characters used.
CharactersUsed int
// CreatedAt is when the item was created.
CreatedAt time.Time
}
HistoryItem represents a speech generation history item.
type HistoryListOptions ¶
type HistoryListOptions struct {
// PageSize is the number of items per page.
PageSize int
// StartAfterHistoryItemID is for pagination (fetch items after this ID).
StartAfterHistoryItemID string
// VoiceID filters by voice ID.
VoiceID string
}
HistoryListOptions contains options for listing history items.
type HistoryListResponse ¶
type HistoryListResponse struct {
// Items is the list of history items.
Items []*HistoryItem
// HasMore indicates if there are more items to fetch.
HasMore bool
// LastHistoryItemID is the ID of the last item (for pagination).
LastHistoryItemID string
}
HistoryListResponse contains the list of history items and pagination info.
type HistoryService ¶
type HistoryService struct {
// contains filtered or unexported fields
}
HistoryService handles history operations.
func (*HistoryService) Delete ¶
func (s *HistoryService) Delete(ctx context.Context, historyItemID string) error
Delete deletes a history item by ID.
func (*HistoryService) Get ¶
func (s *HistoryService) Get(ctx context.Context, historyItemID string) (*HistoryItem, error)
Get returns a specific history item by ID.
func (*HistoryService) List ¶
func (s *HistoryService) List(ctx context.Context, opts *HistoryListOptions) (*HistoryListResponse, error)
List returns a list of speech history items.
type Language ¶
type Language struct {
// LanguageID is the unique identifier (ISO code).
LanguageID string
// Name is the display name of the language.
Name string
}
Language represents a language supported by a model.
type ListPhoneNumbersResponse ¶
type ListPhoneNumbersResponse struct {
PhoneNumbers []PhoneNumber `json:"phone_numbers"`
}
ListPhoneNumbersResponse is the response from listing phone numbers.
type Model ¶
type Model struct {
// ModelID is the unique identifier for the model.
ModelID string
// Name is the display name of the model.
Name string
// Description is the model description.
Description string
// CanDoTextToSpeech indicates if the model supports TTS.
CanDoTextToSpeech bool
// CanDoVoiceConversion indicates if the model supports voice conversion.
CanDoVoiceConversion bool
// CanBeFinetuned indicates if the model can be fine-tuned.
CanBeFinetuned bool
// CanUseStyle indicates if the model supports style settings.
CanUseStyle bool
// CanUseSpeakerBoost indicates if the model supports speaker boost.
CanUseSpeakerBoost bool
// Languages is the list of supported languages.
Languages []*Language
// MaxCharactersFreeUser is the max characters for free users.
MaxCharactersFreeUser int
// MaxCharactersSubscribedUser is the max characters for subscribed users.
MaxCharactersSubscribedUser int
// TokenCostFactor is the cost factor for the model.
TokenCostFactor float64
}
Model represents an ElevenLabs model.
type ModelsService ¶
type ModelsService struct {
// contains filtered or unexported fields
}
ModelsService handles model operations.
func (*ModelsService) List ¶
func (s *ModelsService) List(ctx context.Context) ([]*Model, error)
List returns all available models.
func (*ModelsService) ListTTSModels ¶
func (s *ModelsService) ListTTSModels(ctx context.Context) ([]*Model, error)
ListTTSModels returns only models that support text-to-speech.
type MusicDetailedRequest ¶
type MusicDetailedRequest struct {
// Prompt is a simple text description (cannot be used with CompositionPlan).
Prompt string
// CompositionPlan is a detailed plan (cannot be used with Prompt).
CompositionPlan *CompositionPlan
// DurationMs is the length in milliseconds (only used with Prompt).
DurationMs int
// ForceInstrumental ensures no vocals (only used with Prompt).
ForceInstrumental bool
// Seed for deterministic generation.
Seed int
// WithTimestamps returns word timestamps in the response.
WithTimestamps bool
}
MusicDetailedRequest contains options for detailed music generation.
type MusicDetailedResponse ¶
type MusicDetailedResponse struct {
// Audio is the generated music.
Audio io.Reader
// SongID is the unique identifier for this song.
SongID string
}
MusicDetailedResponse contains the detailed music generation result.
type MusicRequest ¶
type MusicRequest struct {
// Prompt is a simple text description of the music to generate.
// Cannot be used with CompositionPlan.
Prompt string
// DurationMs is the length of the song in milliseconds (3000-600000).
// If not provided, the model will choose based on the prompt.
DurationMs int
// ForceInstrumental ensures the song has no vocals.
ForceInstrumental bool
// Seed for deterministic generation (optional).
Seed int
}
MusicRequest contains options for music generation.
type MusicResponse ¶
type MusicResponse struct {
// Audio is the generated music.
Audio io.Reader
// SongID is the unique identifier for this song.
SongID string
}
MusicResponse contains the music generation result.
type MusicService ¶
type MusicService struct {
// contains filtered or unexported fields
}
MusicService handles music composition and generation.
func (*MusicService) Generate ¶
func (s *MusicService) Generate(ctx context.Context, req *MusicRequest) (*MusicResponse, error)
Generate creates music from a text prompt.
func (*MusicService) GenerateDetailed ¶
func (s *MusicService) GenerateDetailed(ctx context.Context, req *MusicDetailedRequest) (*MusicDetailedResponse, error)
GenerateDetailed creates music with detailed options and metadata. Use either Prompt for simple generation or CompositionPlan for fine-grained control.
Example with prompt:
resp, err := client.Music().GenerateDetailed(ctx, &MusicDetailedRequest{
Prompt: "epic orchestral music",
DurationMs: 60000,
})
Example with composition plan:
plan, _ := client.Music().GeneratePlan(ctx, &CompositionPlanRequest{Prompt: "pop song"})
resp, err := client.Music().GenerateDetailed(ctx, &MusicDetailedRequest{
CompositionPlan: plan,
})
func (*MusicService) GenerateInstrumental ¶
func (s *MusicService) GenerateInstrumental(ctx context.Context, prompt string, durationMs int) (io.Reader, error)
GenerateInstrumental generates instrumental music from a prompt.
func (*MusicService) GeneratePlan ¶
func (s *MusicService) GeneratePlan(ctx context.Context, req *CompositionPlanRequest) (*CompositionPlan, error)
GeneratePlan creates a composition plan from a text prompt. The returned plan can be modified and used with GenerateDetailed.
Example:
plan, err := client.Music().GeneratePlan(ctx, &CompositionPlanRequest{
Prompt: "upbeat pop song about summer",
DurationMs: 180000, // 3 minutes
})
// Modify the plan if needed
plan.Sections[0].Lines = []string{"Custom lyrics here"}
// Generate music from the plan
resp, err := client.Music().GenerateDetailed(ctx, &MusicDetailedRequest{
CompositionPlan: plan,
})
func (*MusicService) GenerateStream ¶
func (s *MusicService) GenerateStream(ctx context.Context, req *MusicRequest) (*MusicResponse, error)
GenerateStream creates music with streaming output.
func (*MusicService) SeparateStems ¶
func (s *MusicService) SeparateStems(ctx context.Context, req *StemSeparationRequest) (io.Reader, error)
SeparateStems separates a song into individual stems (vocals, instruments, etc.).
Example:
f, _ := os.Open("song.mp3")
stems, err := client.Music().SeparateStems(ctx, &StemSeparationRequest{
File: f,
Filename: "song.mp3",
})
// Save the separated stems (returned as a zip file)
output, _ := os.Create("stems.zip")
io.Copy(output, stems)
func (*MusicService) SeparateStemsFile ¶
SeparateStemsFile is a convenience method to separate stems from a file path.
type Option ¶
type Option func(*clientOptions)
Option is a functional option for configuring the Client.
func WithAPIKey ¶
WithAPIKey sets the API key for authentication.
func WithHTTPClient ¶
WithHTTPClient sets a custom HTTP client.
func WithTimeout ¶
WithTimeout sets the request timeout.
type PhoneNumber ¶
type PhoneNumber struct {
ID string `json:"phone_number_id"`
PhoneNumber string `json:"phone_number"`
Label string `json:"label"`
AgentID string `json:"agent_id,omitempty"`
Provider string `json:"provider"` // "twilio", "sip"
Status string `json:"status"`
CreatedAt string `json:"created_at"`
}
PhoneNumber represents an ElevenLabs phone number.
type PhoneNumberService ¶
type PhoneNumberService struct {
// contains filtered or unexported fields
}
PhoneNumberService handles phone number management.
func (*PhoneNumberService) Delete ¶
func (s *PhoneNumberService) Delete(ctx context.Context, phoneNumberID string) error
Delete removes a phone number from the workspace.
func (*PhoneNumberService) Get ¶
func (s *PhoneNumberService) Get(ctx context.Context, phoneNumberID string) (*PhoneNumber, error)
Get retrieves a specific phone number by ID.
func (*PhoneNumberService) List ¶
func (s *PhoneNumberService) List(ctx context.Context) ([]PhoneNumber, error)
List returns all phone numbers in the workspace.
func (*PhoneNumberService) Update ¶
func (s *PhoneNumberService) Update(ctx context.Context, phoneNumberID string, req *UpdatePhoneNumberRequest) (*PhoneNumber, error)
Update updates a phone number's settings.
type Project ¶
type Project struct {
// ProjectID is the unique identifier.
ProjectID string
// Name is the project name.
Name string
// Description is the project description.
Description string
// Author is the project author.
Author string
// Language is the two-letter language code (ISO 639-1).
Language string
// DefaultModelID is the default model for TTS.
DefaultModelID string
// DefaultParagraphVoiceID is the default voice for paragraphs.
DefaultParagraphVoiceID string
// DefaultTitleVoiceID is the default voice for titles.
DefaultTitleVoiceID string
// ContentType is the content type (e.g., "Novel", "Short Story").
ContentType string
// CoverImageURL is the cover image URL.
CoverImageURL string
// CreatedAt is the creation timestamp.
CreatedAt time.Time
// CanBeDownloaded indicates if the project can be downloaded.
CanBeDownloaded bool
// AccessLevel is the access level of the project.
AccessLevel string
}
Project represents a Studio project.
type ProjectSnapshot ¶
type ProjectSnapshot struct {
// ProjectSnapshotID is the unique identifier.
ProjectSnapshotID string
// ProjectID is the parent project ID.
ProjectID string
// Name is the snapshot name.
Name string
// CreatedAt is when the snapshot was created.
CreatedAt time.Time
}
ProjectSnapshot represents a snapshot of a project.
type ProjectsService ¶
type ProjectsService struct {
// contains filtered or unexported fields
}
ProjectsService handles Studio Projects operations. Studio projects (the feature formerly known as "Projects") let you create long-form audio content such as audiobooks, podcasts, and video course narration, organized into chapters.
func (*ProjectsService) Convert ¶
func (s *ProjectsService) Convert(ctx context.Context, projectID string) error
Convert initiates conversion of a project to audio.
func (*ProjectsService) ConvertChapter ¶
func (s *ProjectsService) ConvertChapter(ctx context.Context, projectID, chapterID string) error
ConvertChapter initiates conversion of a chapter to audio.
func (*ProjectsService) Create ¶
func (s *ProjectsService) Create(ctx context.Context, req *CreateProjectRequest) (*Project, error)
Create creates a new project.
func (*ProjectsService) Delete ¶
func (s *ProjectsService) Delete(ctx context.Context, projectID string) error
Delete deletes a project.
func (*ProjectsService) DeleteChapter ¶
func (s *ProjectsService) DeleteChapter(ctx context.Context, projectID, chapterID string) error
DeleteChapter deletes a chapter from a project.
func (*ProjectsService) DownloadSnapshotArchive ¶
func (s *ProjectsService) DownloadSnapshotArchive(ctx context.Context, projectID, snapshotID string) (io.Reader, error)
DownloadSnapshotArchive downloads a project snapshot as a zip archive.
func (*ProjectsService) List ¶
func (s *ProjectsService) List(ctx context.Context) ([]*Project, error)
List returns all projects.
func (*ProjectsService) ListChapterSnapshots ¶
func (s *ProjectsService) ListChapterSnapshots(ctx context.Context, projectID, chapterID string) ([]*ChapterSnapshot, error)
ListChapterSnapshots returns all snapshots for a chapter.
func (*ProjectsService) ListChapters ¶
ListChapters returns all chapters in a project.
func (*ProjectsService) ListSnapshots ¶
func (s *ProjectsService) ListSnapshots(ctx context.Context, projectID string) ([]*ProjectSnapshot, error)
ListSnapshots returns all snapshots for a project.
func (*ProjectsService) StreamChapterAudio ¶
func (s *ProjectsService) StreamChapterAudio(ctx context.Context, projectID, chapterID, snapshotID string) (io.Reader, error)
StreamChapterAudio streams audio from a chapter snapshot.
func (*ProjectsService) Update ¶
func (s *ProjectsService) Update(ctx context.Context, projectID string, req *UpdateProjectRequest) error
Update updates a project. Note: Name, DefaultParagraphVoiceID, and DefaultTitleVoiceID are required fields.
type PronunciationDictionary ¶
type PronunciationDictionary struct {
// ID is the unique identifier.
ID string
// Name is the display name.
Name string
// Description is the dictionary description.
Description string
// LatestVersionID is the ID of the latest version.
LatestVersionID string
// RulesCount is the number of rules in the latest version.
RulesCount int
// CreatedBy is the user ID who created the dictionary.
CreatedBy string
// CreatedAt is when the dictionary was created.
CreatedAt time.Time
}
PronunciationDictionary represents a pronunciation dictionary.
type PronunciationDictionaryListOptions ¶
type PronunciationDictionaryListOptions struct {
// PageSize is the number of items per page (max 100).
PageSize int
// Cursor is the pagination cursor.
Cursor string
}
PronunciationDictionaryListOptions contains options for listing pronunciation dictionaries.
type PronunciationDictionaryListResponse ¶
type PronunciationDictionaryListResponse struct {
// Dictionaries is the list of pronunciation dictionaries.
Dictionaries []*PronunciationDictionary
// HasMore indicates if there are more items to fetch.
HasMore bool
// NextCursor is the cursor for pagination.
NextCursor string
}
PronunciationDictionaryListResponse contains the list of pronunciation dictionaries and pagination info.
type PronunciationRule ¶
type PronunciationRule struct {
// Grapheme is the text to match (required).
Grapheme string `json:"grapheme"`
// Alias is the replacement text (mutually exclusive with Phoneme).
// This is the easier option - just specify what text should be read instead.
Alias string `json:"alias,omitempty"`
// Phoneme is the IPA pronunciation (mutually exclusive with Alias).
// Use this for precise phonetic control.
Phoneme string `json:"phoneme,omitempty"`
}
PronunciationRule defines how a word or phrase should be pronounced. Rules can use either an alias (text substitution) or IPA phonemes.
Example JSON:
[
{"grapheme": "ADK", "alias": "Agent Development Kit"},
{"grapheme": "kubectl", "alias": "kube control"},
{"grapheme": "nginx", "phoneme": "ˈɛndʒɪnˈɛks"}
]
func (*PronunciationRule) Validate ¶
func (r *PronunciationRule) Validate() error
Validate checks that the rule is valid.
type PronunciationRules ¶
type PronunciationRules []PronunciationRule
PronunciationRules is a collection of pronunciation rules.
func LoadRulesFromJSON ¶
func LoadRulesFromJSON(filename string) (PronunciationRules, error)
LoadRulesFromJSON loads pronunciation rules from a JSON file.
Example file content:
[
{"grapheme": "ADK", "alias": "Agent Development Kit"},
{"grapheme": "API", "alias": "A P I"},
{"grapheme": "SQL", "alias": "sequel"}
]
func ParseRulesFromJSON ¶
func ParseRulesFromJSON(data []byte) (PronunciationRules, error)
ParseRulesFromJSON parses pronunciation rules from JSON bytes.
func RulesFromMap ¶
func RulesFromMap(m map[string]string) PronunciationRules
RulesFromMap creates pronunciation rules from a simple map. All entries are treated as alias substitutions.
Example:
rules := RulesFromMap(map[string]string{
"ADK": "Agent Development Kit",
"kubectl": "kube control",
"API": "A P I",
})
func (PronunciationRules) Graphemes ¶
func (rules PronunciationRules) Graphemes() []string
Graphemes returns a list of all graphemes (the words being defined).
func (PronunciationRules) SavePLS ¶
func (rules PronunciationRules) SavePLS(filename, language string) error
SavePLS writes the rules to a PLS file.
func (PronunciationRules) String ¶
func (rules PronunciationRules) String() string
String returns a human-readable summary of the rules.
func (PronunciationRules) ToPLS ¶
func (rules PronunciationRules) ToPLS(language string) ([]byte, error)
ToPLS converts the rules to PLS (Pronunciation Lexicon Specification) XML format. This is the format required by the ElevenLabs API.
func (PronunciationRules) ToPLSString ¶
func (rules PronunciationRules) ToPLSString(language string) (string, error)
ToPLSString is a convenience method that returns the PLS as a string.
type PronunciationService ¶
type PronunciationService struct {
// contains filtered or unexported fields
}
PronunciationService handles pronunciation dictionary operations. Pronunciation dictionaries help ensure correct pronunciation of technical terms, names, and domain-specific vocabulary.
func (*PronunciationService) Archive ¶
func (s *PronunciationService) Archive(ctx context.Context, dictionaryID string) error
Archive archives a pronunciation dictionary.
func (*PronunciationService) Create ¶
func (s *PronunciationService) Create(ctx context.Context, req *CreatePronunciationDictionaryRequest) (*PronunciationDictionary, error)
Create creates a new pronunciation dictionary.
Example with rules:
dict, err := client.Pronunciation().Create(ctx, &CreatePronunciationDictionaryRequest{
Name: "Tech Terms",
Rules: elevenlabs.RulesFromMap(map[string]string{
"ADK": "Agent Development Kit",
"kubectl": "kube control",
}),
})
Example with PLS content:
dict, err := client.Pronunciation().Create(ctx, &CreatePronunciationDictionaryRequest{
Name: "Tech Terms",
PLSContent: plsXMLString,
})
func (*PronunciationService) CreateFromJSON ¶
func (s *PronunciationService) CreateFromJSON(ctx context.Context, name, jsonFilePath string) (*PronunciationDictionary, error)
CreateFromJSON creates a pronunciation dictionary from a JSON rules file.
Example JSON file:
[
{"grapheme": "ADK", "alias": "Agent Development Kit"},
{"grapheme": "kubectl", "alias": "kube control"}
]
func (*PronunciationService) CreateFromMap ¶
func (s *PronunciationService) CreateFromMap(ctx context.Context, name string, rules map[string]string) (*PronunciationDictionary, error)
CreateFromMap creates a pronunciation dictionary from a simple map. All entries are treated as alias substitutions (text replacements).
Example:
dict, err := client.Pronunciation().CreateFromMap(ctx, "Tech Terms", map[string]string{
"ADK": "Agent Development Kit",
"kubectl": "kube control",
"API": "A P I",
})
func (*PronunciationService) DownloadLatestPLS ¶
func (s *PronunciationService) DownloadLatestPLS(ctx context.Context, dictionaryID string) (io.Reader, error)
DownloadLatestPLS downloads the PLS file for the latest version of a dictionary. This is a convenience method that first gets the dictionary metadata to find the latest version ID, then downloads that version.
func (*PronunciationService) Get ¶
func (s *PronunciationService) Get(ctx context.Context, dictionaryID string) (*PronunciationDictionary, error)
Get returns a pronunciation dictionary by ID.
func (*PronunciationService) GetVersionPLS ¶
func (s *PronunciationService) GetVersionPLS(ctx context.Context, dictionaryID, versionID string) (io.Reader, error)
GetVersionPLS returns the PLS (Pronunciation Lexicon Specification) XML file for a specific version of a pronunciation dictionary.
The returned io.Reader contains the XML content that can be saved to a file or parsed directly.
Example:
pls, err := client.Pronunciation().GetVersionPLS(ctx, dictionaryID, versionID)
if err != nil {
log.Fatal(err)
}
// Save to file
f, _ := os.Create("dictionary.pls")
io.Copy(f, pls)
func (*PronunciationService) List ¶
func (s *PronunciationService) List(ctx context.Context, opts *PronunciationDictionaryListOptions) (*PronunciationDictionaryListResponse, error)
List returns pronunciation dictionaries one page at a time; opts controls the page size and cursor.
func (*PronunciationService) RemoveRules ¶
func (s *PronunciationService) RemoveRules(ctx context.Context, dictionaryID string, ruleStrings []string) error
RemoveRules removes pronunciation rules from a dictionary. The ruleStrings should be the original text strings to remove.
type SIPOutboundCallRequest ¶
type SIPOutboundCallRequest struct {
// AgentID is the ElevenLabs agent ID to handle the call.
AgentID string `json:"agent_id"`
// ToNumber is the phone number to call (E.164 format).
ToNumber string `json:"to_number"`
// SIPTrunkID is the SIP trunk ID to use.
SIPTrunkID string `json:"sip_trunk_id"`
// FromNumber is the caller ID to display (must be verified).
FromNumber string `json:"from_number,omitempty"`
// CustomLLMExtraBody is additional data to pass to the LLM.
CustomLLMExtraBody map[string]any `json:"custom_llm_extra_body,omitempty"`
// DynamicVariables are variables to inject into the agent prompt.
DynamicVariables map[string]string `json:"dynamic_variables,omitempty"`
// FirstMessage overrides the agent's default first message.
FirstMessage string `json:"first_message,omitempty"`
// SystemPrompt overrides the agent's system prompt.
SystemPrompt string `json:"system_prompt,omitempty"`
}
SIPOutboundCallRequest is the request to make an outbound call via SIP trunk.
type SIPOutboundCallResponse ¶
type SIPOutboundCallResponse struct {
// ConversationID is the ElevenLabs conversation ID for this call.
ConversationID string `json:"conversation_id"`
// Status is the initial call status.
Status string `json:"status"`
}
SIPOutboundCallResponse is the response from making a SIP outbound call.
type STTTranscript ¶
type STTTranscript struct {
// Text is the transcribed text.
Text string `json:"text"`
// IsFinal indicates if this is a final (committed) result.
IsFinal bool `json:"is_final"`
// Words contains word-level timing if enabled.
Words []STTWord `json:"words,omitempty"`
// LanguageCode is the detected language.
LanguageCode string `json:"language_code,omitempty"`
}
STTTranscript represents a transcription result.
type STTWord ¶
type STTWord struct {
Text string `json:"text"`
Start float64 `json:"start"`
End float64 `json:"end"`
Type string `json:"type,omitempty"` // "word" or "spacing"
SpeakerID string `json:"speaker_id,omitempty"` // Speaker identification
}
STTWord represents a single word with timing.
type SaveVoiceRequest ¶
type SaveVoiceRequest struct {
// GeneratedVoiceID from the design response.
GeneratedVoiceID string
// VoiceName is the name for the saved voice.
VoiceName string
// VoiceDescription describes the voice.
VoiceDescription string
// Labels are optional metadata tags.
Labels map[string]string
}
SaveVoiceRequest contains options for saving a generated voice.
type SongSection ¶
type SongSection struct {
// SectionName is the name of the section (e.g., "intro", "verse", "chorus").
SectionName string
// DurationMs is the duration in milliseconds (3000-120000).
DurationMs int
// Lines are the lyrics for this section (max 200 chars per line).
Lines []string
// PositiveLocalStyles are styles for this specific section.
PositiveLocalStyles []string
// NegativeLocalStyles are styles to avoid in this section.
NegativeLocalStyles []string
}
SongSection represents a section of a song in a composition plan.
type SoundEffectRequest ¶
type SoundEffectRequest struct {
// Text is the description of the sound effect to generate.
// Examples: "car engine starting", "thunder and rain", "crowd cheering"
Text string
// DurationSeconds is the target duration (0.5 to 30 seconds).
// If not set, the optimal duration will be guessed from the prompt.
DurationSeconds float64
// PromptInfluence controls how closely the generation follows the prompt (0.0 to 1.0).
// Higher values = more faithful to prompt but less variation.
// Default is 0.3.
PromptInfluence float64
// Loop creates a sound effect that loops smoothly.
Loop bool
// OutputFormat specifies the audio format (e.g., "mp3_44100_128").
OutputFormat string
}
SoundEffectRequest contains options for generating a sound effect.
func (*SoundEffectRequest) Validate ¶
func (r *SoundEffectRequest) Validate() error
Validate validates the sound effect request.
type SoundEffectResponse ¶
SoundEffectResponse contains the generated sound effect.
type SoundEffectsService ¶
type SoundEffectsService struct {
// contains filtered or unexported fields
}
SoundEffectsService handles sound effect generation.
func (*SoundEffectsService) Generate ¶
func (s *SoundEffectsService) Generate(ctx context.Context, req *SoundEffectRequest) (*SoundEffectResponse, error)
Generate creates a sound effect from a text description.
func (*SoundEffectsService) GenerateLoop ¶
func (s *SoundEffectsService) GenerateLoop(ctx context.Context, description string, durationSeconds float64) (io.Reader, error)
GenerateLoop generates a looping sound effect.
type SpeechToSpeechRequest ¶
type SpeechToSpeechRequest struct {
// VoiceID is the target voice to convert to.
VoiceID string
// Audio is the source audio data to convert.
Audio io.Reader
// AudioFilename is the filename for the audio (optional, helps with format detection).
AudioFilename string
// ModelID is the model to use. Defaults to "eleven_english_sts_v2".
ModelID string
// VoiceSettings configures the voice parameters.
VoiceSettings *VoiceSettings
// OutputFormat specifies the audio output format.
// Examples: "mp3_44100_128", "pcm_16000", "pcm_22050"
OutputFormat string
// RemoveBackgroundNoise removes background noise from the source audio.
RemoveBackgroundNoise bool
// SeedAudio is optional seed audio to influence the conversion.
SeedAudio io.Reader
// SeedAudioFilename is the filename for the seed audio.
SeedAudioFilename string
}
SpeechToSpeechRequest is a request to convert speech to a different voice.
func (*SpeechToSpeechRequest) Validate ¶
func (r *SpeechToSpeechRequest) Validate() error
Validate validates the speech-to-speech request.
type SpeechToSpeechResponse ¶
SpeechToSpeechResponse contains the converted audio.
type SpeechToSpeechService ¶
type SpeechToSpeechService struct {
// contains filtered or unexported fields
}
SpeechToSpeechService handles voice conversion operations.
func (*SpeechToSpeechService) Convert ¶
func (s *SpeechToSpeechService) Convert(ctx context.Context, req *SpeechToSpeechRequest) (*SpeechToSpeechResponse, error)
Convert converts speech from one voice to another.
func (*SpeechToSpeechService) ConvertStream ¶
func (s *SpeechToSpeechService) ConvertStream(ctx context.Context, req *SpeechToSpeechRequest) (*SpeechToSpeechResponse, error)
ConvertStream converts speech with streaming response.
type SpeechToTextService ¶
type SpeechToTextService struct {
// contains filtered or unexported fields
}
SpeechToTextService handles speech-to-text transcription.
func (*SpeechToTextService) Transcribe ¶
func (s *SpeechToTextService) Transcribe(ctx context.Context, req *TranscriptionRequest) (*TranscriptionResponse, error)
Transcribe transcribes audio to text.
func (*SpeechToTextService) TranscribeURL ¶
func (s *SpeechToTextService) TranscribeURL(ctx context.Context, url string) (*TranscriptionResponse, error)
TranscribeURL transcribes audio from a URL.
func (*SpeechToTextService) TranscribeWithDiarization ¶
func (s *SpeechToTextService) TranscribeWithDiarization(ctx context.Context, url string) (*TranscriptionResponse, error)
TranscribeWithDiarization transcribes audio with speaker identification.
type StemSeparationRequest ¶
type StemSeparationRequest struct {
// File is the audio file to separate.
File io.Reader
// Filename is the name of the file.
Filename string
// StemVariation specifies which stem variation to use.
// Options: "two_stems_v1" (vocals + music), "six_stems_v1" (six stems, including vocals, drums, bass, and other - default)
StemVariation string
}
StemSeparationRequest contains options for stem separation.
type Subscription ¶
type Subscription struct {
// Tier is the subscription tier (e.g., "free", "starter", "creator").
Tier string
// Status is the subscription status.
Status string
// CharacterCount is the number of characters used.
CharacterCount int
// CharacterLimit is the maximum characters allowed.
CharacterLimit int
// VoiceLimit is the maximum number of voices allowed.
VoiceLimit int
// VoiceSlotsUsed is the number of voice slots used.
VoiceSlotsUsed int
// CanUseInstantVoiceCloning indicates if instant cloning is available.
CanUseInstantVoiceCloning bool
// CanUseProfessionalVoiceCloning indicates if pro cloning is available.
CanUseProfessionalVoiceCloning bool
// NextCharacterResetUnix is when characters reset (Unix timestamp).
NextCharacterResetUnix int64
}
Subscription represents a user's subscription details.
func (*Subscription) CharactersRemaining ¶
func (s *Subscription) CharactersRemaining() int
CharactersRemaining returns the number of characters remaining.
type TTSAlignment ¶
type TTSAlignment struct {
Characters []string `json:"characters"`
CharacterStart []float64 `json:"character_start_times_seconds"`
CharacterEnd []float64 `json:"character_end_times_seconds"`
}
TTSAlignment contains character-level timing information.
type TTSRequest ¶
type TTSRequest struct {
// VoiceID is the voice to use for generation.
VoiceID string
// Text is the text to convert to speech.
Text string
// ModelID is the model to use. Defaults to DefaultModelID.
ModelID string
// VoiceSettings configures the voice parameters.
// If nil, default settings will be used.
VoiceSettings *VoiceSettings
// OutputFormat specifies the audio output format.
// Examples: "mp3_44100_128", "pcm_16000", "pcm_22050"
OutputFormat string
// LanguageCode is the ISO 639-1 language code for text normalization.
LanguageCode string
}
TTSRequest is a request to generate speech from text.
func (*TTSRequest) Validate ¶
func (r *TTSRequest) Validate() error
Validate validates the TTS request.
type TTSResponse ¶
TTSResponse contains the generated audio from text-to-speech.
type TextToDialogueService ¶
type TextToDialogueService struct {
// contains filtered or unexported fields
}
TextToDialogueService handles multi-voice dialogue generation.
func (*TextToDialogueService) Generate ¶
func (s *TextToDialogueService) Generate(ctx context.Context, req *DialogueRequest) (io.Reader, error)
Generate creates dialogue audio from multiple voice inputs. Returns an io.Reader containing the combined audio.
func (*TextToDialogueService) GenerateStream ¶
func (s *TextToDialogueService) GenerateStream(ctx context.Context, req *DialogueRequest) (io.Reader, error)
GenerateStream creates dialogue audio with streaming output.
func (*TextToDialogueService) GenerateWithTimestamps ¶
func (s *TextToDialogueService) GenerateWithTimestamps(ctx context.Context, req *DialogueRequest) (*DialogueResponse, error)
GenerateWithTimestamps creates dialogue audio with timing information.
func (*TextToDialogueService) Simple ¶
func (s *TextToDialogueService) Simple(ctx context.Context, inputs []DialogueInput) (io.Reader, error)
Simple generates dialogue audio from text-voice pairs with default settings.
type TextToSpeechService ¶
type TextToSpeechService struct {
// contains filtered or unexported fields
}
TextToSpeechService handles text-to-speech operations.
func (*TextToSpeechService) Generate ¶
func (s *TextToSpeechService) Generate(ctx context.Context, req *TTSRequest) (*TTSResponse, error)
Generate generates speech from text.
func (*TextToSpeechService) GenerateToWriter ¶
func (s *TextToSpeechService) GenerateToWriter(ctx context.Context, req *TTSRequest, w io.Writer) error
GenerateToWriter generates speech and writes it to a writer.
type TranscriptionRequest ¶
type TranscriptionRequest struct {
// FileURL is the HTTPS URL of the file to transcribe.
// Either FileURL or FileContent must be provided.
FileURL string
// FileContent is the base64-encoded file content.
// Either FileURL or FileContent must be provided.
FileContent string
// LanguageCode is an ISO-639-1 or ISO-639-3 language code.
// If not provided, language is auto-detected.
LanguageCode string
// Diarize enables speaker diarization (who said what).
Diarize bool
// NumSpeakers is the expected number of speakers (for diarization).
NumSpeakers int
// TagAudioEvents tags audio events like laughter, applause, etc.
TagAudioEvents bool
// ModelID is the transcription model to use (default: "scribe_v1").
ModelID string
}
TranscriptionRequest contains options for transcription.
type TranscriptionResponse ¶
type TranscriptionResponse struct {
// Text is the full transcribed text.
Text string
// LanguageCode is the detected language.
LanguageCode string
// Words contains word-level details with timestamps.
Words []TranscriptionWord
// Utterances contains speaker-labeled segments (when diarization is enabled).
Utterances []TranscriptionUtterance
}
TranscriptionResponse contains the transcription result.
type TranscriptionUtterance ¶
type TranscriptionUtterance struct {
// Text is the utterance text.
Text string
// Start is the start time in seconds.
Start float64
// End is the end time in seconds.
End float64
// Speaker is the speaker ID.
Speaker string
}
TranscriptionUtterance represents a speaker segment.
type TranscriptionWord ¶
type TranscriptionWord struct {
// Text is the word text.
Text string
// Start is the start time in seconds.
Start float64
// End is the end time in seconds.
End float64
// Confidence is the confidence score (0-1).
Confidence float64
// Speaker is the speaker ID (when diarization is enabled).
Speaker string
// Type is the word type (e.g., "word", "punctuation").
Type string
}
TranscriptionWord represents a single word with timing.
type TwilioOutboundCallRequest ¶
type TwilioOutboundCallRequest struct {
// AgentID is the ElevenLabs agent ID to handle the call.
AgentID string `json:"agent_id"`
// AgentPhoneNumberID is the ElevenLabs phone number ID to call from.
AgentPhoneNumberID string `json:"agent_phone_number_id"`
// ToNumber is the phone number to call (E.164 format).
ToNumber string `json:"to_number"`
// CustomLLMExtraBody is additional data to pass to the LLM.
CustomLLMExtraBody map[string]any `json:"custom_llm_extra_body,omitempty"`
// DynamicVariables are variables to inject into the agent prompt.
DynamicVariables map[string]string `json:"dynamic_variables,omitempty"`
// FirstMessage overrides the agent's default first message.
FirstMessage string `json:"first_message,omitempty"`
// SystemPrompt overrides the agent's system prompt.
SystemPrompt string `json:"system_prompt,omitempty"`
}
TwilioOutboundCallRequest is the request to make an outbound call via Twilio.
type TwilioOutboundCallResponse ¶
type TwilioOutboundCallResponse struct {
// CallSID is the Twilio call SID.
CallSID string `json:"call_sid"`
// ConversationID is the ElevenLabs conversation ID for this call.
ConversationID string `json:"conversation_id"`
// Status is the initial call status.
Status string `json:"status"`
}
TwilioOutboundCallResponse is the response from making an outbound call.
type TwilioRegisterCallRequest ¶
type TwilioRegisterCallRequest struct {
// AgentID is the ElevenLabs agent ID to handle the call.
AgentID string `json:"agent_id"`
// AgentPhoneNumberID is the ElevenLabs phone number ID (if using imported number).
AgentPhoneNumberID string `json:"agent_phone_number_id,omitempty"`
// CustomLLMExtraBody is additional data to pass to the LLM.
CustomLLMExtraBody map[string]any `json:"custom_llm_extra_body,omitempty"`
// DynamicVariables are variables to inject into the agent prompt.
DynamicVariables map[string]string `json:"dynamic_variables,omitempty"`
// FirstMessage overrides the agent's default first message.
FirstMessage string `json:"first_message,omitempty"`
// SystemPrompt overrides the agent's system prompt.
SystemPrompt string `json:"system_prompt,omitempty"`
}
TwilioRegisterCallRequest is the request to register an incoming Twilio call.
type TwilioRegisterCallResponse ¶
type TwilioRegisterCallResponse struct {
// TwiML is the TwiML response to return to Twilio.
TwiML string `json:"twiml"`
// ConversationID is the ElevenLabs conversation ID for this call.
ConversationID string `json:"conversation_id,omitempty"`
}
TwilioRegisterCallResponse is the response from registering a call.
type TwilioService ¶
type TwilioService struct {
// contains filtered or unexported fields
}
TwilioService handles Twilio phone integration for conversational AI.
func (*TwilioService) OutboundCall ¶
func (s *TwilioService) OutboundCall(ctx context.Context, req *TwilioOutboundCallRequest) (*TwilioOutboundCallResponse, error)
OutboundCall initiates an outbound call via Twilio.
func (*TwilioService) RegisterCall ¶
func (s *TwilioService) RegisterCall(ctx context.Context, req *TwilioRegisterCallRequest) (*TwilioRegisterCallResponse, error)
RegisterCall registers an incoming Twilio call with ElevenLabs. It returns TwiML that should be sent back to Twilio in response to its webhook request.
func (*TwilioService) SIPOutboundCall ¶
func (s *TwilioService) SIPOutboundCall(ctx context.Context, req *SIPOutboundCallRequest) (*SIPOutboundCallResponse, error)
SIPOutboundCall initiates an outbound call via SIP trunk.
type UpdatePhoneNumberRequest ¶
type UpdatePhoneNumberRequest struct {
// Label is a descriptive label for the phone number.
Label string `json:"label,omitempty"`
// AgentID is the agent to associate with this phone number.
AgentID string `json:"agent_id,omitempty"`
}
UpdatePhoneNumberRequest is the request to update a phone number.
type UpdateProjectRequest ¶
type UpdateProjectRequest struct {
// Name is the new project name (required).
Name string
// DefaultParagraphVoiceID is the new default paragraph voice (required).
DefaultParagraphVoiceID string
// DefaultTitleVoiceID is the new default title voice (required).
DefaultTitleVoiceID string
// Author is an optional author name.
Author string
// Title is an optional title.
Title string
}
UpdateProjectRequest contains options for updating a project.
type User ¶
type User struct {
// UserID is the unique user identifier.
UserID string
// FirstName is the user's first name.
FirstName string
// Subscription contains the user's subscription details.
Subscription *Subscription
// CreatedAt is when the user was created.
CreatedAt time.Time
}
User represents an ElevenLabs user.
type UserService ¶
type UserService struct {
// contains filtered or unexported fields
}
UserService handles user and subscription operations.
func (*UserService) GetCharactersRemaining ¶
func (s *UserService) GetCharactersRemaining(ctx context.Context) (int, error)
GetCharactersRemaining returns the number of characters remaining in the current period.
func (*UserService) GetInfo ¶
func (s *UserService) GetInfo(ctx context.Context) (*User, error)
GetInfo returns the current user's information including subscription.
func (*UserService) GetSubscription ¶
func (s *UserService) GetSubscription(ctx context.Context) (*Subscription, error)
GetSubscription returns the current user's subscription details. This is a convenience method that calls GetInfo and returns just the subscription.
type ValidationError ¶
ValidationError represents a validation error.
func (*ValidationError) Error ¶
func (e *ValidationError) Error() string
Error implements the error interface.
type Voice ¶
type Voice struct {
// VoiceID is the unique identifier for the voice.
VoiceID string
// Name is the display name of the voice.
Name string
// Category is the category of the voice (e.g., "premade", "cloned").
Category string
// Description is the description of the voice.
Description string
// PreviewURL is the URL to preview the voice.
PreviewURL string
// Labels contains additional metadata about the voice.
Labels map[string]string
}
Voice represents an ElevenLabs voice.
type VoiceAccent ¶
type VoiceAccent string
VoiceAccent represents accent options for voice generation.
const ( VoiceAccentBritish VoiceAccent = "british" VoiceAccentAmerican VoiceAccent = "american" VoiceAccentAfrican VoiceAccent = "african" VoiceAccentAustralian VoiceAccent = "australian" VoiceAccentIndian VoiceAccent = "indian" )
type VoiceDesignRequest ¶
type VoiceDesignRequest struct {
// Gender of the voice (required).
Gender VoiceGender
// Age category of the voice (required).
Age VoiceAge
// Accent of the voice (required).
Accent VoiceAccent
// AccentStrength controls accent prominence (0.3 to 2.0).
AccentStrength float64
// Text for voice preview (100-1000 characters).
Text string
}
VoiceDesignRequest contains options for generating a random voice.
type VoiceDesignResponse ¶
type VoiceDesignResponse struct {
// Audio is the generated voice sample.
Audio io.Reader
// GeneratedVoiceID can be used to save this voice permanently.
GeneratedVoiceID string
}
VoiceDesignResponse contains the generated voice preview.
type VoiceDesignService ¶
type VoiceDesignService struct {
// contains filtered or unexported fields
}
VoiceDesignService handles AI voice generation and design.
func (*VoiceDesignService) GeneratePreview ¶
func (s *VoiceDesignService) GeneratePreview(ctx context.Context, req *VoiceDesignRequest) (*VoiceDesignResponse, error)
GeneratePreview creates a voice preview based on design parameters. It returns an audio sample and a GeneratedVoiceID that can be used to save the voice permanently.
func (*VoiceDesignService) SaveVoice ¶
func (s *VoiceDesignService) SaveVoice(ctx context.Context, req *SaveVoiceRequest) (*Voice, error)
SaveVoice saves a previously generated voice to your voice library.
func (*VoiceDesignService) Simple ¶
func (s *VoiceDesignService) Simple(ctx context.Context, gender VoiceGender, age VoiceAge, accent VoiceAccent, previewText string) (*VoiceDesignResponse, error)
Simple generates a voice preview with common defaults.
type VoiceGender ¶
type VoiceGender string
VoiceGender represents the gender options for voice generation.
const ( VoiceGenderFemale VoiceGender = "female" VoiceGenderMale VoiceGender = "male" )
type VoiceSegment ¶
type VoiceSegment struct {
// VoiceID is the voice used for this segment.
VoiceID string
// StartTime is the start time in seconds.
StartTime float64
// EndTime is the end time in seconds.
EndTime float64
}
VoiceSegment represents a segment of audio for a specific voice.
type VoiceSettings ¶
type VoiceSettings struct {
// Stability determines how stable the voice is (0.0 to 1.0).
// Lower values introduce broader emotional range.
Stability float64
// SimilarityBoost determines how closely the AI should adhere to
// the original voice (0.0 to 1.0).
SimilarityBoost float64
// Style determines the style exaggeration (0.0 to 1.0).
// Higher values amplify the original speaker's style.
Style float64
// Speed adjusts the speed of the voice (0.25 to 4.0).
// 1.0 is the default speed.
Speed float64
// UseSpeakerBoost boosts similarity to the original speaker.
UseSpeakerBoost bool
}
VoiceSettings contains the voice configuration for text-to-speech.
func DefaultVoiceSettings ¶
func DefaultVoiceSettings() *VoiceSettings
DefaultVoiceSettings returns sensible default voice settings.
func VoiceSettingsForAudiobook ¶ added in v0.4.0
func VoiceSettingsForAudiobook() *VoiceSettings
VoiceSettingsForAudiobook returns settings tuned for audiobook narration. Clear, consistent, easy to listen to for extended periods.
func VoiceSettingsForCoursera ¶ added in v0.4.0
func VoiceSettingsForCoursera() *VoiceSettings
VoiceSettingsForCoursera returns settings tuned for Coursera courses. Slightly expressive, engaging for mixed media content.
func VoiceSettingsForEdX ¶ added in v0.4.0
func VoiceSettingsForEdX() *VoiceSettings
VoiceSettingsForEdX returns settings tuned for edX courses. Very stable, highly intelligible, slightly faster for dense academic content.
func VoiceSettingsForInstagram ¶ added in v0.4.0
func VoiceSettingsForInstagram() *VoiceSettings
VoiceSettingsForInstagram returns settings tuned for Instagram content. Energetic but polished, suitable for brand content.
func VoiceSettingsForPodcast ¶ added in v0.4.0
func VoiceSettingsForPodcast() *VoiceSettings
VoiceSettingsForPodcast returns settings tuned for podcast content. Natural conversational tone for long-form audio content.
func VoiceSettingsForTikTok ¶ added in v0.4.0
func VoiceSettingsForTikTok() *VoiceSettings
VoiceSettingsForTikTok returns settings tuned for TikTok content. Designed for immediate engagement in the first 1-3 seconds.
func VoiceSettingsForUdemy ¶ added in v0.4.0
func VoiceSettingsForUdemy() *VoiceSettings
VoiceSettingsForUdemy returns settings tuned for Udemy courses. Neutral, clear, consistent, safe for long lectures.
func VoiceSettingsForYouTube ¶ added in v0.4.0
func VoiceSettingsForYouTube() *VoiceSettings
VoiceSettingsForYouTube returns settings tuned for YouTube content. Designed to hold attention for 5-20 minutes without sounding robotic or theatrical.
func (*VoiceSettings) Validate ¶
func (vs *VoiceSettings) Validate() error
Validate validates the voice settings.
type VoicesService ¶
type VoicesService struct {
// contains filtered or unexported fields
}
VoicesService handles voice operations.
func (*VoicesService) Delete ¶
func (s *VoicesService) Delete(ctx context.Context, voiceID string) error
Delete deletes a voice by ID.
func (*VoicesService) GetDefaultSettings ¶
func (s *VoicesService) GetDefaultSettings(ctx context.Context) (*VoiceSettings, error)
GetDefaultSettings returns the default voice settings.
func (*VoicesService) GetSettings ¶
func (s *VoicesService) GetSettings(ctx context.Context, voiceID string) (*VoiceSettings, error)
GetSettings returns the settings for a voice.
type WebSocketSTTConnection ¶
type WebSocketSTTConnection struct {
// contains filtered or unexported fields
}
WebSocketSTTConnection represents an active WebSocket STT connection.
func (*WebSocketSTTConnection) Close ¶
func (wsc *WebSocketSTTConnection) Close() error
Close closes the WebSocket connection gracefully.
func (*WebSocketSTTConnection) Commit ¶ added in v0.7.0
func (wsc *WebSocketSTTConnection) Commit() error
Commit forces a commit of the current transcript segment. This sends an empty audio chunk with commit=true.
func (*WebSocketSTTConnection) Errors ¶
func (wsc *WebSocketSTTConnection) Errors() <-chan error
Errors returns a channel that receives errors from the connection.
func (*WebSocketSTTConnection) SendAudio ¶
func (wsc *WebSocketSTTConnection) SendAudio(audio []byte) error
SendAudio sends audio data for transcription. The audio should be in the format specified in WebSocketSTTOptions.AudioFormat.
func (*WebSocketSTTConnection) SendAudioWithCommit ¶ added in v0.7.0
func (wsc *WebSocketSTTConnection) SendAudioWithCommit(audio []byte, commit bool) error
SendAudioWithCommit sends audio data and optionally commits the transcript. When commit is true, the server will finalize the current transcript segment. This is useful with the manual commit strategy.
func (*WebSocketSTTConnection) SessionID ¶ added in v0.7.0
func (wsc *WebSocketSTTConnection) SessionID() string
SessionID returns the session ID assigned by the server. This is available after the connection is established.
func (*WebSocketSTTConnection) StreamAudio ¶
func (wsc *WebSocketSTTConnection) StreamAudio(ctx context.Context, audioStream <-chan []byte) (<-chan *STTTranscript, <-chan error)
StreamAudio is a convenience method that streams audio from a channel. It handles committing automatically when the input channel closes.
func (*WebSocketSTTConnection) Transcripts ¶
func (wsc *WebSocketSTTConnection) Transcripts() <-chan *STTTranscript
Transcripts returns a channel that receives transcription results.
type WebSocketSTTOptions ¶
type WebSocketSTTOptions struct {
// ModelID is the transcription model to use.
// Default: "scribe_v2_realtime"
ModelID string
// AudioFormat specifies the audio encoding format.
// Options: "pcm_8000", "pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100", "pcm_48000", "ulaw_8000"
// Default: "pcm_16000"
AudioFormat string
// LanguageCode is the expected language (e.g., "en", "es").
// If not specified, language will be auto-detected.
LanguageCode string
// IncludeTimestamps enables word-level timing information.
IncludeTimestamps bool
// IncludeLanguageDetection includes detected language in responses.
IncludeLanguageDetection bool
// CommitStrategy determines how transcripts are committed.
// Options: "manual" (default), "vad" (voice activity detection)
CommitStrategy string
// VADSilenceThresholdSecs is the silence duration to trigger commit in VAD mode.
// Default: 1.5
VADSilenceThresholdSecs float64
// VADThreshold is the VAD sensitivity threshold.
// Default: 0.4
VADThreshold float64
// MinSpeechDurationMs is the minimum speech duration in milliseconds.
// Default: 100
MinSpeechDurationMs int
// MinSilenceDurationMs is the minimum silence duration in milliseconds.
// Default: 100
MinSilenceDurationMs int
}
WebSocketSTTOptions configures the WebSocket STT connection.
func DefaultWebSocketSTTOptions ¶
func DefaultWebSocketSTTOptions() *WebSocketSTTOptions
DefaultWebSocketSTTOptions returns default options for real-time STT.
type WebSocketSTTService ¶
type WebSocketSTTService struct {
// contains filtered or unexported fields
}
WebSocketSTTService handles real-time speech-to-text via WebSocket.
func (*WebSocketSTTService) Connect ¶
func (s *WebSocketSTTService) Connect(ctx context.Context, opts *WebSocketSTTOptions) (*WebSocketSTTConnection, error)
Connect establishes a WebSocket connection for real-time STT.
type WebSocketTTSConnection ¶
type WebSocketTTSConnection struct {
// contains filtered or unexported fields
}
WebSocketTTSConnection represents an active WebSocket TTS connection.
func (*WebSocketTTSConnection) Alignments ¶
func (wsc *WebSocketTTSConnection) Alignments() <-chan *TTSAlignment
Alignments returns a channel that receives word alignment information.
func (*WebSocketTTSConnection) Audio ¶
func (wsc *WebSocketTTSConnection) Audio() <-chan []byte
Audio returns a channel that receives audio chunks as they are generated.
func (*WebSocketTTSConnection) Close ¶
func (wsc *WebSocketTTSConnection) Close() error
Close closes the WebSocket connection gracefully.
func (*WebSocketTTSConnection) Done ¶ added in v0.7.0
func (wsc *WebSocketTTSConnection) Done() <-chan struct{}
Done returns a channel that is closed when all audio has been received after Flush(). Use this to wait for completion before closing the connection.
func (*WebSocketTTSConnection) Errors ¶
func (wsc *WebSocketTTSConnection) Errors() <-chan error
Errors returns a channel that receives errors from the connection.
func (*WebSocketTTSConnection) Flush ¶
func (wsc *WebSocketTTSConnection) Flush() error
Flush signals that no more text will be sent and flushes remaining audio. This should be called when the text stream is complete. After calling Flush, use Done() to wait for all audio to be received.
func (*WebSocketTTSConnection) SendText ¶
func (wsc *WebSocketTTSConnection) SendText(text string) error
SendText sends text to be converted to speech. The text can be sent in chunks as it becomes available (e.g., from an LLM stream).
func (*WebSocketTTSConnection) SendTextWithContext ¶
func (wsc *WebSocketTTSConnection) SendTextWithContext(text, contextID string) error
SendTextWithContext sends text with a specific context ID for multi-context sessions.
func (*WebSocketTTSConnection) StreamText ¶
func (wsc *WebSocketTTSConnection) StreamText(ctx context.Context, textStream <-chan string) (<-chan []byte, <-chan error)
StreamText is a convenience method that sends all text from a channel and returns audio. It handles flushing automatically when the input channel closes.
func (*WebSocketTTSConnection) TriggerGeneration ¶
func (wsc *WebSocketTTSConnection) TriggerGeneration() error
TriggerGeneration forces audio generation for buffered text.
type WebSocketTTSOptions ¶
type WebSocketTTSOptions struct {
// ModelID is the model to use. Defaults to "eleven_turbo_v2_5" for low latency.
ModelID string
// OutputFormat specifies the audio output format.
// Recommended for real-time: "pcm_16000", "pcm_22050", "pcm_24000", "pcm_44100"
// Also supports: "mp3_44100_64", "mp3_44100_96", "mp3_44100_128", "mp3_44100_192"
OutputFormat string
// VoiceSettings configures the voice parameters.
VoiceSettings *VoiceSettings
// OptimizeStreamingLatency reduces latency at the cost of quality (0-4).
// 0 = no optimization, 4 = maximum optimization.
OptimizeStreamingLatency int
// EnableSSMLParsing enables SSML parsing for the input text.
EnableSSMLParsing bool
// LanguageCode is the ISO language code (e.g., "en", "es").
LanguageCode string
// ChunkLengthSchedule controls text chunking for audio generation.
// Array of integers representing character counts before generating audio.
ChunkLengthSchedule []int
// InactivityTimeout is the context timeout in seconds (default 20).
InactivityTimeout int
// PronunciationDictionaryIDs is a list of pronunciation dictionary IDs to use.
PronunciationDictionaryIDs []string
}
WebSocketTTSOptions configures the WebSocket TTS connection.
func DefaultWebSocketTTSOptions ¶
func DefaultWebSocketTTSOptions() *WebSocketTTSOptions
DefaultWebSocketTTSOptions returns default options optimized for low latency.
type WebSocketTTSService ¶
type WebSocketTTSService struct {
// contains filtered or unexported fields
}
WebSocketTTSService handles real-time text-to-speech via WebSocket.
Stream Completion Behavior ¶
ElevenLabs WebSocket TTS does not send an explicit "end of stream" signal. After calling Flush(), the server generates remaining audio and then waits for more input. If no input arrives within the inactivity timeout (default 20 seconds), the server sends an "input_timeout_exceeded" error and closes the connection.
For applications that need faster stream completion detection, set a shorter InactivityTimeout in WebSocketTTSOptions (e.g., 5 seconds) and treat the timeout as a successful completion if audio was received after flush.
func (*WebSocketTTSService) Connect ¶
func (s *WebSocketTTSService) Connect(ctx context.Context, voiceID string, opts *WebSocketTTSOptions) (*WebSocketTTSConnection, error)
Connect establishes a WebSocket connection for real-time TTS.
Source Files
¶
- audio.go
- audioisolation.go
- client.go
- dubbing.go
- errors.go
- forcedalignment.go
- history.go
- models.go
- music.go
- projects.go
- pronunciation.go
- pronunciation_rules.go
- soundeffects.go
- speechtospeech.go
- speechtotext.go
- texttodialogue.go
- texttospeech.go
- twilio.go
- user.go
- voicedesign.go
- voices.go
- voicesettings.go
- websocketstt.go
- websockettts.go
Directories
¶
| Path | Synopsis |
|---|---|
| cmd | |
| cmd/openapi-convert (command) | Command openapi-convert converts OpenAPI 3.1 specs to 3.0.3 for ogen compatibility. |
| cmd/ttsscript (command) | Command ttsscript generates TTS audio from a JSON script file using ElevenLabs. |
| examples | |
| examples/basic (command) | Example basic shows how to use the ElevenLabs SDK for common operations. |
| examples/retryhttp (command) | Example demonstrating retry middleware with ElevenLabs client. |
| examples/simple (command) | |
| examples/speech-to-speech (command) | Example: Speech-to-Speech - Voice conversion |
| examples/ttsscript (command) | Example: Using ttsscript to generate multilingual TTS audio |
| examples/twilio (command) | Example: Twilio Integration - Phone call handling |
| examples/websocket-stt (command) | Example: WebSocket STT - Real-time speech-to-text streaming |
| examples/websocket-tts (command) | Example: WebSocket TTS - Real-time text-to-speech streaming |
| internal | |
| internal/api | Code generated by ogen, DO NOT EDIT. |
| omnivoice | Package omnivoice provides OmniVoice provider implementations using the ElevenLabs API. |
| omnivoice/agent | Package agent provides an OmniVoice Agent provider implementation using ElevenLabs. |
| omnivoice/stt | Package stt provides an OmniVoice STT provider implementation using ElevenLabs. |
| omnivoice/tts | Package tts provides an OmniVoice TTS provider implementation using ElevenLabs. |
| ttsscript | Package ttsscript provides a structured format for authoring multilingual TTS (Text-to-Speech) scripts that can be compiled to various output formats. |
| voices | Package voices provides reference information for ElevenLabs voices. |