Documentation
¶
Index ¶
- Constants
- type ASRChunk
- type ASRServiceV2
- type ASRV2Config
- type ASRV2Result
- type ASRV2Session
- type ASRV2Utterance
- type ASRV2Word
- type AudioFormat
- type Client
- type Error
- type Language
- type MultipartFile
- type Option
- func WithAPIKey(apiKey string) Option
- func WithBaseURL(url string) Option
- func WithBearerToken(token string) Option
- func WithCluster(cluster string) Option
- func WithHTTPClient(client *http.Client) Option
- func WithHTTPTransport(doer transport.HTTPDoer) Option
- func WithRealtimeAPIKey(accessKey, appKey string) Option
- func WithResourceID(resourceID string) Option
- func WithTimeout(timeout time.Duration) Option
- func WithUserID(userID string) Option
- func WithV2APIKey(accessKey, appKey string) Option
- func WithWebSocketURL(url string) Option
- type RealtimeASRConfig
- type RealtimeAudioConfig
- type RealtimeConfig
- type RealtimeConnection
- type RealtimeConversationMessage
- type RealtimeDialogConfig
- type RealtimeEvent
- type RealtimeEventType
- type RealtimeGenerationProps
- type RealtimePromptConfig
- type RealtimeService
- type RealtimeSession
- func (s *RealtimeSession) Close() error
- func (s *RealtimeSession) Interrupt(ctx context.Context) error
- func (s *RealtimeSession) Recv() iter.Seq2[*RealtimeEvent, error]
- func (s *RealtimeSession) RecvEvent(ctx context.Context) (*RealtimeEvent, error)
- func (s *RealtimeSession) ReplaceHistory(index int, message RealtimeConversationMessage) error
- func (s *RealtimeSession) SayHello(ctx context.Context, content string) error
- func (s *RealtimeSession) SendAudio(ctx context.Context, audio []byte) error
- func (s *RealtimeSession) SendTTSText(ctx context.Context, text string) error
- func (s *RealtimeSession) SendText(ctx context.Context, text string) error
- func (s *RealtimeSession) SendUserMessage(ctx context.Context, text string) error
- func (s *RealtimeSession) SessionID() string
- func (s *RealtimeSession) UpdateHistory(history []RealtimeConversationMessage)
- func (s *RealtimeSession) UpdatePrompt(prompt RealtimePromptConfig)
- func (s *RealtimeSession) UpdateProps(props RealtimeGenerationProps)
- type RealtimeTTSConfig
- type SampleRate
- type StreamASRConfig
- type TTSServiceV2
- type TTSV2Chunk
- type TTSV2MixSpeaker
- type TTSV2MixSpeakerSource
- type TTSV2Request
- type TTSV2WSChunk
- type TTSV2WSConfig
- type TTSV2WSSession
- func (s *TTSV2WSSession) CancelSession(ctx context.Context) error
- func (s *TTSV2WSSession) Close() error
- func (s *TTSV2WSSession) Recv() iter.Seq2[*TTSV2WSChunk, error]
- func (s *TTSV2WSSession) SendText(ctx context.Context, text string, isLast bool) error
- func (s *TTSV2WSSession) StartNextSession(ctx context.Context) error
- type Task
- type TaskFailureMapper
- type TaskPoller
- type TaskStatus
- type TaskStatusMapper
- type Utterance
- type VoiceCloneRequest
- type VoiceCloneService
- func (s *VoiceCloneService) Activate(ctx context.Context, voiceID string) error
- func (s *VoiceCloneService) GetStatus(ctx context.Context, speakerOrVoiceID string) (*VoiceCloneStatus, error)
- func (s *VoiceCloneService) Submit(ctx context.Context, req *VoiceCloneRequest) (*Task[VoiceCloneStatus], error)
- func (s *VoiceCloneService) Upload(ctx context.Context, req *VoiceCloneRequest) (*Task[VoiceCloneStatus], error)
- type VoiceCloneStatus
- type Word
Constants ¶
const ( AppKeyRealtime = "PlgvMymc7f3tQnJ6" AppKeyPodcast = "aGjiRDfUWi" )
V2/V3 fixed app keys (official constants, not user credentials).
const ( ResourceTTSV1 = "seed-tts-1.0" ResourceTTSV1Concurr = "seed-tts-1.0-concurr" ResourceTTSV2 = "seed-tts-2.0" ResourceTTSV2Concurr = "seed-tts-2.0-concurr" ResourceVoiceCloneV1 = "seed-icl-1.0" ResourceVoiceCloneV2 = "seed-icl-2.0" ResourceASRStream = "volc.bigasr.sauc.duration" ResourceASRStreamV2 = "volc.seedasr.sauc.duration" ResourceASRFile = "volc.bigasr.auc.duration" ResourceRealtime = "volc.speech.dialog" ResourcePodcast = "volc.service_type.10050" ResourceTranslation = "volc.megatts.simt" )
V2/V3 Resource IDs.
const ( CodeSuccess = 3000 CodeParamError = 3001 CodeAuthError = 3002 CodeRateLimit = 3003 CodeQuotaExceed = 3004 CodeServerError = 3005 CodeASRSuccess = 1000 )
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type ASRChunk ¶
type ASRChunk = ASRV2Result
type ASRServiceV2 ¶
type ASRServiceV2 struct {
// contains filtered or unexported fields
}
ASRServiceV2 provides SAUC WebSocket streaming recognition.
func (*ASRServiceV2) OpenStreamSession ¶
func (s *ASRServiceV2) OpenStreamSession(ctx context.Context, cfg *ASRV2Config) (*ASRV2Session, error)
OpenStreamSession opens a SAUC V2 WebSocket session.
type ASRV2Config ¶
type ASRV2Config struct {
Format AudioFormat `json:"format" yaml:"format"`
SampleRate SampleRate `json:"sample_rate" yaml:"sample_rate"`
Channel int `json:"channel,omitempty" yaml:"channel,omitempty"`
Channels int `json:"channels,omitempty" yaml:"channels,omitempty"` // Backward-compatible alias field.
Bits int `json:"bits,omitempty" yaml:"bits,omitempty"`
Language Language `json:"language,omitempty" yaml:"language,omitempty"`
EnableITN bool `json:"enable_itn,omitempty" yaml:"enable_itn,omitempty"`
EnablePunc bool `json:"enable_punc,omitempty" yaml:"enable_punc,omitempty"`
EnableDiarization bool `json:"enable_diarization,omitempty" yaml:"enable_diarization,omitempty"`
SpeakerNum int `json:"speaker_num,omitempty" yaml:"speaker_num,omitempty"`
Hotwords []string `json:"hotwords,omitempty" yaml:"hotwords,omitempty"`
ResultType string `json:"result_type,omitempty" yaml:"result_type,omitempty"` // single/full
ResourceID string `json:"resource_id,omitempty" yaml:"resource_id,omitempty"`
}
ASRV2Config is the configuration for a SAUC V2 streaming session.
type ASRV2Result ¶
type ASRV2Result struct {
Text string `json:"text"`
Utterances []ASRV2Utterance `json:"utterances,omitempty"`
IsFinal bool `json:"is_final"`
Duration int `json:"duration,omitempty"`
ReqID string `json:"reqid,omitempty"`
TraceID string `json:"trace_id,omitempty"`
LogID string `json:"log_id,omitempty"`
ConnectID string `json:"connect_id,omitempty"`
}
ASRV2Result is one parsed server response.
type ASRV2Session ¶
type ASRV2Session struct {
// contains filtered or unexported fields
}
ASRV2Session represents one streaming recognition session.
func (*ASRV2Session) Recv ¶
func (s *ASRV2Session) Recv() iter.Seq2[*ASRV2Result, error]
Recv yields recognition results as a stream.
type ASRV2Utterance ¶
type ASRV2Utterance struct {
Text string `json:"text"`
StartTime int `json:"start_time"`
EndTime int `json:"end_time"`
Definite bool `json:"definite"`
SpeakerID string `json:"speaker_id,omitempty"`
Words []ASRV2Word `json:"words,omitempty"`
Confidence float64 `json:"confidence,omitempty"`
}
ASRV2Utterance contains utterance-level info.
type ASRV2Word ¶
type ASRV2Word struct {
Text string `json:"text"`
StartTime int `json:"start_time"`
EndTime int `json:"end_time"`
Conf float64 `json:"conf,omitempty"`
}
ASRV2Word contains word-level timing info.
type AudioFormat ¶
type AudioFormat string
AudioFormat represents audio encoding format.
const ( FormatPCM AudioFormat = "pcm" FormatWAV AudioFormat = "wav" FormatMP3 AudioFormat = "mp3" FormatOGG AudioFormat = "ogg_opus" FormatAAC AudioFormat = "aac" FormatM4A AudioFormat = "m4a" )
type Client ¶
type Client struct {
// ASR V2 streaming recognition.
ASR *ASRServiceV2
ASRV2 *ASRServiceV2
// Voice cloning.
VoiceClone *VoiceCloneService
// Realtime dialogue.
Realtime *RealtimeService
// TTS V2 WebSocket synthesis.
TTS *TTSServiceV2
TTSV2 *TTSServiceV2
// contains filtered or unexported fields
}
Client is the SDK entry point.
In this migration stage, ASR V2, TTS V2 WS, Voice Clone, and Realtime are implemented.
type Error ¶
type Error struct {
Code int `json:"code"`
Message string `json:"message"`
TraceID string `json:"trace_id,omitempty"`
LogID string `json:"log_id,omitempty"`
ConnectID string `json:"connect_id,omitempty"`
HTTPStatus int `json:"-"`
ReqID string `json:"reqid,omitempty"`
}
Error is the unified error model.
func (*Error) IsAuthError ¶
func (*Error) IsInvalidParam ¶
func (*Error) IsQuotaExceeded ¶
func (*Error) IsRateLimit ¶
func (*Error) IsServerError ¶
type MultipartFile ¶
MultipartFile is one file part in a multipart/form-data payload.
type Option ¶
type Option func(*clientConfig)
Option configures Client.
func WithBearerToken ¶
WithBearerToken sets the Bearer token. The V1 header format is `Authorization: Bearer;{token}` (a historical convention that uses a semicolon as the separator).
func WithCluster ¶
WithCluster sets the V1 cluster (kept for backward compatibility).
func WithHTTPClient ¶
WithHTTPClient sets a custom HTTP client.
func WithHTTPTransport ¶
WithHTTPTransport sets a custom HTTP transport doer.
func WithRealtimeAPIKey ¶
WithRealtimeAPIKey is a compatibility alias.
func WithResourceID ¶
WithResourceID sets the default resource_id.
func WithV2APIKey ¶
WithV2APIKey sets V2/V3 authentication.
func WithWebSocketURL ¶
WithWebSocketURL sets the WebSocket base URL.
type RealtimeASRConfig ¶
type RealtimeASRConfig struct {
Language Language `json:"language,omitempty" yaml:"language,omitempty"`
Extra map[string]any `json:"extra,omitempty" yaml:"extra,omitempty"`
}
RealtimeASRConfig configures ASR behavior.
type RealtimeAudioConfig ¶
type RealtimeAudioConfig struct {
Channel int `json:"channel" yaml:"channel"`
Format AudioFormat `json:"format" yaml:"format"`
SampleRate SampleRate `json:"sample_rate" yaml:"sample_rate"`
Bits int `json:"bits,omitempty" yaml:"bits,omitempty"`
}
RealtimeAudioConfig describes audio IO parameters.
type RealtimeConfig ¶
type RealtimeConfig struct {
ASR RealtimeASRConfig `json:"asr" yaml:"asr"`
TTS RealtimeTTSConfig `json:"tts" yaml:"tts"`
Dialog RealtimeDialogConfig `json:"dialog" yaml:"dialog"`
Prompt RealtimePromptConfig `json:"prompt" yaml:"prompt,omitempty"`
Props RealtimeGenerationProps `json:"props" yaml:"props,omitempty"`
History []RealtimeConversationMessage `json:"history,omitempty" yaml:"history,omitempty"`
ResourceID string `json:"resource_id,omitempty" yaml:"resource_id,omitempty"`
// Local runtime controls (not sent to server).
EventBuffer int `json:"-" yaml:"-"`
BackpressureTimeout time.Duration `json:"-" yaml:"-"`
}
RealtimeConfig represents one realtime session config.
func DefaultRealtimeConfig ¶
func DefaultRealtimeConfig() RealtimeConfig
DefaultRealtimeConfig returns a baseline realtime config.
type RealtimeConnection ¶
type RealtimeConnection struct {
// contains filtered or unexported fields
}
RealtimeConnection represents an established realtime websocket connection.
func (*RealtimeConnection) Close ¶
func (c *RealtimeConnection) Close() error
Close closes the WebSocket connection.
func (*RealtimeConnection) StartSession ¶
func (c *RealtimeConnection) StartSession(ctx context.Context, cfg *RealtimeConfig) (*RealtimeSession, error)
StartSession starts one realtime session on the current connection.
type RealtimeConversationMessage ¶
type RealtimeConversationMessage struct {
Role string `json:"role" yaml:"role"`
Content string `json:"content" yaml:"content"`
}
RealtimeConversationMessage is one dialog history entry.
type RealtimeDialogConfig ¶
type RealtimeDialogConfig struct {
BotName string `json:"bot_name,omitempty" yaml:"bot_name,omitempty"`
SystemRole string `json:"system_role,omitempty" yaml:"system_role,omitempty"`
SpeakingStyle string `json:"speaking_style,omitempty" yaml:"speaking_style,omitempty"`
CharacterManifest string `json:"character_manifest,omitempty" yaml:"character_manifest,omitempty"`
Extra map[string]any `json:"extra,omitempty" yaml:"extra,omitempty"`
}
RealtimeDialogConfig configures dialogue behavior.
type RealtimeEvent ¶
type RealtimeEvent struct {
Type RealtimeEventType `json:"type"`
SessionID string `json:"session_id,omitempty"`
ConnectID string `json:"connect_id,omitempty"`
Sequence int32 `json:"sequence,omitempty"`
Text string `json:"text,omitempty"`
Audio []byte `json:"audio,omitempty"`
Payload []byte `json:"payload,omitempty"`
Error *Error `json:"error,omitempty"`
IsFinal bool `json:"is_final,omitempty"`
ReqID string `json:"reqid,omitempty"`
TraceID string `json:"trace_id,omitempty"`
LogID string `json:"log_id,omitempty"`
}
RealtimeEvent is one parsed server event.
type RealtimeEventType ¶
type RealtimeEventType int32
RealtimeEventType represents a realtime WebSocket event ID.
const ( // Connection events. EventConnectionStarted RealtimeEventType = 50 EventConnectionFailed RealtimeEventType = 51 EventConnectionEnded RealtimeEventType = 52 // Session events. EventSessionStarted RealtimeEventType = 150 EventSessionFinished RealtimeEventType = 152 EventSessionFailed RealtimeEventType = 153 EventUsageResponse RealtimeEventType = 154 // ASR events. EventASRInfo RealtimeEventType = 450 EventASRResponse RealtimeEventType = 451 EventASREnded RealtimeEventType = 459 // TTS events. EventTTSStarted RealtimeEventType = 350 EventTTSSegmentEnd RealtimeEventType = 351 EventTTSAudioData RealtimeEventType = 352 EventTTSFinished RealtimeEventType = 359 // Chat events. EventChatResponse RealtimeEventType = 550 EventChatEnded RealtimeEventType = 559 )
type RealtimeGenerationProps ¶
type RealtimeGenerationProps struct {
Temperature float64 `json:"temperature,omitempty" yaml:"temperature,omitempty"`
TopP float64 `json:"top_p,omitempty" yaml:"top_p,omitempty"`
MaxTokens int `json:"max_tokens,omitempty" yaml:"max_tokens,omitempty"`
PresencePenalty float64 `json:"presence_penalty,omitempty" yaml:"presence_penalty,omitempty"`
FrequencyPenalty float64 `json:"frequency_penalty,omitempty" yaml:"frequency_penalty,omitempty"`
Extra map[string]any `json:"extra,omitempty" yaml:"extra,omitempty"`
}
RealtimeGenerationProps controls generation params.
type RealtimePromptConfig ¶
type RealtimePromptConfig struct {
System string `json:"system,omitempty" yaml:"system,omitempty"`
Variables map[string]string `json:"variables,omitempty" yaml:"variables,omitempty"`
}
RealtimePromptConfig controls prompt and prompt variables.
type RealtimeService ¶
type RealtimeService struct {
// contains filtered or unexported fields
}
RealtimeService provides real-time dialogue operations.
func (*RealtimeService) Connect ¶
func (s *RealtimeService) Connect(ctx context.Context, cfg *RealtimeConfig) (*RealtimeSession, error)
Connect is a convenience method for Dial + StartSession.
func (*RealtimeService) Dial ¶
func (s *RealtimeService) Dial(ctx context.Context) (*RealtimeConnection, error)
Dial opens a realtime WebSocket connection and completes the StartConnection handshake.
func (*RealtimeService) OpenSession ¶
func (s *RealtimeService) OpenSession(ctx context.Context, cfg *RealtimeConfig) (*RealtimeSession, error)
OpenSession is a compatibility alias for Connect.
type RealtimeSession ¶
type RealtimeSession struct {
// contains filtered or unexported fields
}
RealtimeSession represents one realtime dialogue session.
func (*RealtimeSession) Close ¶
func (s *RealtimeSession) Close() error
Close closes the current session. It is idempotent.
func (*RealtimeSession) Interrupt ¶
func (s *RealtimeSession) Interrupt(ctx context.Context) error
Interrupt interrupts the current generation (event=102).
func (*RealtimeSession) Recv ¶
func (s *RealtimeSession) Recv() iter.Seq2[*RealtimeEvent, error]
Recv returns a streaming iterator. Concurrent Recv is not supported.
func (*RealtimeSession) RecvEvent ¶
func (s *RealtimeSession) RecvEvent(ctx context.Context) (*RealtimeEvent, error)
RecvEvent receives one event. Concurrent calls to Recv/RecvEvent are not supported.
func (*RealtimeSession) ReplaceHistory ¶
func (s *RealtimeSession) ReplaceHistory(index int, message RealtimeConversationMessage) error
ReplaceHistory replaces one item in the local history by index.
func (*RealtimeSession) SayHello ¶
func (s *RealtimeSession) SayHello(ctx context.Context, content string) error
SayHello sends a SayHello event (event=300).
func (*RealtimeSession) SendAudio ¶
func (s *RealtimeSession) SendAudio(ctx context.Context, audio []byte) error
SendAudio sends one audio chunk (event=200).
func (*RealtimeSession) SendTTSText ¶
func (s *RealtimeSession) SendTTSText(ctx context.Context, text string) error
SendTTSText sends incremental TTS text (event=500).
func (*RealtimeSession) SendText ¶
func (s *RealtimeSession) SendText(ctx context.Context, text string) error
SendText sends user text (event=501).
func (*RealtimeSession) SendUserMessage ¶
func (s *RealtimeSession) SendUserMessage(ctx context.Context, text string) error
SendUserMessage sends one user text message together with a snapshot of the current history, prompt, and props.
func (*RealtimeSession) SessionID ¶
func (s *RealtimeSession) SessionID() string
SessionID returns the current session ID.
func (*RealtimeSession) UpdateHistory ¶
func (s *RealtimeSession) UpdateHistory(history []RealtimeConversationMessage)
UpdateHistory replaces the whole local history snapshot used by future turns.
func (*RealtimeSession) UpdatePrompt ¶
func (s *RealtimeSession) UpdatePrompt(prompt RealtimePromptConfig)
UpdatePrompt replaces the current prompt config used by future turns.
func (*RealtimeSession) UpdateProps ¶
func (s *RealtimeSession) UpdateProps(props RealtimeGenerationProps)
UpdateProps replaces the current generation props used by future turns.
type RealtimeTTSConfig ¶
type RealtimeTTSConfig struct {
Speaker string `json:"speaker" yaml:"speaker"`
AudioConfig RealtimeAudioConfig `json:"audio_config" yaml:"audio_config"`
Extra map[string]any `json:"extra,omitempty" yaml:"extra,omitempty"`
}
RealtimeTTSConfig configures TTS behavior.
type SampleRate ¶
type SampleRate int
SampleRate represents audio sample rate.
const ( SampleRate8000 SampleRate = 8000 SampleRate16000 SampleRate = 16000 SampleRate22050 SampleRate = 22050 SampleRate24000 SampleRate = 24000 SampleRate32000 SampleRate = 32000 SampleRate44100 SampleRate = 44100 SampleRate48000 SampleRate = 48000 )
type StreamASRConfig ¶
type StreamASRConfig = ASRV2Config
StreamASRConfig is a backward-compatible alias mapped to the V2 type ASRV2Config.
type TTSServiceV2 ¶
type TTSServiceV2 struct {
// contains filtered or unexported fields
}
TTSServiceV2 provides TTS V2 WebSocket streaming synthesis.
func (*TTSServiceV2) OpenStreamSession ¶
func (s *TTSServiceV2) OpenStreamSession(ctx context.Context, cfg *TTSV2WSConfig) (*TTSV2WSSession, error)
OpenStreamSession opens a TTS V2 bidirectional WebSocket stream session.
func (*TTSServiceV2) Stream ¶
func (s *TTSServiceV2) Stream(ctx context.Context, req *TTSV2Request) iter.Seq2[*TTSV2Chunk, error]
Stream synthesizes speech with the TTS V2 HTTP streaming endpoint.
type TTSV2Chunk ¶
type TTSV2Chunk struct {
Audio []byte `json:"-"`
IsLast bool `json:"is_last"`
ReqID string `json:"reqid,omitempty"`
TraceID string `json:"trace_id,omitempty"`
LogID string `json:"log_id,omitempty"`
Code int `json:"code"`
Message string `json:"message,omitempty"`
}
TTSV2Chunk is one stream chunk from the TTS V2 HTTP streaming API.
type TTSV2MixSpeaker ¶
type TTSV2MixSpeaker struct {
Speakers []TTSV2MixSpeakerSource `json:"speakers,omitempty" yaml:"speakers,omitempty"`
}
TTSV2MixSpeaker represents mixed-speaker parameters.
type TTSV2MixSpeakerSource ¶
type TTSV2MixSpeakerSource struct {
SourceSpeaker string `json:"source_speaker" yaml:"source_speaker"`
MixFactor float64 `json:"mix_factor" yaml:"mix_factor"`
}
TTSV2MixSpeakerSource is one source speaker in a mixed-speaker request.
type TTSV2Request ¶
type TTSV2Request struct {
Text string `json:"text" yaml:"text"`
Speaker string `json:"speaker" yaml:"speaker"`
Format AudioFormat `json:"format,omitempty" yaml:"format,omitempty"`
SampleRate SampleRate `json:"sample_rate,omitempty" yaml:"sample_rate,omitempty"`
BitRate int `json:"bit_rate,omitempty" yaml:"bit_rate,omitempty"`
SpeechRate int `json:"speech_rate,omitempty" yaml:"speech_rate,omitempty"`
PitchRate int `json:"pitch_rate,omitempty" yaml:"pitch_rate,omitempty"`
VolumeRate int `json:"volume_rate,omitempty" yaml:"volume_rate,omitempty"`
Emotion string `json:"emotion,omitempty" yaml:"emotion,omitempty"`
Language string `json:"language,omitempty" yaml:"language,omitempty"`
ResourceID string `json:"resource_id,omitempty" yaml:"resource_id,omitempty"`
MixSpeaker *TTSV2MixSpeaker `json:"mix_speaker,omitempty" yaml:"mix_speaker,omitempty"`
}
TTSV2Request represents a TTS V2 stream request.
type TTSV2WSChunk ¶
type TTSV2WSChunk struct {
Audio []byte `json:"-"`
IsFinal bool `json:"is_final"`
Event int32 `json:"event"`
ReqID string `json:"reqid,omitempty"`
TraceID string `json:"trace_id,omitempty"`
LogID string `json:"log_id,omitempty"`
ConnectID string `json:"connect_id,omitempty"`
}
TTSV2WSChunk is one downstream chunk from the TTS V2 WebSocket stream.
type TTSV2WSConfig ¶
type TTSV2WSConfig struct {
Speaker string `json:"speaker" yaml:"speaker"`
Format AudioFormat `json:"format,omitempty" yaml:"format,omitempty"`
SampleRate SampleRate `json:"sample_rate,omitempty" yaml:"sample_rate,omitempty"`
// ResourceID defaults to seed-tts-2.0 when empty.
ResourceID string `json:"resource_id,omitempty" yaml:"resource_id,omitempty"`
}
TTSV2WSConfig is the configuration for a bidirectional TTS V2 WebSocket session.
type TTSV2WSSession ¶
type TTSV2WSSession struct {
// contains filtered or unexported fields
}
TTSV2WSSession represents one bidirectional TTS V2 WebSocket session.
func (*TTSV2WSSession) CancelSession ¶
func (s *TTSV2WSSession) CancelSession(ctx context.Context) error
CancelSession cancels the current session.
func (*TTSV2WSSession) Recv ¶
func (s *TTSV2WSSession) Recv() iter.Seq2[*TTSV2WSChunk, error]
Recv yields TTS output chunks.
func (*TTSV2WSSession) SendText ¶
SendText sends one text piece. When isLast=true, it also sends FinishSession.
func (*TTSV2WSSession) StartNextSession ¶
func (s *TTSV2WSSession) StartNextSession(ctx context.Context) error
StartNextSession starts a new session on the same WebSocket connection.
type Task ¶
Task represents an asynchronous task.
func (*Task[T]) SetFailureMapper ¶
func (t *Task[T]) SetFailureMapper(mapper TaskFailureMapper[T]) *Task[T]
SetFailureMapper customizes terminal task failure error mapping.
func (*Task[T]) SetStatusMapper ¶
func (t *Task[T]) SetStatusMapper(mapper TaskStatusMapper) *Task[T]
SetStatusMapper customizes task status normalization.
type TaskFailureMapper ¶
type TaskFailureMapper[T any] func(status TaskStatus, result *T) error
TaskFailureMapper converts terminal failure status to a concrete error.
type TaskPoller ¶
type TaskPoller[T any] func(ctx context.Context, taskID string) (status TaskStatus, result *T, err error)
TaskPoller polls current task status and optional result.
type TaskStatus ¶
type TaskStatus string
TaskStatus is the status of an asynchronous task.
const ( TaskStatusPending TaskStatus = "pending" TaskStatusProcessing TaskStatus = "processing" TaskStatusSuccess TaskStatus = "success" TaskStatusFailed TaskStatus = "failed" TaskStatusCancelled TaskStatus = "cancelled" )
type TaskStatusMapper ¶
type TaskStatusMapper func(status TaskStatus) TaskStatus
TaskStatusMapper normalizes raw task status values.
type Utterance ¶
type Utterance = ASRV2Utterance
type VoiceCloneRequest ¶
type VoiceCloneRequest struct {
// VoiceID is a custom voice identifier.
VoiceID string `json:"voice_id,omitempty" yaml:"voice_id,omitempty"`
// SpeakerID is an alias of VoiceID for compatibility with official docs.
SpeakerID string `json:"speaker_id,omitempty" yaml:"speaker_id,omitempty"`
Audio []byte `json:"-" yaml:"-"`
AudioFileName string `json:"audio_file_name,omitempty" yaml:"audio_file_name,omitempty"`
AudioContentType string `json:"audio_content_type,omitempty" yaml:"audio_content_type,omitempty"`
AudioFormat string `json:"audio_format,omitempty" yaml:"audio_format,omitempty"`
Text string `json:"text,omitempty" yaml:"text,omitempty"`
Language int `json:"language,omitempty" yaml:"language,omitempty"`
ModelType int `json:"model_type,omitempty" yaml:"model_type,omitempty"`
Source int `json:"source,omitempty" yaml:"source,omitempty"`
ResourceID string `json:"resource_id,omitempty" yaml:"resource_id,omitempty"`
PollInterval time.Duration `json:"-" yaml:"-"`
}
VoiceCloneRequest is the request payload for a voice clone upload task.
type VoiceCloneService ¶
type VoiceCloneService struct {
// contains filtered or unexported fields
}
VoiceCloneService provides voice clone training and status operations.
func (*VoiceCloneService) Activate ¶
func (s *VoiceCloneService) Activate(ctx context.Context, voiceID string) error
Activate formalizes a trained cloned voice.
func (*VoiceCloneService) GetStatus ¶
func (s *VoiceCloneService) GetStatus(ctx context.Context, speakerOrVoiceID string) (*VoiceCloneStatus, error)
GetStatus queries current voice clone task status.
func (*VoiceCloneService) Submit ¶
func (s *VoiceCloneService) Submit(ctx context.Context, req *VoiceCloneRequest) (*Task[VoiceCloneStatus], error)
Submit uploads training audio and returns a task handle for polling.
func (*VoiceCloneService) Upload ¶
func (s *VoiceCloneService) Upload(ctx context.Context, req *VoiceCloneRequest) (*Task[VoiceCloneStatus], error)
Upload uploads training audio and returns a task handle for polling.
type VoiceCloneStatus ¶
type VoiceCloneStatus struct {
TaskID string `json:"task_id,omitempty"`
SpeakerID string `json:"speaker_id,omitempty"`
VoiceID string `json:"voice_id,omitempty"`
Status TaskStatus `json:"status"`
RawStatus string `json:"raw_status,omitempty"`
RawStatusCode int `json:"raw_status_code,omitempty"`
StatusCode int `json:"status_code,omitempty"`
StatusMessage string `json:"status_message,omitempty"`
Version string `json:"version,omitempty"`
DemoAudio string `json:"demo_audio,omitempty"`
CreateTime int64 `json:"create_time,omitempty"`
ReqID string `json:"reqid,omitempty"`
TraceID string `json:"trace_id,omitempty"`
LogID string `json:"log_id,omitempty"`
}
VoiceCloneStatus is one status snapshot of a voice clone training task.
Source Files
¶
Directories
¶
| Path | Synopsis |
|---|---|
| examples | |
| asr_v2_sauc_ws (command) | |
| realtime (command) | |
| tts_v2/http_stream (command) | |
| tts_v2/websocket (command) | |
| voice_clone (command) | |
| internal | |