Documentation
¶
Index ¶
- Constants
- func CheckLocalTTSAvailable() []string
- func ComputeSampleByteCount(sampleRate, bitDepth, channels int) int
- func DetectLocalTTSCommand() string
- func GetAzureVoices() map[string]string
- func GetLocalTTSInfo() map[string]interface{}
- func NormalizeFramePeriod(d string) time.Duration
- func SetGlobalSynthesisFactory(factory SynthesisFactory)
- func StripEmoji(text string) string
- func WithSynthesis(svc AudioSynthesisEngine) media.MediaHandlerFunc
- type AmazonService
- func (as *AmazonService) CacheKey(text string) string
- func (as *AmazonService) Close() error
- func (as *AmazonService) Format() media.StreamFormat
- func (as *AmazonService) Provider() TTSProvider
- func (as *AmazonService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- type AmazonTTSConfig
- type AudioAudioSynthesisPlayerRequest
- type AudioSynthesisEngine
- type AudioSynthesisHandler
- type AudioSynthesisPlayer
- func (player *AudioSynthesisPlayer) Close()
- func (player *AudioSynthesisPlayer) Emit(h media.MediaHandler, audioPacket *media.AudioPacket, inputRate int)
- func (player *AudioSynthesisPlayer) EmitStopPlayState(h media.MediaHandler, duration string, playId string, sequence int, ...)
- func (player *AudioSynthesisPlayer) Interrupt(h media.MediaHandler, reason string)
- func (player *AudioSynthesisPlayer) Run(handler media.MediaHandler, ctx context.Context)
- type AudioSynthesisRequest
- type AzureConfig
- type AzureRequest
- type AzureService
- type BaiduTTSConfig
- type BaiduTTSService
- func (bs *BaiduTTSService) CacheKey(text string) string
- func (bs *BaiduTTSService) Close() error
- func (bs *BaiduTTSService) DoubleURLEncode(text string) string
- func (bs *BaiduTTSService) Format() media.StreamFormat
- func (bs *BaiduTTSService) Provider() TTSProvider
- func (bs *BaiduTTSService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- type CoquiResponse
- type CoquiService
- type CoquiTTSOption
- type DefaultSynthesisFactory
- func (f *DefaultSynthesisFactory) CreateEngine(config SynthesisConfig) (AudioSynthesisEngine, error)
- func (f *DefaultSynthesisFactory) GetSupportedProviders() []TTSProvider
- func (f *DefaultSynthesisFactory) IsProviderSupported(provider TTSProvider) bool
- func (f *DefaultSynthesisFactory) RegisterCreator(provider TTSProvider, ...)
- type ElevenLabsConfig
- type ElevenLabsRequest
- type ElevenLabsService
- func (es *ElevenLabsService) CacheKey(text string) string
- func (es *ElevenLabsService) Close() error
- func (es *ElevenLabsService) Format() media.StreamFormat
- func (es *ElevenLabsService) Provider() TTSProvider
- func (es *ElevenLabsService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- type ElevenLabsVoiceSettings
- type FishAudioConfig
- type FishAudioListModelsResponse
- type FishAudioRequest
- type FishAudioService
- func (fa *FishAudioService) CacheKey(text string) string
- func (fa *FishAudioService) Close() error
- func (fa *FishAudioService) Format() media.StreamFormat
- func (fa *FishAudioService) Provider() TTSProvider
- func (fa *FishAudioService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- type FishAudioVoiceOption
- type FishSpeechConfig
- type FishSpeechListModelsRequest
- type FishSpeechListModelsResponse
- type FishSpeechService
- func (fs *FishSpeechService) CacheKey(text string) string
- func (fs *FishSpeechService) Close() error
- func (fs *FishSpeechService) Format() media.StreamFormat
- func (fs *FishSpeechService) Provider() TTSProvider
- func (fs *FishSpeechService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- type FishSpeechV2Request
- type FishSpeechV2Response
- type FishSpeechVoiceOption
- type GoogleService
- func (gs *GoogleService) CacheKey(text string) string
- func (gs *GoogleService) Close() error
- func (gs *GoogleService) Format() media.StreamFormat
- func (gs *GoogleService) Provider() TTSProvider
- func (gs *GoogleService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- type GoogleTTSOption
- type LocalGoSpeechConfig
- type LocalGoSpeechProvider
- type LocalGoSpeechService
- func (s *LocalGoSpeechService) CacheKey(text string) string
- func (s *LocalGoSpeechService) Close() error
- func (s *LocalGoSpeechService) Format() media.StreamFormat
- func (s *LocalGoSpeechService) GetConfig() *LocalGoSpeechConfig
- func (s *LocalGoSpeechService) GetSupportedLanguages() []string
- func (s *LocalGoSpeechService) GetSupportedSpeakers() []string
- func (s *LocalGoSpeechService) IsReady() bool
- func (s *LocalGoSpeechService) Provider() TTSProvider
- func (s *LocalGoSpeechService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- func (s *LocalGoSpeechService) UpdateConfig(config *LocalGoSpeechConfig) error
- type LocalService
- type LocalTTSConfig
- type MinimaxAudioSetting
- type MinimaxConnectionResponse
- type MinimaxOption
- type MinimaxPronunciationDict
- type MinimaxService
- func (ms *MinimaxService) CacheKey(text string) string
- func (ms *MinimaxService) Close() error
- func (ms *MinimaxService) Format() media.StreamFormat
- func (ms *MinimaxService) GetConnSessionID() string
- func (ms *MinimaxService) GetTraceID() string
- func (ms *MinimaxService) Provider() TTSProvider
- func (ms *MinimaxService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- type MinimaxTaskContinueResponse
- type MinimaxTaskStartRequest
- type MinimaxTaskStartResponse
- type MinimaxTimbreWeight
- type MinimaxVoiceSetting
- type OpenAIConfig
- type OpenAIRequest
- type OpenAIService
- func (os *OpenAIService) CacheKey(text string) string
- func (os *OpenAIService) Close() error
- func (os *OpenAIService) Format() media.StreamFormat
- func (os *OpenAIService) Provider() TTSProvider
- func (os *OpenAIService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- type PlayRecord
- type QCloudService
- func (qs *QCloudService) CacheKey(text string) string
- func (qs *QCloudService) Close() error
- func (qs *QCloudService) Format() media.StreamFormat
- func (qs *QCloudService) Provider() TTSProvider
- func (qs *QCloudService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- type QCloudTTSConfig
- type QiniuService
- type QiniuTTSConfig
- type QiniuTTSRequest
- type QiniuTTSResponse
- type SentenceTimestamp
- type SynthesisBuffer
- type SynthesisConfig
- type SynthesisFactory
- type TTSAddition
- type TTSAudio
- type TTSCredentialConfig
- type TTSProvider
- type TTSRequestData
- type VolcAddition
- type VolcengineService
- func (v *VolcengineService) CacheKey(text string) string
- func (v *VolcengineService) Close() error
- func (v *VolcengineService) Format() media.StreamFormat
- func (v *VolcengineService) Provider() TTSProvider
- func (v *VolcengineService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- type VolcengineTTSOption
- type VolcengineTTSServResponse
- type WSAudio
- type WSHeader
- type WSParameter
- type WSPayload
- type WSRequest
- type WSTTS
- type Word
- type XunfeiService
- func (xs *XunfeiService) CacheKey(text string) string
- func (xs *XunfeiService) Close() error
- func (xs *XunfeiService) Format() media.StreamFormat
- func (xs *XunfeiService) Provider() TTSProvider
- func (xs *XunfeiService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
- type XunfeiTTSConfig
Constants ¶
const ( MinimaxWebSocketURL = "wss://api.minimaxi.com/ws/v1/t2a_v2" MinimaxSpeech25TurboPreview = "speech-2.5-turbo-preview" )
const ( TTS_QCLOUD = "tts.qcloud" TTS_XUNFEI = "tts.xunfei" TTS_QINIU = "tts.qiniu" TTS_BAIDU = "tts.baidu" TTS_GOOGLE = "tts.google" TTS_AWS = "tts.aws" TTS_AZURE = "tts.azure" TTS_OPENAI = "tts.openai" TTS_ELEVENLABS = "tts.elevenlabs" TTS_LOCAL = "tts.local" TTS_LOCAL_GOSPEECH = "tts.local_gospeech" TTS_FISHSPEECH = "tts.fishspeech" TTS_FISHAUDIO = "tts.fishaudio" TTS_COQUI = "tts.coqui" TTS_VOLCENGINE = "tts.volcengine" TTS_VOLCENGINE_CLONE = "tts.volcengine_clone" TTS_VOLCENGINE_LLM = "tts.volcengine_llm" TTS_VOLCENGINE_STREAM = "tts.volcengine_stream" TTS_MINIMAX = "tts.minimax" )
const ( SsmlSpeak = "<speak>" VolcengineCloneCluster = "volcano_icl" VolcengineLLMCluster = "volcano_tts" )
Variables ¶
This section is empty.
Functions ¶
func CheckLocalTTSAvailable ¶
func CheckLocalTTSAvailable() []string
CheckLocalTTSAvailable checks whether any TTS tools are installed on the local machine.
func ComputeSampleByteCount ¶
ComputeSampleByteCount computes the number of bytes for audio samples based on sample rate, bit depth, and number of channels. Formula: (sampleRate * bitDepth * channels) / 8
func DetectLocalTTSCommand ¶
func DetectLocalTTSCommand() string
DetectLocalTTSCommand automatically detects an available local TTS command.
func NormalizeFramePeriod ¶
NormalizeFramePeriod validates and normalizes the frame-period duration string d, applying explicit bounds checking, and returns the result as a time.Duration.
func SetGlobalSynthesisFactory ¶
func SetGlobalSynthesisFactory(factory SynthesisFactory)
SetGlobalSynthesisFactory sets the global TTS factory instance.
func StripEmoji ¶
func WithSynthesis ¶
func WithSynthesis(svc AudioSynthesisEngine) media.MediaHandlerFunc
Types ¶
type AmazonService ¶
type AmazonService struct {
// contains filtered or unexported fields
}
func NewAmazonService ¶
func NewAmazonService(opt AmazonTTSConfig) *AmazonService
func (*AmazonService) CacheKey ¶
func (as *AmazonService) CacheKey(text string) string
func (*AmazonService) Close ¶
func (as *AmazonService) Close() error
func (*AmazonService) Format ¶
func (as *AmazonService) Format() media.StreamFormat
func (*AmazonService) Provider ¶
func (as *AmazonService) Provider() TTSProvider
func (*AmazonService) Synthesize ¶
func (as *AmazonService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type AmazonTTSConfig ¶
type AmazonTTSConfig struct {
SampleRate int `json:"sampleRate" env:"sample_rate" default:"16000"`
Region string `json:"region"`
OutputFormat types.OutputFormat `json:"outputFormat" env:"output_format" default:"pcm"`
VoiceId types.VoiceId `json:"voiceId" env:"voice_id"`
Channels int `json:"channels" env:"channels" default:"1"`
BitDepth int `json:"bitDepth" env:"bit_depth" default:"16"`
FrameDuration string `json:"frameDuration" env:"frame_duration" default:"20ms"`
}
func NewAmazonTTSOption ¶
func NewAmazonTTSOption(region string, outputFormat types.OutputFormat, voiceId types.VoiceId) AmazonTTSConfig
func (*AmazonTTSConfig) GetProvider ¶
func (c *AmazonTTSConfig) GetProvider() TTSProvider
GetProvider returns the TTS provider type
func (*AmazonTTSConfig) String ¶
func (opt *AmazonTTSConfig) String() string
type AudioAudioSynthesisPlayerRequest ¶
type AudioAudioSynthesisPlayerRequest struct {
// contains filtered or unexported fields
}
type AudioSynthesisEngine ¶
type AudioSynthesisEngine interface {
Provider() TTSProvider
Format() media.StreamFormat
CacheKey(text string) string
Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
Close() error
}
AudioSynthesisEngine is the core interface for TTS (Text-to-Speech) synthesis.
func NewAudioSynthesisEngine ¶
func NewAudioSynthesisEngine(name string, options map[string]any) (AudioSynthesisEngine, error)
func NewAudioSynthesisEngineFromCredential ¶
func NewAudioSynthesisEngineFromCredential(config TTSCredentialConfig) (AudioSynthesisEngine, error)
NewAudioSynthesisEngineFromCredential creates a TTS service from the given credential configuration.
type AudioSynthesisHandler ¶
type AudioSynthesisHandler interface {
OnMessage([]byte)
OnTimestamp(timestamp SentenceTimestamp)
}
AudioSynthesisHandler is the callback interface for TTS synthesis events.
type AudioSynthesisPlayer ¶
type AudioSynthesisPlayer struct {
SenderName string
Format media.StreamFormat
// contains filtered or unexported fields
}
func NewAudioSynthesisPlayer ¶
func NewAudioSynthesisPlayer(vendor string, format media.StreamFormat) *AudioSynthesisPlayer
func (*AudioSynthesisPlayer) Close ¶
func (player *AudioSynthesisPlayer) Close()
func (*AudioSynthesisPlayer) Emit ¶
func (player *AudioSynthesisPlayer) Emit(h media.MediaHandler, audioPacket *media.AudioPacket, inputRate int)
func (*AudioSynthesisPlayer) EmitStopPlayState ¶
func (player *AudioSynthesisPlayer) EmitStopPlayState(h media.MediaHandler, duration string, playId string, sequence int, reason string, sourceText string)
func (*AudioSynthesisPlayer) Interrupt ¶
func (player *AudioSynthesisPlayer) Interrupt(h media.MediaHandler, reason string)
func (*AudioSynthesisPlayer) Run ¶
func (player *AudioSynthesisPlayer) Run(handler media.MediaHandler, ctx context.Context)
type AudioSynthesisRequest ¶
type AudioSynthesisRequest struct {
PlayID string
// contains filtered or unexported fields
}
func (*AudioSynthesisRequest) OnMessage ¶
func (req *AudioSynthesisRequest) OnMessage(data []byte)
func (*AudioSynthesisRequest) OnTimestamp ¶
func (req *AudioSynthesisRequest) OnTimestamp(timestamp SentenceTimestamp)
type AzureConfig ¶
type AzureConfig struct {
SubscriptionKey string `json:"subscription_key" yaml:"subscription_key" env:"AZURE_SUBSCRIPTION_KEY"`
Region string `json:"region" yaml:"region" env:"AZURE_REGION"`
Voice string `json:"voice" yaml:"voice" default:"zh-CN-XiaoxiaoNeural"`
Language string `json:"language" yaml:"language"` // 语言代码,用于 SSML 的 xml:lang
SampleRate int `json:"sample_rate" yaml:"sample_rate" default:"22050"`
Channels int `json:"channels" yaml:"channels" default:"1"`
BitDepth int `json:"bit_depth" yaml:"bit_depth" default:"16"`
Codec string `json:"codec" yaml:"codec" default:"audio-24khz-48kbitrate-mono-mp3"`
FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
Timeout int `json:"timeout" yaml:"timeout" default:"30"`
BaseURL string `json:"base_url" yaml:"base_url"`
}
AzureConfig Azure TTS配置
func NewAzureConfig ¶
func NewAzureConfig(subscriptionKey, region string) AzureConfig
NewAzureConfig 创建 Azure TTS 配置
func (*AzureConfig) GetProvider ¶
func (c *AzureConfig) GetProvider() TTSProvider
GetProvider returns the TTS provider type
type AzureRequest ¶
type AzureRequest struct {
Text string `json:"text"`
}
AzureRequest Azure TTS API 请求
type AzureService ¶
type AzureService struct {
// contains filtered or unexported fields
}
func NewAzureService ¶
func NewAzureService(opt AzureConfig) *AzureService
NewAzureService 创建 Azure TTS 服务
func (*AzureService) CacheKey ¶
func (as *AzureService) CacheKey(text string) string
func (*AzureService) Close ¶
func (as *AzureService) Close() error
func (*AzureService) Format ¶
func (as *AzureService) Format() media.StreamFormat
func (*AzureService) Provider ¶
func (as *AzureService) Provider() TTSProvider
func (*AzureService) Synthesize ¶
func (as *AzureService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type BaiduTTSConfig ¶
type BaiduTTSConfig struct {
Tok string `json:"tok" env:"tok" env:"BAIDU_ACCESS_TOKEN"`
Cuid string `json:"cuid" env:"cuid"`
Ctp string `json:"ctp" env:"ctp" default:"1"`
Lan string `json:"lan" env:"lan" default:"zh"`
Spd string `json:"spd" env:"spd" default:"5"`
Pit string `json:"pit" env:"pit" default:"5"`
Vol string `json:"vol" env:"vol" default:"5"`
Aue string `json:"aue" env:"aue" default:"3"`
Channels int `json:"channels" env:"channels" default:"1"`
SampleRate int `json:"sampleRate" env:"sample_rate" default:"16000"`
BitDepth int `json:"bitDepth" env:"bit_depth" default:"16"`
FrameDuration string `json:"frameDuration" env:"frame_duration" default:"20ms"`
}
func NewBaiduTTSOption ¶
func NewBaiduTTSOption(token string) BaiduTTSConfig
func (*BaiduTTSConfig) GetProvider ¶
func (c *BaiduTTSConfig) GetProvider() TTSProvider
GetProvider returns the TTS provider type
func (*BaiduTTSConfig) String ¶
func (opt *BaiduTTSConfig) String() string
type BaiduTTSService ¶
type BaiduTTSService struct {
// contains filtered or unexported fields
}
func NewBaiduService ¶
func NewBaiduService(opt BaiduTTSConfig) *BaiduTTSService
func (*BaiduTTSService) CacheKey ¶
func (bs *BaiduTTSService) CacheKey(text string) string
func (*BaiduTTSService) Close ¶
func (bs *BaiduTTSService) Close() error
func (*BaiduTTSService) DoubleURLEncode ¶
func (bs *BaiduTTSService) DoubleURLEncode(text string) string
func (*BaiduTTSService) Format ¶
func (bs *BaiduTTSService) Format() media.StreamFormat
func (*BaiduTTSService) Provider ¶
func (bs *BaiduTTSService) Provider() TTSProvider
func (*BaiduTTSService) Synthesize ¶
func (bs *BaiduTTSService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type CoquiResponse ¶
type CoquiResponse struct {
Audio string `json:"audio"`
}
type CoquiService ¶
type CoquiService struct {
// contains filtered or unexported fields
}
func NewCoquiService ¶
func NewCoquiService(opt CoquiTTSOption) *CoquiService
func (*CoquiService) CacheKey ¶
func (c *CoquiService) CacheKey(text string) string
func (*CoquiService) Close ¶
func (c *CoquiService) Close() error
func (*CoquiService) Format ¶
func (c *CoquiService) Format() media.StreamFormat
func (*CoquiService) Provider ¶
func (c *CoquiService) Provider() TTSProvider
func (*CoquiService) Synthesize ¶
func (c *CoquiService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type CoquiTTSOption ¶
type CoquiTTSOption struct {
Url string `json:"url" yaml:"url" env:"COQUI_URL"`
Language string `json:"language" yaml:"language" default:"en_US"`
Speaker string `json:"speaker" yaml:"speaker" default:"p226"`
SampleRate int `json:"sampleRate" yaml:"sample_rate" default:"16000"`
Channels int `json:"channels" yaml:"channels" default:"1"`
BitDepth int `json:"bitDepth" yaml:"bit_depth" default:"16"`
FrameDuration string `json:"frameDuration" yaml:"frame_duration" default:"20ms"`
}
func NewCoquiTTSOption ¶
func NewCoquiTTSOption(url string) CoquiTTSOption
func (*CoquiTTSOption) GetProvider ¶
func (c *CoquiTTSOption) GetProvider() TTSProvider
GetProvider returns the TTS provider type
func (*CoquiTTSOption) String ¶
func (opt *CoquiTTSOption) String() string
type DefaultSynthesisFactory ¶
type DefaultSynthesisFactory struct {
// contains filtered or unexported fields
}
DefaultSynthesisFactory 默认TTS工厂实现
func NewSynthesisFactory ¶
func NewSynthesisFactory() *DefaultSynthesisFactory
NewSynthesisFactory 创建新的TTS工厂实例
func (*DefaultSynthesisFactory) CreateEngine ¶
func (f *DefaultSynthesisFactory) CreateEngine(config SynthesisConfig) (AudioSynthesisEngine, error)
CreateEngine 创建 AudioSynthesisEngine
func (*DefaultSynthesisFactory) GetSupportedProviders ¶
func (f *DefaultSynthesisFactory) GetSupportedProviders() []TTSProvider
GetSupportedProviders 获取支持的提供商列表
func (*DefaultSynthesisFactory) IsProviderSupported ¶
func (f *DefaultSynthesisFactory) IsProviderSupported(provider TTSProvider) bool
IsProviderSupported 检查提供商是否支持
func (*DefaultSynthesisFactory) RegisterCreator ¶
func (f *DefaultSynthesisFactory) RegisterCreator(provider TTSProvider, creator func(SynthesisConfig) (AudioSynthesisEngine, error))
RegisterCreator 注册创建函数
type ElevenLabsConfig ¶
type ElevenLabsConfig struct {
APIKey string `json:"api_key" yaml:"api_key" env:"ELEVENLABS_API_KEY"`
VoiceID string `json:"voice_id" yaml:"voice_id" default:"21m00Tcm4TlvDq8ikWAM"` // 默认 Rachel 音色
ModelID string `json:"model_id" yaml:"model_id" default:"eleven_monolingual_v1"`
LanguageCode string `json:"language_code" yaml:"language_code"` // 语言代码,如 en, zh, ja 等
SampleRate int `json:"sample_rate" yaml:"sample_rate" default:"44100"`
Channels int `json:"channels" yaml:"channels" default:"1"`
BitDepth int `json:"bit_depth" yaml:"bit_depth" default:"16"`
Codec string `json:"codec" yaml:"codec" default:"mp3"`
FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
Timeout int `json:"timeout" yaml:"timeout" default:"30"`
// 语音设置
Stability float64 `json:"stability" yaml:"stability" default:"0.5"` // 0.0-1.0
SimilarityBoost float64 `json:"similarity_boost" yaml:"similarity_boost" default:"0.75"` // 0.0-1.0
Style float64 `json:"style" yaml:"style" default:"0.0"` // 0.0-1.0
UseSpeakerBoost bool `json:"use_speaker_boost" yaml:"use_speaker_boost" default:"true"`
}
ElevenLabsConfig ElevenLabs TTS配置
func NewElevenLabsConfig ¶
func NewElevenLabsConfig(apiKey, voiceID string) ElevenLabsConfig
NewElevenLabsConfig 创建 ElevenLabs TTS 配置
func (*ElevenLabsConfig) GetProvider ¶
func (c *ElevenLabsConfig) GetProvider() TTSProvider
GetProvider returns the TTS provider type
type ElevenLabsRequest ¶
type ElevenLabsRequest struct {
Text string `json:"text"`
ModelID string `json:"model_id,omitempty"`
VoiceSettings *ElevenLabsVoiceSettings `json:"voice_settings,omitempty"`
LanguageCode string `json:"language_code,omitempty"`
}
ElevenLabsRequest ElevenLabs API 请求
type ElevenLabsService ¶
type ElevenLabsService struct {
// contains filtered or unexported fields
}
func NewElevenLabsService ¶
func NewElevenLabsService(opt ElevenLabsConfig) *ElevenLabsService
NewElevenLabsService 创建 ElevenLabs TTS 服务
func (*ElevenLabsService) CacheKey ¶
func (es *ElevenLabsService) CacheKey(text string) string
func (*ElevenLabsService) Close ¶
func (es *ElevenLabsService) Close() error
func (*ElevenLabsService) Format ¶
func (es *ElevenLabsService) Format() media.StreamFormat
func (*ElevenLabsService) Provider ¶
func (es *ElevenLabsService) Provider() TTSProvider
func (*ElevenLabsService) Synthesize ¶
func (es *ElevenLabsService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type ElevenLabsVoiceSettings ¶
type ElevenLabsVoiceSettings struct {
Stability float64 `json:"stability"`
SimilarityBoost float64 `json:"similarity_boost"`
Style float64 `json:"style"`
UseSpeakerBoost bool `json:"use_speaker_boost"`
}
ElevenLabsVoiceSettings 音色设置
type FishAudioConfig ¶
type FishAudioConfig struct {
APIKey string `json:"api_key" yaml:"api_key" env:"FISHAUDIO_API_KEY"`
ReferenceID string `json:"reference_id" yaml:"reference_id" default:""` // 模型ID
Model string `json:"model" yaml:"model" default:"s1"` // 模型版本: s1, speech-1.6, speech-1.5
SampleRate int `json:"sample_rate" yaml:"sample_rate" default:"44100"`
Channels int `json:"channels" yaml:"channels" default:"1"`
BitDepth int `json:"bit_depth" yaml:"bit_depth" default:"16"`
Format string `json:"format" yaml:"format" default:"mp3"` // wav, pcm, mp3, opus
Temperature float64 `json:"temperature" yaml:"temperature" default:"0.7"`
TopP float64 `json:"top_p" yaml:"top_p" default:"0.7"`
Latency string `json:"latency" yaml:"latency" default:"normal"` // low, normal, balanced
ChunkLength int `json:"chunk_length" yaml:"chunk_length" default:"300"`
Normalize bool `json:"normalize" yaml:"normalize" default:"true"`
MPEGBitrate int `json:"mp3_bitrate" yaml:"mp3_bitrate" default:"128"` // 64, 128, 192
Timeout int `json:"timeout" yaml:"timeout" default:"30"`
}
FishAudioConfig Fish Audio TTS 配置
func NewFishAudioConfig ¶
func NewFishAudioConfig(apiKey, referenceID string) FishAudioConfig
NewFishAudioConfig 创建 Fish Audio TTS 配置
func (*FishAudioConfig) GetProvider ¶
func (c *FishAudioConfig) GetProvider() TTSProvider
GetProvider returns the TTS provider type
type FishAudioListModelsResponse ¶
type FishAudioListModelsResponse struct {
Total int `json:"total"`
Items []FishAudioVoiceOption `json:"items"`
}
FishAudioListModelsResponse Fish Audio API 返回的模型列表响应
type FishAudioRequest ¶
type FishAudioRequest struct {
Text string `json:"text"`
Model string `json:"model"`
ReferenceID string `json:"reference_id,omitempty"`
Temperature float64 `json:"temperature,omitempty"`
TopP float64 `json:"top_p,omitempty"`
Format string `json:"format,omitempty"`
SampleRate *int `json:"sample_rate,omitempty"`
ChunkLength int `json:"chunk_length,omitempty"`
Normalize bool `json:"normalize,omitempty"`
Latency string `json:"latency,omitempty"`
MaxNewTokens int `json:"max_new_tokens,omitempty"`
RepetitionPenalty float64 `json:"repetition_penalty,omitempty"`
MinChunkLength int `json:"min_chunk_length,omitempty"`
ConditionOnPreviousChunks bool `json:"condition_on_previous_chunks,omitempty"`
EarlyStopThreshold float64 `json:"early_stop_threshold,omitempty"`
}
FishAudioRequest Fish Audio TTS 请求
type FishAudioService ¶
type FishAudioService struct {
// contains filtered or unexported fields
}
func NewFishAudioService ¶
func NewFishAudioService(opt FishAudioConfig) *FishAudioService
NewFishAudioService 创建 Fish Audio TTS 服务
func (*FishAudioService) CacheKey ¶
func (fa *FishAudioService) CacheKey(text string) string
func (*FishAudioService) Close ¶
func (fa *FishAudioService) Close() error
func (*FishAudioService) Format ¶
func (fa *FishAudioService) Format() media.StreamFormat
func (*FishAudioService) Provider ¶
func (fa *FishAudioService) Provider() TTSProvider
func (*FishAudioService) Synthesize ¶
func (fa *FishAudioService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type FishAudioVoiceOption ¶
type FishAudioVoiceOption struct {
ID string `json:"_id"`
Title string `json:"title"`
Description string `json:"description"`
Type string `json:"type"`
State string `json:"state"`
CoverImage string `json:"cover_image"`
Languages []string `json:"languages"`
Author struct {
ID string `json:"_id"`
Nickname string `json:"nickname"`
Avatar string `json:"avatar"`
} `json:"author"`
}
FishAudioVoiceOption Fish Audio 音色选项
func GetFishAudioVoices ¶
func GetFishAudioVoices(apiKey string) ([]FishAudioVoiceOption, error)
GetFishAudioVoices fetches the list of available voices from the Fish Audio API. The apiKey parameter is the Fish Audio API key. It returns the list of voices and an error.
type FishSpeechConfig ¶
type FishSpeechConfig struct {
APIKey string `json:"api_key" yaml:"api_key" env:"FISHSPEECH_API_KEY"`
ReferenceID string `json:"reference_id" yaml:"reference_id" default:"default"` // 模型ID
SampleRate int `json:"sample_rate" yaml:"sample_rate" default:"24000"`
Channels int `json:"channels" yaml:"channels" default:"1"`
BitDepth int `json:"bit_depth" yaml:"bit_depth" default:"16"`
Codec string `json:"codec" yaml:"codec" default:"wav"`
FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
Timeout int `json:"timeout" yaml:"timeout" default:"30"`
Latency string `json:"latency" yaml:"latency" default:"normal"` // normal, balanced
Version string `json:"version" yaml:"version" default:"s1"`
}
FishSpeechConfig FishSpeech TTS配置
func NewFishSpeechConfig ¶
func NewFishSpeechConfig(apiKey, referenceID string) FishSpeechConfig
NewFishSpeechConfig 创建 FishSpeech TTS 配置
func (*FishSpeechConfig) GetProvider ¶
func (c *FishSpeechConfig) GetProvider() TTSProvider
GetProvider returns the TTS provider type
type FishSpeechListModelsRequest ¶
type FishSpeechListModelsRequest struct {
ModelType string `json:"modelType,omitempty"` // 可选,模型类型:"public" | "personal" | "all"
IncludePersonal bool `json:"includePersonal,omitempty"` // 可选,是否包含个人模型
Page int `json:"page,omitempty"` // 可选,页码,从1开始
PageSize int `json:"pageSize,omitempty"` // 可选,每页条数
}
FishSpeechListModelsRequest FishSpeech 列表模型请求
type FishSpeechListModelsResponse ¶
type FishSpeechListModelsResponse struct {
Total int `json:"total"`
Page int `json:"page"`
PageSize int `json:"pageSize"`
TotalPages int `json:"totalPages"`
Items []FishSpeechVoiceOption `json:"items"`
}
FishSpeechListModelsResponse FishSpeech API 返回的模型列表响应
type FishSpeechService ¶
type FishSpeechService struct {
// contains filtered or unexported fields
}
func NewFishSpeechService ¶
func NewFishSpeechService(opt FishSpeechConfig) *FishSpeechService
NewFishSpeechService 创建 FishSpeech TTS 服务
func (*FishSpeechService) CacheKey ¶
func (fs *FishSpeechService) CacheKey(text string) string
func (*FishSpeechService) Close ¶
func (fs *FishSpeechService) Close() error
func (*FishSpeechService) Format ¶
func (fs *FishSpeechService) Format() media.StreamFormat
func (*FishSpeechService) Provider ¶
func (fs *FishSpeechService) Provider() TTSProvider
func (*FishSpeechService) Synthesize ¶
func (fs *FishSpeechService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type FishSpeechV2Request ¶
type FishSpeechV2Request struct {
Text string `json:"text"`
ReferenceID string `json:"reference_id,omitempty"`
Format string `json:"format,omitempty"`
Latency string `json:"latency,omitempty"`
}
FishSpeechV2Request WebSocket v2 请求
type FishSpeechV2Response ¶
type FishSpeechV2Response struct {
Type string `json:"type"` // "audio" 或 "error"
Data string `json:"data,omitempty"` // base64 编码的音频数据
Error string `json:"error,omitempty"` // 错误信息
Message string `json:"message,omitempty"` // 消息
}
FishSpeechV2Response WebSocket v2 响应
type FishSpeechVoiceOption ¶
type FishSpeechVoiceOption struct {
ModelID string `json:"modelId"` // 模型 ID
Title string `json:"title"` // 模型名称
Description string `json:"description"` // 模型描述
IsPersonal bool `json:"isPersonal"` // 是否为个人模型
}
FishSpeechVoiceOption FishSpeech 音色选项
func GetFishSpeechVoices ¶
func GetFishSpeechVoices(apiKey string) ([]FishSpeechVoiceOption, error)
GetFishSpeechVoices fetches the list of available voices from the FishSpeech API. The apiKey parameter is the FishSpeech API key. It returns the list of voices and an error.
type GoogleService ¶
type GoogleService struct {
// contains filtered or unexported fields
}
func NewGoogleService ¶
func NewGoogleService(opt GoogleTTSOption) *GoogleService
func (*GoogleService) CacheKey ¶
func (gs *GoogleService) CacheKey(text string) string
func (*GoogleService) Close ¶
func (gs *GoogleService) Close() error
func (*GoogleService) Format ¶
func (gs *GoogleService) Format() media.StreamFormat
func (*GoogleService) Provider ¶
func (gs *GoogleService) Provider() TTSProvider
func (*GoogleService) Synthesize ¶
func (gs *GoogleService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type GoogleTTSOption ¶
type GoogleTTSOption struct {
LanguageCode string `json:"languageCode" yaml:"language_code"`
SsmlGender texttospeechpb.SsmlVoiceGender `json:"ssmlGender" yaml:"ssml_gender"`
AudioEncoding texttospeechpb.AudioEncoding `json:"audioEncoding" yaml:"audio_encoding" default:"LINEAR16"`
SampleRate int `json:"sampleRate" yaml:"sample_rate" default:"16000"`
Channels int `json:"channels" yaml:"channels" default:"1"`
BitDepth int `json:"bitDepth" yaml:"bit_depth" default:"16"`
FrameDuration string `json:"frameDuration" yaml:"frame_duration" default:"20ms"`
}
func NewGoogleTTSOption ¶
func NewGoogleTTSOption(languageCode string) GoogleTTSOption
func (*GoogleTTSOption) GetProvider ¶
func (c *GoogleTTSOption) GetProvider() TTSProvider
GetProvider returns the TTS provider type
func (*GoogleTTSOption) String ¶
func (opt *GoogleTTSOption) String() string
type LocalGoSpeechConfig ¶
type LocalGoSpeechConfig struct {
Provider LocalGoSpeechProvider `json:"provider"` // TTS提供商
ModelPath string `json:"modelPath"` // 模型文件路径(可选)
Language string `json:"language"` // 语言代码
Speaker string `json:"speaker"` // 发音人
SampleRate int `json:"sampleRate"` // 采样率
Channels int `json:"channels"` // 声道数
BitDepth int `json:"bitDepth"` // 位深度
Speed float32 `json:"speed"` // 语速
Pitch float32 `json:"pitch"` // 音调
Volume float32 `json:"volume"` // 音量
EnableCache bool `json:"enableCache"` // 是否启用缓存
CacheExpiry time.Duration `json:"cacheExpiry"` // 缓存过期时间
Command string `json:"command"` // 自定义命令
OutputDir string `json:"outputDir"` // 输出目录
}
LocalGoSpeechConfig 本地TTS配置
func NewLocalGoSpeechConfig ¶
func NewLocalGoSpeechConfig(provider LocalGoSpeechProvider, modelPath string) *LocalGoSpeechConfig
NewLocalGoSpeechConfig 创建默认本地TTS配置
func (*LocalGoSpeechConfig) GetProvider ¶
func (c *LocalGoSpeechConfig) GetProvider() TTSProvider
GetProvider returns the TTS provider type
type LocalGoSpeechProvider ¶
type LocalGoSpeechProvider string
LocalGoSpeechProvider 本地TTS提供商类型
const ( LocalGoSpeechProviderEspeak LocalGoSpeechProvider = "espeak" LocalGoSpeechProviderSay LocalGoSpeechProvider = "say" LocalGoSpeechProviderFestival LocalGoSpeechProvider = "festival" LocalGoSpeechProviderPico LocalGoSpeechProvider = "pico" )
type LocalGoSpeechService ¶
type LocalGoSpeechService struct {
// contains filtered or unexported fields
}
LocalGoSpeechService 本地TTS服务
func NewLocalGoSpeechService ¶
func NewLocalGoSpeechService(config *LocalGoSpeechConfig) (*LocalGoSpeechService, error)
NewLocalGoSpeechService 创建本地TTS服务
func (*LocalGoSpeechService) CacheKey ¶
func (s *LocalGoSpeechService) CacheKey(text string) string
CacheKey 生成缓存键
func (*LocalGoSpeechService) Format ¶
func (s *LocalGoSpeechService) Format() media.StreamFormat
Format 返回音频格式
func (*LocalGoSpeechService) GetConfig ¶
func (s *LocalGoSpeechService) GetConfig() *LocalGoSpeechConfig
GetConfig 获取配置
func (*LocalGoSpeechService) GetSupportedLanguages ¶
func (s *LocalGoSpeechService) GetSupportedLanguages() []string
GetSupportedLanguages 获取支持的语言列表
func (*LocalGoSpeechService) GetSupportedSpeakers ¶
func (s *LocalGoSpeechService) GetSupportedSpeakers() []string
GetSupportedSpeakers 获取支持的发音人列表
func (*LocalGoSpeechService) IsReady ¶
func (s *LocalGoSpeechService) IsReady() bool
IsReady 检查服务是否就绪
func (*LocalGoSpeechService) Provider ¶
func (s *LocalGoSpeechService) Provider() TTSProvider
Provider 返回提供商
func (*LocalGoSpeechService) Synthesize ¶
func (s *LocalGoSpeechService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
Synthesize 合成语音
func (*LocalGoSpeechService) UpdateConfig ¶
func (s *LocalGoSpeechService) UpdateConfig(config *LocalGoSpeechConfig) error
UpdateConfig 更新配置
type LocalService ¶
type LocalService struct {
// contains filtered or unexported fields
}
func NewLocalService ¶
func NewLocalService(opt LocalTTSConfig) *LocalService
NewLocalService 创建本地TTS服务
func (*LocalService) CacheKey ¶
func (ls *LocalService) CacheKey(text string) string
func (*LocalService) Close ¶
func (ls *LocalService) Close() error
func (*LocalService) Format ¶
func (ls *LocalService) Format() media.StreamFormat
func (*LocalService) Provider ¶
func (ls *LocalService) Provider() TTSProvider
func (*LocalService) Synthesize ¶
func (ls *LocalService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type LocalTTSConfig ¶
type LocalTTSConfig struct {
Command string `json:"command" yaml:"command" default:"say"` // TTS 命令(如 say, festival, espeak)
Voice string `json:"voice" yaml:"voice" default:""` // 音色(可选)
SampleRate int `json:"sample_rate" yaml:"sample_rate" default:"16000"` // 采样率
Channels int `json:"channels" yaml:"channels" default:"1"` // 声道数
BitDepth int `json:"bit_depth" yaml:"bit_depth" default:"16"` // 位深度
Codec string `json:"codec" yaml:"codec" default:"wav"` // 音频编解码器
FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
OutputDir string `json:"output_dir" yaml:"output_dir" default:"/tmp"` // 输出目录
}
LocalTTSConfig 本地TTS配置
func NewLocalTTSConfig ¶
func NewLocalTTSConfig(command string) LocalTTSConfig
NewLocalTTSConfig 创建本地TTS配置
func (*LocalTTSConfig) GetProvider ¶
func (c *LocalTTSConfig) GetProvider() TTSProvider
GetProvider returns the TTS provider type
type MinimaxAudioSetting ¶
type MinimaxOption ¶
type MinimaxOption struct {
Model string `json:"model" yaml:"model" default:"speech-2.5-turbo-preview"`
APIKey string `json:"apiKey" yaml:"api_key" env:"MINIMAX_API_KEY"`
VoiceID string `json:"voiceId" yaml:"voice_id" default:"male-qn-qingse"`
SpeedRatio float64 `json:"speedRatio" yaml:"speed_ratio" default:"1.0"`
Volume float64 `json:"volume" yaml:"volume" default:"1.0"`
Pitch float64 `json:"pitch" yaml:"pitch" default:"0.0"`
Emotion string `json:"emotion" yaml:"emotion" default:"neutral"`
LanguageBoost string `json:"languageBoost" yaml:"language_boost" default:"auto"`
TrainingTimes int `json:"trainingTimes" yaml:"training_times" default:"1"`
SampleRate int `json:"sampleRate" yaml:"sample_rate" default:"8000"`
Bitrate int `json:"bitrate" yaml:"bitrate" default:"16"`
Format string `json:"format" yaml:"format" default:"pcm"`
Channels int `json:"channels" yaml:"channels" default:"1"`
FrameDuration string `json:"frameDuration" yaml:"frame_duration" default:"20ms"`
}
func NewMinimaxOption ¶
func NewMinimaxOption(apiKey string) MinimaxOption
func (*MinimaxOption) GetProvider ¶
func (c *MinimaxOption) GetProvider() TTSProvider
GetProvider returns the TTS provider type
func (*MinimaxOption) String ¶
func (opt *MinimaxOption) String() string
type MinimaxPronunciationDict ¶
type MinimaxPronunciationDict struct {
}
type MinimaxService ¶
type MinimaxService struct {
ConnSessionID string
TraceID string
// contains filtered or unexported fields
}
func NewMinimaxService ¶
func NewMinimaxService(opt MinimaxOption) *MinimaxService
func (*MinimaxService) CacheKey ¶
func (ms *MinimaxService) CacheKey(text string) string
func (*MinimaxService) Close ¶
func (ms *MinimaxService) Close() error
func (*MinimaxService) Format ¶
func (ms *MinimaxService) Format() media.StreamFormat
func (*MinimaxService) GetConnSessionID ¶
func (ms *MinimaxService) GetConnSessionID() string
func (*MinimaxService) GetTraceID ¶
func (ms *MinimaxService) GetTraceID() string
func (*MinimaxService) Provider ¶
func (ms *MinimaxService) Provider() TTSProvider
func (*MinimaxService) Synthesize ¶
func (ms *MinimaxService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type MinimaxTaskContinueResponse ¶
type MinimaxTaskContinueResponse struct {
Data struct {
Audio string `json:"audio"`
} `json:"data"`
SessionID string `json:"session_id"`
Event string `json:"event"`
IsFinal bool `json:"is_final"`
TraceID string `json:"trace_id"`
BaseResp struct {
StatusCode int `json:"status_code"`
StatusMsg string `json:"status_msg"`
} `json:"base_resp"`
}
type MinimaxTaskStartRequest ¶
type MinimaxTaskStartRequest struct {
Event string `json:"event"`
Model string `json:"model"`
VoiceSetting MinimaxVoiceSetting `json:"voice_setting"`
AudioSetting MinimaxAudioSetting `json:"audio_setting"`
PronunciationDict *MinimaxPronunciationDict `json:"pronunciation_dict,omitempty"`
LanguageBoost string `json:"language_boost,omitempty"`
}
type MinimaxTimbreWeight ¶
type MinimaxVoiceSetting ¶
type MinimaxVoiceSetting struct {
VoiceID string `json:"voice_id,omitempty"`
Weight int `json:"weight,omitempty"`
TimbreWeights []MinimaxTimbreWeight `json:"timbre_weights,omitempty"`
Speed float64 `json:"speed"`
Volume float64 `json:"vol"`
Pitch float64 `json:"pitch"`
Emotion string `json:"emotion"`
ToneList []string `json:"tonelist,omitempty"`
}
type OpenAIConfig ¶
type OpenAIConfig struct {
APIKey string `json:"api_key" yaml:"api_key" env:"OPENAI_API_KEY"`
Model string `json:"model" yaml:"model" default:"tts-1"`
Voice string `json:"voice" yaml:"voice" default:"alloy"`
Speed float64 `json:"speed" yaml:"speed" default:"1.0"`
SampleRate int `json:"sample_rate" yaml:"sample_rate" default:"24000"`
Channels int `json:"channels" yaml:"channels" default:"1"`
BitDepth int `json:"bit_depth" yaml:"bit_depth" default:"16"`
Codec string `json:"codec" yaml:"codec" default:"mp3"`
FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
Timeout int `json:"timeout" yaml:"timeout" default:"30"`
BaseURL string `json:"base_url" yaml:"base_url" default:"https://api.openai.com"`
}
OpenAIConfig OpenAI TTS配置
func NewOpenAIConfig ¶
func NewOpenAIConfig(apiKey string) OpenAIConfig
NewOpenAIConfig 创建 OpenAI TTS 配置
func (*OpenAIConfig) GetProvider ¶
func (c *OpenAIConfig) GetProvider() TTSProvider
GetProvider returns the TTS provider type
type OpenAIRequest ¶
type OpenAIRequest struct {
Model string `json:"model"`
Input string `json:"input"`
Voice string `json:"voice"`
ResponseFormat string `json:"response_format,omitempty"`
Speed float64 `json:"speed,omitempty"`
}
OpenAIRequest OpenAI API 请求
type OpenAIService ¶
type OpenAIService struct {
// contains filtered or unexported fields
}
func NewOpenAIService ¶
func NewOpenAIService(opt OpenAIConfig) *OpenAIService
NewOpenAIService 创建 OpenAI TTS 服务
func (*OpenAIService) CacheKey ¶
func (os *OpenAIService) CacheKey(text string) string
func (*OpenAIService) Close ¶
func (os *OpenAIService) Close() error
func (*OpenAIService) Format ¶
func (os *OpenAIService) Format() media.StreamFormat
func (*OpenAIService) Provider ¶
func (os *OpenAIService) Provider() TTSProvider
func (*OpenAIService) Synthesize ¶
func (os *OpenAIService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type PlayRecord ¶
type PlayRecord struct {
// contains filtered or unexported fields
}
type QCloudService ¶
type QCloudService struct {
// contains filtered or unexported fields
}
func NewQCloudService ¶
func NewQCloudService(opt QCloudTTSConfig) *QCloudService
func (*QCloudService) CacheKey ¶
func (qs *QCloudService) CacheKey(text string) string
func (*QCloudService) Close ¶
func (qs *QCloudService) Close() error
func (*QCloudService) Format ¶
func (qs *QCloudService) Format() media.StreamFormat
func (*QCloudService) Provider ¶
func (qs *QCloudService) Provider() TTSProvider
func (*QCloudService) Synthesize ¶
func (qs *QCloudService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type QCloudTTSConfig ¶
type QCloudTTSConfig struct {
AppID int64 `json:"appId" yaml:"app_id" env:"QCLOUD_APP_ID"`
SecretID string `json:"secretId" yaml:"secret_id" env:"QCLOUD_SECRET_ID"`
SecretKey string `json:"secret" yaml:"secret" env:"QCLOUD_SECRET"`
VoiceType int64 `json:"voiceType" yaml:"voice_type" default:"1005"`
ModelType int64 `json:"modelType" yaml:"model_type" default:"1"`
Language string `json:"language" yaml:"language"` // 语言代码,如 zh-CN, en-US(腾讯云通过音色类型区分语言,此字段用于配置和缓存)
SampleRate int `json:"sampleRate" yaml:"sample_rate" default:"8000"`
Channels int `json:"channels" yaml:"channels" default:"1"`
BitDepth int `json:"bitDepth" yaml:"bit_depth" default:"16"`
Codec string `json:"codec" yaml:"codec" default:"pcm"`
FrameDuration string `json:"frameDuration" yaml:"frame_duration" default:"20ms"`
// Speed is Tencent TTS speed level (typically -2~6, 0 means default).
Speed int64 `json:"speed" yaml:"speed" default:"0"`
}
QCloudTTSConfig is the Tencent Cloud TTS configuration.
func NewQcloudTTSConfig ¶
func (*QCloudTTSConfig) GetProvider ¶
func (c *QCloudTTSConfig) GetProvider() TTSProvider
GetProvider returns the TTS provider type
func (*QCloudTTSConfig) ToString ¶
func (opt *QCloudTTSConfig) ToString() string
type QiniuService ¶
type QiniuService struct {
// contains filtered or unexported fields
}
func NewQiniuService ¶
func NewQiniuService(opt QiniuTTSConfig) *QiniuService
NewQiniuService 创建七牛云TTS服务
func (*QiniuService) CacheKey ¶
func (qs *QiniuService) CacheKey(text string) string
func (*QiniuService) Close ¶
func (qs *QiniuService) Close() error
func (*QiniuService) Format ¶
func (qs *QiniuService) Format() media.StreamFormat
func (*QiniuService) Provider ¶
func (qs *QiniuService) Provider() TTSProvider
func (*QiniuService) Synthesize ¶
func (qs *QiniuService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type QiniuTTSConfig ¶
type QiniuTTSConfig struct {
APIKey string `json:"api_key" yaml:"api_key" env:"QINIU_TTS_API_KEY"`
BaseURL string `json:"base_url" yaml:"base_url" env:"QINIU_TTS_BASE_URL"`
VoiceType string `json:"voice_type" yaml:"voice_type" default:"female_cn_001"`
SampleRate int `json:"sample_rate" yaml:"sample_rate" default:"16000"`
Channels int `json:"channels" yaml:"channels" default:"1"`
BitDepth int `json:"bit_depth" yaml:"bit_depth" default:"16"`
Codec string `json:"codec" yaml:"codec" default:"pcm"`
FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
Timeout int `json:"timeout" yaml:"timeout" default:"30"`
Retries int `json:"retries" yaml:"retries" default:"0"`
}
QiniuTTSConfig 七牛云TTS配置
func NewQiniuTTSConfig ¶
func NewQiniuTTSConfig(apiKey, baseURL string) QiniuTTSConfig
NewQiniuTTSConfig 创建七牛云TTS配置
func (*QiniuTTSConfig) GetProvider ¶
func (c *QiniuTTSConfig) GetProvider() TTSProvider
GetProvider returns the TTS provider type
type QiniuTTSRequest ¶
type QiniuTTSRequest struct {
Audio TTSAudio `json:"audio"`
Request TTSRequestData `json:"request"`
}
QiniuTTSRequest 七牛云TTS请求结构
type QiniuTTSResponse ¶
type QiniuTTSResponse struct {
Reqid string `json:"reqid"`
Operation string `json:"operation"`
Sequence int `json:"sequence"`
Data string `json:"data"`
Addition *TTSAddition `json:"addition,omitempty"`
}
QiniuTTSResponse 七牛云TTS响应结构
type SentenceTimestamp ¶
type SentenceTimestamp struct {
Words []Word `json:"words"`
}
type SynthesisBuffer ¶
type SynthesisBuffer struct {
Data []byte
Timestamp SentenceTimestamp
}
func (*SynthesisBuffer) OnMessage ¶
func (s *SynthesisBuffer) OnMessage(data []byte)
func (*SynthesisBuffer) OnTimestamp ¶
func (s *SynthesisBuffer) OnTimestamp(timestamp SentenceTimestamp)
type SynthesisConfig ¶
type SynthesisConfig interface {
GetProvider() TTSProvider
}
SynthesisConfig 统一的TTS配置接口
type SynthesisFactory ¶
type SynthesisFactory interface {
// CreateEngine 根据配置创建 AudioSynthesisEngine
CreateEngine(config SynthesisConfig) (AudioSynthesisEngine, error)
// GetSupportedProviders 获取支持的提供商列表
GetSupportedProviders() []TTSProvider
// IsProviderSupported 检查提供商是否支持
IsProviderSupported(provider TTSProvider) bool
// RegisterCreator 注册创建函数
RegisterCreator(provider TTSProvider, creator func(SynthesisConfig) (AudioSynthesisEngine, error))
}
SynthesisFactory TTS工厂接口
func GetGlobalSynthesisFactory ¶
func GetGlobalSynthesisFactory() SynthesisFactory
GetGlobalSynthesisFactory 获取全局TTS工厂实例
type TTSAddition ¶
type TTSAddition struct {
Duration string `json:"duration"`
}
TTSAddition TTS附加信息
type TTSAudio ¶
type TTSAudio struct {
VoiceType string `json:"voice_type"`
Encoding string `json:"encoding"`
SpeedRatio float64 `json:"speed_ratio,omitempty"`
}
TTSAudio TTS音频配置
type TTSCredentialConfig ¶
type TTSCredentialConfig map[string]interface{}
TTSCredentialConfig TTS凭证配置结构(灵活的键值对配置)
type TTSProvider ¶
type TTSProvider string
TTSProvider TTS服务提供商类型
const (
// ProviderQiniu 七牛云TTS
ProviderQiniu TTSProvider = "qiniu"
// ProviderXunfei 讯飞TTS
ProviderXunfei TTSProvider = "xunfei"
// ProviderAliyun 阿里云TTS
ProviderAliyun TTSProvider = "aliyun"
// ProviderTencent 腾讯云TTS
ProviderTencent TTSProvider = "qcloud"
// ProviderBaidu 百度TTS
ProviderBaidu TTSProvider = "baidu"
// ProviderAzure 微软Azure TTS
ProviderAzure TTSProvider = "azure"
// ProviderGoogle Google Cloud TTS
ProviderGoogle TTSProvider = "google"
// ProviderAWS Amazon Polly TTS
ProviderAWS TTSProvider = "aws"
// ProviderOpenAI OpenAI TTS
ProviderOpenAI TTSProvider = "openai"
// ProviderElevenLabs ElevenLabs TTS
ProviderElevenLabs TTSProvider = "elevenlabs"
// ProviderLocal 本地TTS
ProviderLocal TTSProvider = "local"
// ProviderLocalGoSpeech 本地go-speech TTS
ProviderLocalGoSpeech TTSProvider = "local_gospeech"
// ProviderFishSpeech FishSpeech TTS
ProviderFishSpeech TTSProvider = "fishspeech"
// ProviderFishAudio Fish Audio TTS
ProviderFishAudio TTSProvider = "fishaudio"
// ProviderCoqui Coqui TTS
ProviderCoqui TTSProvider = "coqui"
// ProviderVolcengine 火山引擎标准TTS
ProviderVolcengine TTSProvider = "volcengine"
// ProviderMinimax Minimax TTS
ProviderMinimax TTSProvider = "minimax"
)
func (TTSProvider) ToString ¶
func (tp TTSProvider) ToString() string
type TTSRequestData ¶
type TTSRequestData struct {
Text string `json:"text"`
}
TTSRequestData TTS请求数据
type VolcAddition ¶
type VolcAddition struct {
Frontend string `json:"frontend"`
}
VolcAddition 火山引擎附加信息
type VolcengineService ¶
type VolcengineService struct {
// contains filtered or unexported fields
}
VolcengineService 火山引擎标准TTS服务
func NewVolcengineService ¶
func NewVolcengineService(opt VolcengineTTSOption) *VolcengineService
NewVolcengineService 创建火山引擎TTS服务
func (*VolcengineService) CacheKey ¶
func (v *VolcengineService) CacheKey(text string) string
func (*VolcengineService) Close ¶
func (v *VolcengineService) Close() error
func (*VolcengineService) Format ¶
func (v *VolcengineService) Format() media.StreamFormat
func (*VolcengineService) Provider ¶
func (v *VolcengineService) Provider() TTSProvider
func (*VolcengineService) Synthesize ¶
func (v *VolcengineService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type VolcengineTTSOption ¶
type VolcengineTTSOption struct {
AppID string `json:"appID"` // 应用ID
AccessToken string `json:"accessToken"` // 访问令牌
Cluster string `json:"cluster"` // 集群名称,如 volcano_tts
VoiceType string `json:"voiceType"` // 音色类型,如 BV700_streaming
Rate int `json:"rate"` // 采样率,默认 8000
Encoding string `json:"encoding"` // 编码格式,默认 pcm
SpeedRatio float32 `json:"speedRatio"` // 语速比例,默认 1.0
VolumeRatio float32 `json:"volumeRatio"` // 音量比例,默认 1.0
PitchRatio float32 `json:"pitchRatio"` // 音调比例,默认 1.0
Channels int `json:"channels"` // 声道数,默认 1
BitDepth int `json:"bitDepth"` // 位深度,默认 16
FrameDuration string `json:"frameDuration"` // 帧时长,默认 20ms
TextType string `json:"textType"` // 文本类型,plain 或 ssml
Ssml bool `json:"ssml"` // 是否使用 SSML
}
VolcengineTTSOption 火山引擎标准TTS配置
支持的常用音色类型(VoiceType):
- BV700_streaming: 默认音色
- BV700_V2_streaming: V2版本
- BV213_streaming: 广西老表(男声)
- BV025_streaming: 甜美台妹(女声)
更多音色类型请参考火山引擎官方文档
func NewVolcengineTTSOption ¶
func NewVolcengineTTSOption(appID, accessToken, cluster string) VolcengineTTSOption
NewVolcengineTTSOption 创建火山引擎TTS配置
func (*VolcengineTTSOption) GetProvider ¶
func (c *VolcengineTTSOption) GetProvider() TTSProvider
GetProvider returns the TTS provider type
type VolcengineTTSServResponse ¶
type VolcengineTTSServResponse struct {
ReqID string `json:"reqid"`
Code int `json:"code"`
Message string `json:"message"`
Operation string `json:"operation"`
Sequence int `json:"sequence"`
Data string `json:"data"`
Addition VolcAddition `json:"addition"`
}
VolcengineTTSServResponse 火山引擎TTS响应结构
type WSHeader ¶
type WSHeader struct {
AppID string `json:"app_id"`
Status int `json:"status"`
ResID string `json:"res_id"`
}
WSHeader WebSocket请求头
type WSPayload ¶
type WSPayload struct {
Text struct {
Encoding string `json:"encoding"`
Compress string `json:"compress"`
Format string `json:"format"`
Status int `json:"status"`
Seq int `json:"seq"`
Text string `json:"text"`
} `json:"text"`
}
WSPayload WebSocket载荷
type WSRequest ¶
type WSRequest struct {
Header WSHeader `json:"header"`
Parameter WSParameter `json:"parameter"`
Payload WSPayload `json:"payload"`
}
WSRequest WebSocket请求结构
type WSTTS ¶
type WSTTS struct {
Vcn string `json:"vcn"`
Volume int `json:"volume"`
Rhy int `json:"rhy"`
Pybuffer int `json:"pybuffer"`
Speed int `json:"speed"`
Pitch int `json:"pitch"`
Bgs int `json:"bgs"`
Reg int `json:"reg"`
Rdn int `json:"rdn"`
Audio WSAudio `json:"audio"`
}
WSTTS WebSocket TTS参数
type XunfeiService ¶
type XunfeiService struct {
// contains filtered or unexported fields
}
func NewXunfeiService ¶
func NewXunfeiService(opt XunfeiTTSConfig) *XunfeiService
NewXunfeiService 创建讯飞TTS服务
func (*XunfeiService) CacheKey ¶
func (xs *XunfeiService) CacheKey(text string) string
func (*XunfeiService) Close ¶
func (xs *XunfeiService) Close() error
func (*XunfeiService) Format ¶
func (xs *XunfeiService) Format() media.StreamFormat
func (*XunfeiService) Provider ¶
func (xs *XunfeiService) Provider() TTSProvider
func (*XunfeiService) Synthesize ¶
func (xs *XunfeiService) Synthesize(ctx context.Context, handler AudioSynthesisHandler, text string) error
type XunfeiTTSConfig ¶
type XunfeiTTSConfig struct {
AppID string `json:"app_id" yaml:"app_id" env:"XUNFEI_APP_ID"`
APIKey string `json:"api_key" yaml:"api_key" env:"XUNFEI_API_KEY"`
APISecret string `json:"api_secret" yaml:"api_secret" env:"XUNFEI_API_SECRET"`
SampleRate int `json:"sample_rate" yaml:"sample_rate" default:"24000"`
Channels int `json:"channels" yaml:"channels" default:"1"`
BitDepth int `json:"bit_depth" yaml:"bit_depth" default:"16"`
Codec string `json:"codec" yaml:"codec" default:"raw"`
FrameDuration string `json:"frame_duration" yaml:"frame_duration" default:"20ms"`
Timeout int `json:"timeout" yaml:"timeout" default:"30"`
}
XunfeiTTSConfig 讯飞TTS配置
func NewXunfeiTTSConfig ¶
func NewXunfeiTTSConfig(appID, apiKey, apiSecret string) XunfeiTTSConfig
NewXunfeiTTSConfig 创建讯飞TTS配置
func (*XunfeiTTSConfig) GetProvider ¶
func (c *XunfeiTTSConfig) GetProvider() TTSProvider
GetProvider returns the TTS provider type