asr

package

Version: v1.0.8 (latest) | Published: Nov 18, 2022 | License: Apache-2.0 | Imports: 18 | Imported by: 2

Documentation ¶

Index ¶

Constants
type FlashRecognitionRequest
type FlashRecognitionResponse
type FlashRecognitionResult
type FlashRecognitionSentence
type FlashRecognizer
- func NewFlashRecognizer(appID string, credential *common.Credential) *FlashRecognizer
- func (recognizer *FlashRecognizer) Recognize(req *FlashRecognitionRequest, videoData []byte) (*FlashRecognitionResponse, error)
type FlashWordData
type SpeechRecognitionListener
type SpeechRecognitionResponse
type SpeechRecognitionResponseResult
type SpeechRecognitionResponseResultWord
type SpeechRecognizer
- func NewSpeechRecognizer(appID string, credential *common.Credential, engineModelType string, ...) *SpeechRecognizer

Constants ¶

// Audio format codes. Each constant is an untyped integer whose name
// identifies an audio encoding or container (PCM, Speex, Silk, MP3, Opus,
// WAV, M4A, AAC). The values are not contiguous, so they must be used by
// name rather than derived arithmetically.
// NOTE(review): presumably these are the values accepted by
// SpeechRecognizer.VoiceFormat (declared as int) — confirm against the
// service API reference.
const (
	AudioFormatPCM   = 1
	AudioFormatSpeex = 4
	AudioFormatSilk  = 6
	AudioFormatMp3   = 8
	AudioFormatOpus  = 10
	AudioFormatWav   = 12
	AudioFormatM4A   = 14
	AudioFormatAAC   = 16
)

AudioFormat* constants are the integer codes for the supported audio formats.

Variables ¶

This section is empty.

Functions ¶

This section is empty.

Types ¶

type FlashRecognitionRequest ¶ added in v1.0.3

// FlashRecognitionRequest holds the parameters of a flash recognition
// request, as passed to FlashRecognizer.Recognize.
//
// Every field carries a snake_case JSON tag that names its key when the
// struct is encoded as JSON. String fields select or identify resources
// (engine type, voice format, hotword and customization IDs); the integer
// fields are numeric options.
// NOTE(review): the exact meaning and valid values of each option are not
// visible here — presumably they mirror the service's request parameters of
// the same names; confirm against the API reference.
type FlashRecognitionRequest struct {
	EngineType         string `json:"engine_type"`
	VoiceFormat        string `json:"voice_format"`
	SpeakerDiarization uint32 `json:"speaker_diarization"`
	HotwordId          string `json:"hotword_id"`
	CustomizationId    string `json:"customization_id"`
	FilterDirty        int32  `json:"filter_dirty"`
	FilterModal        int32  `json:"filter_modal"`
	FilterPunc         int32  `json:"filter_punc"`
	ConvertNumMode     int32  `json:"convert_num_mode"`
	WordInfo           int32  `json:"word_info"`
	FirstChannelOnly   int32  `json:"first_channel_only"`
}

FlashRecognitionRequest holds the parameters of a flash recognition request.

type FlashRecognitionResponse ¶ added in v1.0.3

// FlashRecognitionResponse is the value returned by
// FlashRecognizer.Recognize.
//
// It carries a request ID, a numeric code with an accompanying message, an
// audio duration, and a list of per-result entries. FlashResult is tagged
// omitempty, so a nil or empty slice is left out when the struct is
// marshalled with encoding/json.
// NOTE(review): the unit of AudioDuration and the code value that means
// success are not shown here — confirm against the API reference.
type FlashRecognitionResponse struct {
	RequestId     string                    `json:"request_id"`
	Code          int                       `json:"code"`
	Message       string                    `json:"message"`
	AudioDuration int64                     `json:"audio_duration"`
	FlashResult   []*FlashRecognitionResult `json:"flash_result,omitempty"`
}

FlashRecognitionResponse is the response returned by a flash recognition request.

type FlashRecognitionResult ¶ added in v1.0.3

// FlashRecognitionResult is one element of
// FlashRecognitionResponse.FlashResult.
//
// It pairs a text string with a channel ID and an optional list of
// sentences. SentenceList is tagged omitempty, so it is left out of the
// JSON encoding when nil or empty.
// NOTE(review): presumably there is one result per audio channel and Text
// is the full transcript for that channel — confirm.
type FlashRecognitionResult struct {
	Text         string                      `json:"text"`
	ChannelId    int32                       `json:"channel_id"`
	SentenceList []*FlashRecognitionSentence `json:"sentence_list,omitempty"`
}

FlashRecognitionResult is a single entry in the result list of a flash recognition response.

type FlashRecognitionSentence ¶ added in v1.0.3

// FlashRecognitionSentence is one element of
// FlashRecognitionResult.SentenceList.
//
// It holds the sentence text, unsigned start and end times, a speaker ID,
// and an optional word list. WordList is tagged omitempty, so it is left
// out of the JSON encoding when nil or empty.
// NOTE(review): the time unit of StartTime/EndTime is not shown here
// (presumably milliseconds from the start of the audio) — confirm.
type FlashRecognitionSentence struct {
	Text      string           `json:"text"`
	StartTime uint32           `json:"start_time"`
	EndTime   uint32           `json:"end_time"`
	SpeakerId int32            `json:"speaker_id"`
	WordList  []*FlashWordData `json:"word_list,omitempty"`
}

FlashRecognitionSentence is a single sentence within a flash recognition result.

type FlashRecognizer ¶ added in v1.0.3

// FlashRecognizer is the client type whose Recognize method performs flash
// recognition. Instances are created with NewFlashRecognizer, which takes
// the app ID and credential.
type FlashRecognizer struct {
	// AppID is the application ID supplied to NewFlashRecognizer.
	AppID string

	// ProxyURL is an optional proxy setting; it is not a constructor
	// parameter, so it is set on the struct directly.
	// NOTE(review): expected URL format is not shown here — confirm.
	ProxyURL string

	// Credential is the credential supplied to NewFlashRecognizer.
	Credential *common.Credential
}

FlashRecognizer is the entry point for the ASR flash recognizer.

func NewFlashRecognizer ¶ added in v1.0.3

func NewFlashRecognizer(appID string, credential *common.Credential) *FlashRecognizer

NewFlashRecognizer creates an instance of FlashRecognizer.

func (*FlashRecognizer) Recognize ¶ added in v1.0.3

func (recognizer *FlashRecognizer) Recognize(req *FlashRecognitionRequest,
	videoData []byte) (*FlashRecognitionResponse, error)

Recognize performs flash recognition for req and videoData, returning a FlashRecognitionResponse or an error.

type FlashWordData ¶ added in v1.0.3

// FlashWordData is one element of FlashRecognitionSentence.WordList.
//
// It holds a word together with unsigned start and end times and a stable
// flag. It has the same fields and JSON tags as
// SpeechRecognitionResponseResultWord.
// NOTE(review): the time unit and the meaning of StableFlag values are not
// shown here — confirm against the API reference.
type FlashWordData struct {
	Word       string `json:"word"`
	StartTime  uint32 `json:"start_time"`
	EndTime    uint32 `json:"end_time"`
	StableFlag uint32 `json:"stable_flag"`
}

FlashWordData describes a single word within a flash recognition sentence.

type SpeechRecognitionListener ¶

// SpeechRecognitionListener is the callback interface passed to
// NewSpeechRecognizer. Callers implement it to receive recognition events.
//
// Every method receives a *SpeechRecognitionResponse; OnFail additionally
// receives the error.
// NOTE(review): when each callback fires, and on which goroutine, is
// inferred from the method names only — confirm against the implementation.
type SpeechRecognitionListener interface {
	// OnRecognitionStart: presumably invoked once recognition has started.
	OnRecognitionStart(*SpeechRecognitionResponse)
	// OnSentenceBegin: presumably invoked when a new sentence begins.
	OnSentenceBegin(*SpeechRecognitionResponse)
	// OnRecognitionResultChange: presumably invoked on intermediate result updates.
	OnRecognitionResultChange(*SpeechRecognitionResponse)
	// OnSentenceEnd: presumably invoked when a sentence ends.
	OnSentenceEnd(*SpeechRecognitionResponse)
	// OnRecognitionComplete: presumably invoked when recognition has finished.
	OnRecognitionComplete(*SpeechRecognitionResponse)
	// OnFail receives the response together with the error describing the failure.
	OnFail(*SpeechRecognitionResponse, error)
}

SpeechRecognitionListener must be implemented by the user to receive recognition results.

type SpeechRecognitionResponse ¶

// SpeechRecognitionResponse is the value handed to every
// SpeechRecognitionListener callback.
//
// Code and Message are always encoded. VoiceID, MessageID and Final are
// tagged omitempty and are left out of the JSON encoding when they hold
// their zero value.
//
// Result is also tagged omitempty, but it is a struct value rather than a
// pointer. encoding/json never treats a struct value as empty, so Result is
// always emitted when marshalling with that package.
// NOTE(review): the code value that means success and the meaning of Final
// are not shown here — confirm against the API reference.
type SpeechRecognitionResponse struct {
	Code      int                             `json:"code"`
	Message   string                          `json:"message"`
	VoiceID   string                          `json:"voice_id,omitempty"`
	MessageID string                          `json:"message_id,omitempty"`
	Final     uint32                          `json:"final,omitempty"`
	Result    SpeechRecognitionResponseResult `json:"result,omitempty"`
}

SpeechRecognitionResponse is the response of the ASR service.

type SpeechRecognitionResponseResult ¶

// SpeechRecognitionResponseResult is the Result payload of a
// SpeechRecognitionResponse.
//
// It holds a slice type, an index, unsigned start and end times, the
// recognized text (VoiceTextStr), a word count (WordSize) and the word list.
// WordList holds values, not pointers, and has no omitempty, so a nil slice
// encodes as JSON null.
// NOTE(review): the SliceType value set and the time unit are not shown
// here; presumably WordSize equals len(WordList) — confirm.
type SpeechRecognitionResponseResult struct {
	SliceType    uint32                                `json:"slice_type"`
	Index        int                                   `json:"index"`
	StartTime    uint32                                `json:"start_time"`
	EndTime      uint32                                `json:"end_time"`
	VoiceTextStr string                                `json:"voice_text_str"`
	WordSize     uint32                                `json:"word_size"`
	WordList     []SpeechRecognitionResponseResultWord `json:"word_list"`
}

SpeechRecognitionResponseResult is the recognition result carried in a SpeechRecognitionResponse.

type SpeechRecognitionResponseResultWord ¶

// SpeechRecognitionResponseResultWord is one element of
// SpeechRecognitionResponseResult.WordList.
//
// It holds a word together with unsigned start and end times and a stable
// flag. It has the same fields and JSON tags as FlashWordData.
// NOTE(review): the time unit and the meaning of StableFlag values are not
// shown here — confirm against the API reference.
type SpeechRecognitionResponseResultWord struct {
	Word       string `json:"word"`
	StartTime  uint32 `json:"start_time"`
	EndTime    uint32 `json:"end_time"`
	StableFlag uint32 `json:"stable_flag"`
}

SpeechRecognitionResponseResultWord describes a single word within a SpeechRecognitionResponseResult.

type SpeechRecognizer ¶

// SpeechRecognizer is the client type for a recognition session, driven
// through its Start, Write and Stop methods. Instances are created with
// NewSpeechRecognizer, which takes the app ID, credential, engine model type
// and listener.
//
// The remaining exported fields are not constructor parameters, so they are
// set on the struct directly.
// NOTE(review): presumably they must be set before Start is called, and
// their valid values follow the service's request parameters — confirm.
type SpeechRecognizer struct {
	// Request parameters.
	AppID            string
	EngineModelType  string
	VoiceFormat      int
	NeedVad          int
	HotwordId        string
	CustomizationId  string
	FilterDirty      int
	FilterModal      int
	FilterPunc       int
	ConvertNumMode   int
	WordInfo         int
	VadSilenceTime   int
	ReinforceHotword int

	// Credential is the credential supplied to NewSpeechRecognizer.
	Credential *common.Credential

	// VoiceID is the UUID identifying the voice stream.
	VoiceID string

	// ProxyURL is an optional proxy setting.
	ProxyURL string
	// contains filtered or unexported fields
}

SpeechRecognizer is the entry point for the ASR service.

func NewSpeechRecognizer ¶

func NewSpeechRecognizer(appID string, credential *common.Credential, engineModelType string,
	listener SpeechRecognitionListener) *SpeechRecognizer

NewSpeechRecognizer creates an instance of SpeechRecognizer.

func (*SpeechRecognizer) Start ¶

func (recognizer *SpeechRecognizer) Start() error

Start connects to the server and starts a recognition session.

func (*SpeechRecognizer) Stop ¶

func (recognizer *SpeechRecognizer) Stop() error

Stop waits for the recognition process to complete.

func (*SpeechRecognizer) Write ¶

func (recognizer *SpeechRecognizer) Write(data []byte) error

Write writes data into the channel.

Source Files ¶

View all Source files

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL