asr

package
v0.1.2 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Feb 8, 2023 License: Apache-2.0 Imports: 18 Imported by: 0

Documentation

Index

Constants

View Source
// Audio format identifiers, declared as untyped integer constants.
// NOTE(review): presumably these are the values expected by
// SpeechRecognizer.VoiceFormat (an int field) — confirm against the
// service documentation.
const (
	AudioFormatPCM   = 1
	AudioFormatSpeex = 4
	AudioFormatSilk  = 6
	AudioFormatMp3   = 8
	AudioFormatOpus  = 10
	AudioFormatWav   = 12
	AudioFormatM4A   = 14
	AudioFormatAAC   = 16
)

AudioFormat type

Variables

This section is empty.

Functions

This section is empty.

Types

type FlashRecognitionRequest

// FlashRecognitionRequest is the request type accepted by
// FlashRecognizer.Recognize. Every field carries a json tag giving its
// snake_case key.
//
// NOTE(review): the integer fields (SpeakerDiarization, Filter*,
// ConvertNumMode, WordInfo, FirstChannelOnly, ReinforceHotword,
// SentenceMaxLength) look like numeric switches or modes; their accepted
// values are not visible here — confirm against the service documentation.
type FlashRecognitionRequest struct {
	EngineType         string `json:"engine_type"`
	// VoiceFormat is a string here, whereas SpeechRecognizer.VoiceFormat is an int.
	VoiceFormat        string `json:"voice_format"`
	SpeakerDiarization uint32 `json:"speaker_diarization"`
	HotwordId          string `json:"hotword_id"`
	CustomizationId    string `json:"customization_id"`
	FilterDirty        int32  `json:"filter_dirty"`
	FilterModal        int32  `json:"filter_modal"`
	FilterPunc         int32  `json:"filter_punc"`
	ConvertNumMode     int32  `json:"convert_num_mode"`
	WordInfo           int32  `json:"word_info"`
	FirstChannelOnly   int32  `json:"first_channel_only"`
	ReinforceHotword   int32  `json:"reinforce_hotword"`
	SentenceMaxLength  int32  `json:"sentence_max_length"`
}

FlashRecognitionRequest is the request passed to FlashRecognizer.Recognize.

type FlashRecognitionResponse

// FlashRecognitionResponse is the result type returned by
// FlashRecognizer.Recognize.
type FlashRecognitionResponse struct {
	RequestId     string                    `json:"request_id"`
	Code          int                       `json:"code"`
	Message       string                    `json:"message"`
	// NOTE(review): the unit of AudioDuration is not stated here — confirm.
	AudioDuration int64                     `json:"audio_duration"`
	// FlashResult is tagged omitempty, so encoding/json leaves the key out
	// when the slice is nil or empty.
	FlashResult   []*FlashRecognitionResult `json:"flash_result,omitempty"`
}

FlashRecognitionResponse is the response returned by FlashRecognizer.Recognize.

type FlashRecognitionResult

// FlashRecognitionResult is the element type of
// FlashRecognitionResponse.FlashResult. It pairs a text with a channel id
// and an optional list of sentences.
type FlashRecognitionResult struct {
	Text         string                      `json:"text"`
	ChannelId    int32                       `json:"channel_id"`
	// SentenceList is tagged omitempty, so encoding/json leaves the key out
	// when the slice is nil or empty.
	SentenceList []*FlashRecognitionSentence `json:"sentence_list,omitempty"`
}

FlashRecognitionResult is one entry of FlashRecognitionResponse.FlashResult.

type FlashRecognitionSentence

// FlashRecognitionSentence is the element type of
// FlashRecognitionResult.SentenceList.
//
// NOTE(review): the unit of StartTime/EndTime is not stated here — confirm
// against the service documentation.
type FlashRecognitionSentence struct {
	Text      string           `json:"text"`
	StartTime uint32           `json:"start_time"`
	EndTime   uint32           `json:"end_time"`
	SpeakerId int32            `json:"speaker_id"`
	// WordList is tagged omitempty, so encoding/json leaves the key out when
	// the slice is nil or empty.
	WordList  []*FlashWordData `json:"word_list,omitempty"`
}

FlashRecognitionSentence is one entry of FlashRecognitionResult.SentenceList.

type FlashRecognizer

// FlashRecognizer is the entry for ASR flash recognizer.
// NewFlashRecognizer takes an appID and a credential, matching the AppID
// and Credential fields below.
type FlashRecognizer struct {
	AppID string

	// ProxyURL is labelled "for proxy" by the author; how it is applied is
	// not visible here.
	ProxyURL string

	Credential *common.Credential
}

FlashRecognizer is the entry for ASR flash recognizer

func NewFlashRecognizer

func NewFlashRecognizer(appID string, credential *common.Credential) *FlashRecognizer

NewFlashRecognizer creates instance of FlashRecognizer

func (*FlashRecognizer) Recognize

func (recognizer *FlashRecognizer) Recognize(req *FlashRecognitionRequest,
	videoData []byte) (*FlashRecognitionResponse, error)

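Recognize takes a FlashRecognitionRequest and a byte slice (named videoData) and returns a FlashRecognitionResponse or an error.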

type FlashWordData

// FlashWordData is the element type of FlashRecognitionSentence.WordList.
// It has the same fields and json tags as
// SpeechRecognitionResponseResultWord.
//
// NOTE(review): the unit of StartTime/EndTime and the meaning of StableFlag
// are not stated here — confirm against the service documentation.
type FlashWordData struct {
	Word       string `json:"word"`
	StartTime  uint32 `json:"start_time"`
	EndTime    uint32 `json:"end_time"`
	StableFlag uint32 `json:"stable_flag"`
}

FlashWordData is one entry of FlashRecognitionSentence.WordList.

type SpeechRecognitionListener

// SpeechRecognitionListener is the callback interface passed to
// NewSpeechRecognizer; the caller supplies the implementation. Every method
// receives a *SpeechRecognitionResponse, and OnFail additionally receives
// an error.
//
// NOTE(review): the exact point in a session at which each callback fires
// is implied only by the method names — confirm against the implementation.
type SpeechRecognitionListener interface {
	OnRecognitionStart(*SpeechRecognitionResponse)
	OnSentenceBegin(*SpeechRecognitionResponse)
	OnRecognitionResultChange(*SpeechRecognitionResponse)
	OnSentenceEnd(*SpeechRecognitionResponse)
	OnRecognitionComplete(*SpeechRecognitionResponse)
	OnFail(*SpeechRecognitionResponse, error)
}

SpeechRecognitionListener must be implemented by the user to receive recognition results.

type SpeechRecognitionResponse

// SpeechRecognitionResponse is the response of the ASR service. It is the
// argument type of every SpeechRecognitionListener callback.
type SpeechRecognitionResponse struct {
	Code      int                             `json:"code"`
	Message   string                          `json:"message"`
	VoiceID   string                          `json:"voice_id,omitempty"`
	MessageID string                          `json:"message_id,omitempty"`
	Final     uint32                          `json:"final,omitempty"`
	// NOTE(review): omitempty has no effect on a struct-valued field in
	// encoding/json, so "result" is always emitted when this type is
	// marshaled. Omitting it would require a pointer type (or omitzero on
	// Go 1.24+), which would change the exported field type.
	Result    SpeechRecognitionResponseResult `json:"result,omitempty"`
}

SpeechRecognitionResponse is the response of the ASR service.

type SpeechRecognitionResponseResult

// SpeechRecognitionResponseResult is the type of
// SpeechRecognitionResponse.Result.
//
// NOTE(review): the meaning of SliceType and the unit of StartTime/EndTime
// are not stated here — confirm against the service documentation.
type SpeechRecognitionResponseResult struct {
	SliceType    uint32                                `json:"slice_type"`
	Index        int                                   `json:"index"`
	StartTime    uint32                                `json:"start_time"`
	EndTime      uint32                                `json:"end_time"`
	VoiceTextStr string                                `json:"voice_text_str"`
	WordSize     uint32                                `json:"word_size"`
	// WordList holds values, not pointers, and has no omitempty, so a nil
	// slice marshals as null with encoding/json.
	WordList     []SpeechRecognitionResponseResultWord `json:"word_list"`
}

SpeechRecognitionResponseResult is the type of the SpeechRecognitionResponse.Result field.

type SpeechRecognitionResponseResultWord

// SpeechRecognitionResponseResultWord is the element type of
// SpeechRecognitionResponseResult.WordList. It has the same fields and json
// tags as FlashWordData.
//
// NOTE(review): the unit of StartTime/EndTime and the meaning of StableFlag
// are not stated here — confirm against the service documentation.
type SpeechRecognitionResponseResultWord struct {
	Word       string `json:"word"`
	StartTime  uint32 `json:"start_time"`
	EndTime    uint32 `json:"end_time"`
	StableFlag uint32 `json:"stable_flag"`
}

SpeechRecognitionResponseResultWord is one entry of SpeechRecognitionResponseResult.WordList.

type SpeechRecognizer

// SpeechRecognizer is the entry for ASR service. It is created by
// NewSpeechRecognizer and driven through its SetHost, Start, Write and Stop
// methods.
type SpeechRecognizer struct {
	// Request parameters. AppID and EngineModelType correspond to the appID
	// and engineModelType arguments of NewSpeechRecognizer.
	// NOTE(review): the int fields look like numeric switches or modes, and
	// VoiceFormat presumably takes one of the AudioFormat* constants — their
	// accepted values are not visible here; confirm against the service
	// documentation.
	AppID             string
	EngineModelType   string
	VoiceFormat       int
	NeedVad           int
	HotwordId         string
	CustomizationId   string
	FilterDirty       int
	FilterModal       int
	FilterPunc        int
	ConvertNumMode    int
	WordInfo          int
	VadSilenceTime    int
	ReinforceHotword  int
	NoiseThreshold    int
	FilterEmptyResult int

	Credential *common.Credential

	// VoiceID is described by the author as a "uuid for voice".
	VoiceID string

	// ProxyURL is labelled "for proxy" by the author; how it is applied is
	// not visible here.
	ProxyURL string
	// contains filtered or unexported fields
}

SpeechRecognizer is the entry for ASR service

func NewSpeechRecognizer

func NewSpeechRecognizer(appID string, credential *common.Credential, engineModelType string,
	listener SpeechRecognitionListener) *SpeechRecognizer

NewSpeechRecognizer creates instance of SpeechRecognizer

func (*SpeechRecognizer) SetHost

func (recognizer *SpeechRecognizer) SetHost(host string)

SetHost sets the request host address.

func (*SpeechRecognizer) Start

func (recognizer *SpeechRecognizer) Start() error

Start connects to the server and starts a recognition session.

func (*SpeechRecognizer) Stop

func (recognizer *SpeechRecognizer) Stop() error

Stop waits for the recognition process to complete.

func (*SpeechRecognizer) Write

func (recognizer *SpeechRecognizer) Write(data []byte) error

Write writes data into the channel.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL