common

package
Version: v1.15.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 27, 2021 License: MIT Imports: 2 Imported by: 1

Documentation

Overview

Package common contains the definitions for many of the shared objects and properties in the Speech SDK.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

This section is empty.

Types

type CancellationErrorCode

type CancellationErrorCode int

CancellationErrorCode defines the error code reported when CancellationReason is Error.

const (

	// NoError indicates that no error occurred.
	// If CancellationReason is EndOfStream, CancellationErrorCode
	// is set to NoError.
	NoError CancellationErrorCode = 0

	// AuthenticationFailure indicates an authentication error.
	// An authentication error occurs if subscription key or authorization token is invalid, expired,
	// or does not match the region being used.
	AuthenticationFailure CancellationErrorCode = 1

	// BadRequest presumably indicates that the request itself was invalid.
	// NOTE(review): other Speech SDK language bindings describe this code as "one or more recognition
	// parameters are invalid or the audio format is not supported" — confirm that wording applies here.
	BadRequest CancellationErrorCode = 2

	// TooManyRequests indicates that the number of parallel requests exceeded the number of allowed
	// concurrent transcriptions for the subscription.
	TooManyRequests CancellationErrorCode = 3

	// Forbidden indicates that the free subscription used by the request ran out of quota.
	Forbidden CancellationErrorCode = 4

	// ConnectionFailure indicates a connection error.
	ConnectionFailure CancellationErrorCode = 5

	// ServiceTimeout indicates a time-out error when waiting for response from service.
	ServiceTimeout CancellationErrorCode = 6

	// ServiceError indicates that an error is returned by the service.
	ServiceError CancellationErrorCode = 7

	// ServiceUnavailable indicates that the service is currently unavailable.
	ServiceUnavailable CancellationErrorCode = 8

	// RuntimeError indicates an unexpected runtime error.
	RuntimeError CancellationErrorCode = 9
)

type CancellationReason

type CancellationReason int

CancellationReason defines the possible reasons a recognition result might be canceled.

const (
	// Error indicates that an error occurred during speech recognition.
	Error CancellationReason = 1

	// EndOfStream indicates that the end of the audio stream was reached.
	EndOfStream CancellationReason = 2
)

type CarbonError

// CarbonError is an error value that carries a numeric code; its Error method
// lets it be used wherever an error is expected.
type CarbonError struct {
	// Code is the numeric code passed to NewCarbonError.
	// NOTE(review): presumably the status code returned by the native Speech SDK — confirm.
	Code uintptr
}

func NewCarbonError

func NewCarbonError(code uintptr) CarbonError

func (CarbonError) Error

func (e CarbonError) Error() string

type OperationOutcome

type OperationOutcome struct {
	// Error is present (not nil) if the operation failed
	Error error
}

OperationOutcome is the base type of operation outcomes.

func (OperationOutcome) Failed

func (outcome OperationOutcome) Failed() bool

Failed checks if the operation failed.

type OutputFormat

type OutputFormat int

OutputFormat defines the output formats.

const (
	// Simple is the simple output format.
	Simple OutputFormat = 0
	// Detailed is the detailed output format.
	Detailed OutputFormat = 1
)

type ProfanityOption

type ProfanityOption int

ProfanityOption defines the profanity option.

const (
	// Masked is the masked profanity option.
	Masked ProfanityOption = 0

	// Removed is the removed profanity option.
	Removed ProfanityOption = 1

	// Raw is the raw profanity option.
	Raw ProfanityOption = 2
)

type PropertyCollection

type PropertyCollection struct {
	// contains filtered or unexported fields
}

PropertyCollection is a type used to retrieve or set property values in a property collection.

func NewPropertyCollectionFromHandle

func NewPropertyCollectionFromHandle(handle SPXHandle) PropertyCollection

NewPropertyCollectionFromHandle creates a PropertyCollection from a handle (for internal use)

func (PropertyCollection) Close

func (properties PropertyCollection) Close()

Close disposes the associated resources.

func (PropertyCollection) GetProperty

func (properties PropertyCollection) GetProperty(id PropertyID, defaultValue string) string

GetProperty returns value of a property. If the property value is not defined, the specified default value is returned.

func (PropertyCollection) GetPropertyByString

func (properties PropertyCollection) GetPropertyByString(name string, defaultValue string) string

GetPropertyByString returns value of a property. If the property value is not defined, the specified default value is returned.

func (PropertyCollection) SetProperty

func (properties PropertyCollection) SetProperty(id PropertyID, value string) error

SetProperty sets the value of a property.

func (PropertyCollection) SetPropertyByString

func (properties PropertyCollection) SetPropertyByString(name string, value string) error

SetPropertyByString sets the value of a property.

type PropertyID

type PropertyID int

PropertyID defines speech property ids. Changed in version 1.4.0.

const (
	// SpeechServiceConnectionKey is the Cognitive Services Speech Service subscription key. If you are using an
	// intent recognizer, you need to specify the LUIS endpoint key for your particular LUIS app. Under normal
	// circumstances, you shouldn't have to use this property directly.
	// Instead, use NewSpeechConfigFromSubscription.
	SpeechServiceConnectionKey PropertyID = 1000

	// SpeechServiceConnectionEndpoint is the Cognitive Services Speech Service endpoint (url).
	// Under normal circumstances, you shouldn't have to use this property directly.
	// Instead, use NewSpeechConfigFromEndpoint.
	// NOTE: This endpoint is not the same as the endpoint used to obtain an access token.
	SpeechServiceConnectionEndpoint PropertyID = 1001

	// SpeechServiceConnectionRegion is the Cognitive Services Speech Service region. Under normal circumstances,
	// you shouldn't have to use this property directly.
	// Instead, use NewSpeechConfigFromSubscription, NewSpeechConfigFromEndpoint, NewSpeechConfigFromHost,
	// NewSpeechConfigFromAuthorizationToken.
	SpeechServiceConnectionRegion PropertyID = 1002

	// SpeechServiceAuthorizationToken is the Cognitive Services Speech Service authorization token (aka access token).
	// Under normal circumstances, you shouldn't have to use this property directly.
	// Instead, use NewSpeechConfigFromAuthorizationToken,
	// Recognizer.SetAuthorizationToken
	SpeechServiceAuthorizationToken PropertyID = 1003

	// SpeechServiceAuthorizationType is the Cognitive Services Speech Service authorization type. Currently unused.
	SpeechServiceAuthorizationType PropertyID = 1004

	// SpeechServiceConnectionEndpointID is the Cognitive Services Custom Speech Service endpoint id. Under normal
	// circumstances, you shouldn't have to use this property directly.
	// Instead use SpeechConfig.SetEndpointId.
	// NOTE: The endpoint id is available in the Custom Speech Portal, listed under Endpoint Details.
	SpeechServiceConnectionEndpointID PropertyID = 1005

	// SpeechServiceConnectionHost is the Cognitive Services Speech Service host (url). Under normal circumstances,
	// you shouldn't have to use this property directly.
	// Instead, use NewSpeechConfigFromHost.
	SpeechServiceConnectionHost PropertyID = 1006

	// SpeechServiceConnectionProxyHostName is the host name of the proxy server used to connect to the Cognitive Services
	// Speech Service. Under normal circumstances, you shouldn't have to use this property directly.
	// Instead, use SpeechConfig.SetProxy.
	SpeechServiceConnectionProxyHostName PropertyID = 1100

	// SpeechServiceConnectionProxyPort is the port of the proxy server used to connect to the Cognitive Services Speech
	// Service. Under normal circumstances, you shouldn't have to use this property directly.
	// Instead, use SpeechConfig.SetProxy.
	SpeechServiceConnectionProxyPort PropertyID = 1101

	// SpeechServiceConnectionProxyUserName is the user name of the proxy server used to connect to the Cognitive Services
	// Speech Service. Under normal circumstances, you shouldn't have to use this property directly.
	// Instead, use SpeechConfig.SetProxy.
	SpeechServiceConnectionProxyUserName PropertyID = 1102

	// SpeechServiceConnectionProxyPassword is the password of the proxy server used to connect to the Cognitive Services
	// Speech Service. Under normal circumstances, you shouldn't have to use this property directly.
	// Instead, use SpeechConfig.SetProxy.
	SpeechServiceConnectionProxyPassword PropertyID = 1103

	// SpeechServiceConnectionURL is the URL string built from speech configuration. This property is intended to be read-only.
	// The SDK is using it internally.
	SpeechServiceConnectionURL PropertyID = 1104

	// SpeechServiceConnectionTranslationToLanguages is the list of comma separated languages used as target translation
	// languages. Under normal circumstances, you shouldn't have to use this property directly.
	// Instead use SpeechTranslationConfig.AddTargetLanguage and SpeechTranslationConfig.GetTargetLanguages.
	SpeechServiceConnectionTranslationToLanguages PropertyID = 2000

	// SpeechServiceConnectionTranslationVoice is the name of the Cognitive Service Text to Speech Service voice. Under normal
	// circumstances, you shouldn't have to use this property directly.
	// Instead use SpeechTranslationConfig.SetVoiceName.
	// NOTE: Valid voice names can be found at https://aka.ms/csspeech/voicenames.
	SpeechServiceConnectionTranslationVoice PropertyID = 2001

	// SpeechServiceConnectionTranslationFeatures is the translation features. For internal use.
	SpeechServiceConnectionTranslationFeatures PropertyID = 2002

	// SpeechServiceConnectionIntentRegion is the Language Understanding Service region. Under normal circumstances, you
	// shouldn't have to use this property directly.
	// Instead use LanguageUnderstandingModel.
	SpeechServiceConnectionIntentRegion PropertyID = 2003

	// SpeechServiceConnectionRecoMode is the Cognitive Services Speech Service recognition mode. Can be "INTERACTIVE",
	// "CONVERSATION" or "DICTATION".
	// This property is intended to be read-only. The SDK is using it internally.
	SpeechServiceConnectionRecoMode PropertyID = 3000

	// SpeechServiceConnectionRecoLanguage is the spoken language to be recognized (in BCP-47 format). Under normal
	// circumstances, you shouldn't have to use this property directly.
	// Instead, use SpeechConfig.SetSpeechRecognitionLanguage.
	SpeechServiceConnectionRecoLanguage PropertyID = 3001

	// SpeechSessionID is the session id. This id is a universally unique identifier (aka UUID) representing a specific
	// binding of an audio input stream and the underlying speech recognition instance to which it is bound. Under normal
	// circumstances, you shouldn't have to use this property directly.
	// Instead use SessionEventArgs.SessionId.
	SpeechSessionID PropertyID = 3002

	// SpeechServiceConnectionUserDefinedQueryParameters are the query parameters provided by users. They will be passed
	// to the service as URL query parameters.
	SpeechServiceConnectionUserDefinedQueryParameters PropertyID = 3003

	// SpeechServiceConnectionSynthLanguage is the spoken language to be synthesized (e.g. en-US)
	SpeechServiceConnectionSynthLanguage PropertyID = 3100

	// SpeechServiceConnectionSynthVoice is the name of the TTS voice to be used for speech synthesis
	SpeechServiceConnectionSynthVoice PropertyID = 3101

	// SpeechServiceConnectionSynthOutputFormat is the string to specify TTS output audio format.
	SpeechServiceConnectionSynthOutputFormat PropertyID = 3102

	// SpeechServiceConnectionInitialSilenceTimeoutMs is the initial silence timeout value (in milliseconds) used by the
	// service.
	SpeechServiceConnectionInitialSilenceTimeoutMs PropertyID = 3200

	// SpeechServiceConnectionEndSilenceTimeoutMs is the end silence timeout value (in milliseconds) used by the service.
	SpeechServiceConnectionEndSilenceTimeoutMs PropertyID = 3201

	// SpeechServiceConnectionEnableAudioLogging is a boolean value specifying whether audio logging is enabled in the service
	// or not.
	SpeechServiceConnectionEnableAudioLogging PropertyID = 3202

	// SpeechServiceConnectionAutoDetectSourceLanguages is the auto detect source languages.
	SpeechServiceConnectionAutoDetectSourceLanguages PropertyID = 3300

	// SpeechServiceConnectionAutoDetectSourceLanguageResult is the auto detect source language result.
	SpeechServiceConnectionAutoDetectSourceLanguageResult PropertyID = 3301

	// SpeechServiceResponseRequestDetailedResultTrueFalse is the requested Cognitive Services Speech Service response output
	// format (simple or detailed). Under normal circumstances, you shouldn't have to use this property directly.
	// Instead use SpeechConfig.SetOutputFormat.
	SpeechServiceResponseRequestDetailedResultTrueFalse PropertyID = 4000

	// SpeechServiceResponseRequestProfanityFilterTrueFalse is the requested Cognitive Services Speech Service response
	// output profanity level. Currently unused.
	SpeechServiceResponseRequestProfanityFilterTrueFalse PropertyID = 4001

	// SpeechServiceResponseProfanityOption is the requested Cognitive Services Speech Service response output profanity
	// setting.
	// Allowed values are "masked", "removed", and "raw".
	SpeechServiceResponseProfanityOption PropertyID = 4002

	// SpeechServiceResponsePostProcessingOption is a string value specifying which post processing option should be used
	// by the service.
	// Allowed values are "TrueText".
	SpeechServiceResponsePostProcessingOption PropertyID = 4003

	// SpeechServiceResponseRequestWordLevelTimestamps is a boolean value specifying whether to include word-level
	// timestamps in the response result.
	SpeechServiceResponseRequestWordLevelTimestamps PropertyID = 4004

	// SpeechServiceResponseStablePartialResultThreshold is the number of times a word has to be in partial results
	// to be returned.
	SpeechServiceResponseStablePartialResultThreshold PropertyID = 4005

	// SpeechServiceResponseOutputFormatOption is a string value specifying the output format option in the response
	// result. Internal use only.
	SpeechServiceResponseOutputFormatOption PropertyID = 4006

	// SpeechServiceResponseTranslationRequestStablePartialResult is a boolean value to request for stabilizing translation
	// partial results by omitting words in the end.
	SpeechServiceResponseTranslationRequestStablePartialResult PropertyID = 4100

	// SpeechServiceResponseJSONResult is the Cognitive Services Speech Service response output (in JSON format). This
	// property is available on recognition result objects only.
	SpeechServiceResponseJSONResult PropertyID = 5000

	// SpeechServiceResponseJSONErrorDetails is the Cognitive Services Speech Service error details (in JSON format).
	// Under normal circumstances, you shouldn't have to use this property directly.
	// Instead, use CancellationDetails.ErrorDetails.
	SpeechServiceResponseJSONErrorDetails PropertyID = 5001

	// SpeechServiceResponseRecognitionLatencyMs is the recognition latency in milliseconds. Read-only, available on final
	// speech/translation/intent results. This measures the latency between when an audio input is received by the SDK, and
	// the moment the final result is received from the service. The SDK computes the time difference between the last audio
	// fragment from the audio input that is contributing to the final result, and the time the final result is received from
	// the speech service.
	SpeechServiceResponseRecognitionLatencyMs PropertyID = 5002

	// CancellationDetailsReason is the cancellation reason. Currently unused.
	CancellationDetailsReason PropertyID = 6000

	// CancellationDetailsReasonText is the cancellation text. Currently unused.
	CancellationDetailsReasonText PropertyID = 6001

	// CancellationDetailsReasonDetailedText is the cancellation detailed text. Currently unused.
	CancellationDetailsReasonDetailedText PropertyID = 6002

	// LanguageUnderstandingServiceResponseJSONResult is the Language Understanding Service response output (in JSON format).
	// Available via IntentRecognitionResult.Properties.
	LanguageUnderstandingServiceResponseJSONResult PropertyID = 7000

	// AudioConfigDeviceNameForCapture is the device name for audio capture. Under normal circumstances, you shouldn't have
	// to use this property directly.
	// Instead, use AudioConfig.FromMicrophoneInput.
	AudioConfigDeviceNameForCapture PropertyID = 8000

	// AudioConfigNumberOfChannelsForCapture is the number of channels for audio capture. Internal use only.
	AudioConfigNumberOfChannelsForCapture PropertyID = 8001

	// AudioConfigSampleRateForCapture is the sample rate (in Hz) for audio capture. Internal use only.
	AudioConfigSampleRateForCapture PropertyID = 8002

	// AudioConfigBitsPerSampleForCapture is the number of bits of each sample for audio capture. Internal use only.
	AudioConfigBitsPerSampleForCapture PropertyID = 8003

	// AudioConfigAudioSource is the audio source. Allowed values are "Microphones", "File", and "Stream".
	AudioConfigAudioSource PropertyID = 8004

	// SpeechLogFilename is the file name to write logs.
	SpeechLogFilename PropertyID = 9001

	// ConversationApplicationID is the identifier used to connect to the backend service.
	ConversationApplicationID PropertyID = 10000

	// ConversationDialogType is the type of dialog backend to connect to.
	ConversationDialogType PropertyID = 10001

	// ConversationInitialSilenceTimeout is the silence timeout for listening.
	ConversationInitialSilenceTimeout PropertyID = 10002

	// ConversationFromID is the FromId to be used on speech recognition activities.
	ConversationFromID PropertyID = 10003

	// ConversationConversationID is the ConversationId for the session.
	ConversationConversationID PropertyID = 10004

	// ConversationCustomVoiceDeploymentIDs is a comma separated list of custom voice deployment ids.
	ConversationCustomVoiceDeploymentIDs PropertyID = 10005

	// ConversationSpeechActivityTemplate is used to stamp properties in the template on the activity generated by the service for speech.
	ConversationSpeechActivityTemplate PropertyID = 10006

	// DataBufferTimeStamp is the time stamp associated to data buffer written by client when using Pull/Push
	// audio input streams.
	// The time stamp is a 64-bit value with a resolution of 90 kHz. It is the same as the presentation timestamp
	// in an MPEG transport stream. See https://en.wikipedia.org/wiki/Presentation_timestamp
	DataBufferTimeStamp PropertyID = 11001

	// DataBufferUserID is the user id associated to data buffer written by client when using Pull/Push audio
	// input streams.
	DataBufferUserID PropertyID = 11002
)

type ResultReason

type ResultReason int

ResultReason specifies the possible reasons a recognition result might be generated.

const (
	// NoMatch indicates speech could not be recognized. More details can be found in the NoMatchDetails object.
	NoMatch ResultReason = 0

	// Canceled indicates that the recognition was canceled. More details can be found using the CancellationDetails object.
	Canceled ResultReason = 1

	// RecognizingSpeech indicates the speech result contains hypothesis text.
	RecognizingSpeech ResultReason = 2

	// RecognizedSpeech indicates the speech result contains final text that has been recognized.
	// Speech Recognition is now complete for this phrase.
	RecognizedSpeech ResultReason = 3

	// RecognizingIntent indicates the intent result contains hypothesis text and intent.
	RecognizingIntent ResultReason = 4

	// RecognizedIntent indicates the intent result contains final text and intent.
	// Speech Recognition and Intent determination are now complete for this phrase.
	RecognizedIntent ResultReason = 5

	// TranslatingSpeech indicates the translation result contains hypothesis text and its translation(s).
	TranslatingSpeech ResultReason = 6

	// TranslatedSpeech indicates the translation result contains final text and corresponding translation(s).
	// Speech Recognition and Translation are now complete for this phrase.
	TranslatedSpeech ResultReason = 7

	// SynthesizingAudio indicates the synthesized audio result contains a non-zero amount of audio data
	SynthesizingAudio ResultReason = 8

	// SynthesizingAudioCompleted indicates the synthesized audio is now complete for this phrase.
	SynthesizingAudioCompleted ResultReason = 9

	// RecognizingKeyword indicates the speech result contains (unverified) keyword text.
	RecognizingKeyword ResultReason = 10

	// RecognizedKeyword indicates that keyword recognition completed recognizing the given keyword.
	RecognizedKeyword ResultReason = 11

	// SynthesizingAudioStarted indicates the speech synthesis is now started
	SynthesizingAudioStarted ResultReason = 12
)

type SPXHandle

type SPXHandle uintptr

SPXHandle is the internal handle type

type ServicePropertyChannel

type ServicePropertyChannel int

ServicePropertyChannel defines channels used to pass property settings to service.

const (
	// URIQueryParameter uses URI query parameter to pass property settings to service.
	URIQueryParameter ServicePropertyChannel = 0
)

type SpeechSynthesisOutputFormat

type SpeechSynthesisOutputFormat int

SpeechSynthesisOutputFormat defines the possible speech synthesis output audio formats.

const (
	// Raw8Khz8BitMonoMULaw stands for raw-8khz-8bit-mono-mulaw
	Raw8Khz8BitMonoMULaw SpeechSynthesisOutputFormat = 1

	// Riff16Khz16KbpsMonoSiren stands for riff-16khz-16kbps-mono-siren
	Riff16Khz16KbpsMonoSiren SpeechSynthesisOutputFormat = 2

	// Audio16Khz16KbpsMonoSiren stands for audio-16khz-16kbps-mono-siren
	Audio16Khz16KbpsMonoSiren SpeechSynthesisOutputFormat = 3

	// Audio16Khz32KBitRateMonoMp3 stands for audio-16khz-32kbitrate-mono-mp3
	Audio16Khz32KBitRateMonoMp3 SpeechSynthesisOutputFormat = 4

	// Audio16Khz128KBitRateMonoMp3 stands for audio-16khz-128kbitrate-mono-mp3
	Audio16Khz128KBitRateMonoMp3 SpeechSynthesisOutputFormat = 5

	// Audio16Khz64KBitRateMonoMp3 stands for audio-16khz-64kbitrate-mono-mp3
	Audio16Khz64KBitRateMonoMp3 SpeechSynthesisOutputFormat = 6

	// Audio24Khz48KBitRateMonoMp3 stands for audio-24khz-48kbitrate-mono-mp3
	Audio24Khz48KBitRateMonoMp3 SpeechSynthesisOutputFormat = 7

	// Audio24Khz96KBitRateMonoMp3 stands for audio-24khz-96kbitrate-mono-mp3
	Audio24Khz96KBitRateMonoMp3 SpeechSynthesisOutputFormat = 8

	// Audio24Khz160KBitRateMonoMp3 stands for audio-24khz-160kbitrate-mono-mp3
	Audio24Khz160KBitRateMonoMp3 SpeechSynthesisOutputFormat = 9

	// Raw16Khz16BitMonoTrueSilk stands for raw-16khz-16bit-mono-truesilk
	Raw16Khz16BitMonoTrueSilk SpeechSynthesisOutputFormat = 10

	// Riff16Khz16BitMonoPcm stands for riff-16khz-16bit-mono-pcm
	Riff16Khz16BitMonoPcm SpeechSynthesisOutputFormat = 11

	// Riff8Khz16BitMonoPcm stands for riff-8khz-16bit-mono-pcm
	Riff8Khz16BitMonoPcm SpeechSynthesisOutputFormat = 12

	// Riff24Khz16BitMonoPcm stands for riff-24khz-16bit-mono-pcm
	Riff24Khz16BitMonoPcm SpeechSynthesisOutputFormat = 13

	// Riff8Khz8BitMonoMULaw stands for riff-8khz-8bit-mono-mulaw
	Riff8Khz8BitMonoMULaw SpeechSynthesisOutputFormat = 14

	// Raw16Khz16BitMonoPcm stands for raw-16khz-16bit-mono-pcm
	Raw16Khz16BitMonoPcm SpeechSynthesisOutputFormat = 15

	// Raw24Khz16BitMonoPcm stands for raw-24khz-16bit-mono-pcm
	Raw24Khz16BitMonoPcm SpeechSynthesisOutputFormat = 16

	// Raw8Khz16BitMonoPcm stands for raw-8khz-16bit-mono-pcm
	Raw8Khz16BitMonoPcm SpeechSynthesisOutputFormat = 17
)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL