sherpa_onnx

package module

v1.9.21 (Latest) | Published: Apr 17, 2024 | License: Apache-2.0 | Imports: 2 | Imported by: 0

Details

Valid go.mod file

The Go module system was introduced in Go 1.11 and is the official dependency management solution for Go.
Redistributable license

Redistributable licenses place minimal restrictions on how software can be used, modified, and redistributed.
Tagged version

Modules with tagged versions give importers more predictable builds.
Stable version

When a project reaches major version v1 it is considered stable.
Learn more about best practices

Repository

github.com/k2-fsa/sherpa-onnx-go-macos

Links

Open Source Insights

README ¶

Introduction

This repo contains the Go package of sherpa-onnx for macOS, supporting x86_64 (Intel chips) as well as aarch64 (Apple Silicon, e.g., M1).

Documentation ¶

Rendered for

Overview ¶

Speech recognition with Next-gen Kaldi.

sherpa-onnx is an open-source speech recognition framework for Next-gen Kaldi. It depends only on onnxruntime, supporting both streaming and non-streaming speech recognition.

It does not need to access the network during recognition and everything runs locally.

It supports a variety of platforms, such as Linux (x86_64, aarch64, arm), Windows (x86_64, x86), macOS (x86_64, arm64), etc.

Usage examples:

Real-time speech recognition from a microphone
Please see https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/real-time-speech-recognition-from-microphone
Decode files using a non-streaming model
Please see https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/non-streaming-decode-files
Decode files using a streaming model
Please see https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/streaming-decode-files
Convert text to speech using a non-streaming model
Please see https://github.com/k2-fsa/sherpa-onnx/tree/master/go-api-examples/non-streaming-tts

Index ¶

func DeleteCircularBuffer(buffer *CircularBuffer)
func DeleteOfflineRecognizer(recognizer *OfflineRecognizer)
func DeleteOfflineStream(stream *OfflineStream)
func DeleteOfflineTts(tts *OfflineTts)
func DeleteOnlineRecognizer(recognizer *OnlineRecognizer)
func DeleteOnlineStream(stream *OnlineStream)
func DeleteSpeakerEmbeddingExtractor(ex *SpeakerEmbeddingExtractor)
func DeleteSpeakerEmbeddingManager(m *SpeakerEmbeddingManager)
func DeleteSpokenLanguageIdentification(slid *SpokenLanguageIdentification)
func DeleteVoiceActivityDetector(vad *VoiceActivityDetector)
type CircularBuffer
- func NewCircularBuffer(capacity int) *CircularBuffer
- func (buffer *CircularBuffer) Get(start int, n int) []float32
- func (buffer *CircularBuffer) Head() int
- func (buffer *CircularBuffer) Pop(n int)
- func (buffer *CircularBuffer) Push(samples []float32)
- func (buffer *CircularBuffer) Reset()
- func (buffer *CircularBuffer) Size() int
type FeatureConfig
type GeneratedAudio
- func (audio *GeneratedAudio) Save(filename string) bool
type OfflineLMConfig
type OfflineModelConfig
type OfflineNemoEncDecCtcModelConfig
type OfflineParaformerModelConfig
type OfflineRecognizer
- func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer
- func (recognizer *OfflineRecognizer) Decode(s *OfflineStream)
- func (recognizer *OfflineRecognizer) DecodeStreams(s []*OfflineStream)
type OfflineRecognizerConfig
type OfflineRecognizerResult
type OfflineStream
- func NewOfflineStream(recognizer *OfflineRecognizer) *OfflineStream
- func (s *OfflineStream) AcceptWaveform(sampleRate int, samples []float32)
- func (s *OfflineStream) GetResult() *OfflineRecognizerResult
type OfflineTdnnModelConfig
type OfflineTransducerModelConfig
type OfflineTts
- func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts
- func (tts *OfflineTts) Generate(text string, sid int, speed float32) *GeneratedAudio
type OfflineTtsConfig
type OfflineTtsModelConfig
type OfflineTtsVitsModelConfig
type OfflineWhisperModelConfig
type OnlineCtcFstDecoderConfig
type OnlineModelConfig
type OnlineParaformerModelConfig
type OnlineRecognizer
- func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer
- func (recognizer *OnlineRecognizer) Decode(s *OnlineStream)
- func (recognizer *OnlineRecognizer) DecodeStreams(s []*OnlineStream)
- func (recognizer *OnlineRecognizer) GetResult(s *OnlineStream) *OnlineRecognizerResult
- func (recognizer *OnlineRecognizer) IsEndpoint(s *OnlineStream) bool
- func (recognizer *OnlineRecognizer) IsReady(s *OnlineStream) bool
- func (recognizer *OnlineRecognizer) Reset(s *OnlineStream)
type OnlineRecognizerConfig
type OnlineRecognizerResult
type OnlineStream
- func NewOnlineStream(recognizer *OnlineRecognizer) *OnlineStream
- func (s *OnlineStream) AcceptWaveform(sampleRate int, samples []float32)
- func (s *OnlineStream) InputFinished()
type OnlineTransducerModelConfig
type OnlineZipformer2CtcModelConfig
type SileroVadModelConfig
type SpeakerEmbeddingExtractor
- func NewSpeakerEmbeddingExtractor(config *SpeakerEmbeddingExtractorConfig) *SpeakerEmbeddingExtractor
- func (ex *SpeakerEmbeddingExtractor) Compute(stream *OnlineStream) []float32
- func (ex *SpeakerEmbeddingExtractor) CreateStream() *OnlineStream
- func (ex *SpeakerEmbeddingExtractor) Dim() int
- func (ex *SpeakerEmbeddingExtractor) IsReady(stream *OnlineStream) bool
type SpeakerEmbeddingExtractorConfig
type SpeakerEmbeddingManager
- func NewSpeakerEmbeddingManager(dim int) *SpeakerEmbeddingManager
- func (m *SpeakerEmbeddingManager) AllSpeakers() []string
- func (m *SpeakerEmbeddingManager) Contains(name string) bool
- func (m *SpeakerEmbeddingManager) NumSpeakers() int
- func (m *SpeakerEmbeddingManager) Register(name string, embedding []float32) bool
- func (m *SpeakerEmbeddingManager) RegisterV(name string, embeddings [][]float32) bool
- func (m *SpeakerEmbeddingManager) Remove(name string) bool
- func (m *SpeakerEmbeddingManager) Search(embedding []float32, threshold float32) string
- func (m *SpeakerEmbeddingManager) Verify(name string, embedding []float32, threshold float32) bool
type SpeechSegment
type SpokenLanguageIdentification
- func NewSpokenLanguageIdentification(config *SpokenLanguageIdentificationConfig) *SpokenLanguageIdentification
- func (slid *SpokenLanguageIdentification) Compute(stream *OfflineStream) *SpokenLanguageIdentificationResult
- func (slid *SpokenLanguageIdentification) CreateStream() *OfflineStream
type SpokenLanguageIdentificationConfig
type SpokenLanguageIdentificationResult
type SpokenLanguageIdentificationWhisperConfig
type VadModelConfig
type VoiceActivityDetector
- func NewVoiceActivityDetector(config *VadModelConfig, bufferSizeInSeconds float32) *VoiceActivityDetector
- func (vad *VoiceActivityDetector) AcceptWaveform(samples []float32)
- func (vad *VoiceActivityDetector) Clear()
- func (vad *VoiceActivityDetector) Front() *SpeechSegment
- func (vad *VoiceActivityDetector) IsEmpty() bool
- func (vad *VoiceActivityDetector) IsSpeech() bool
- func (vad *VoiceActivityDetector) Pop()
- func (vad *VoiceActivityDetector) Reset()
type Wave
- func ReadWave(filename string) *Wave

Constants ¶

This section is empty.

Variables ¶

This section is empty.

Functions ¶

func DeleteCircularBuffer ¶ added in v1.9.15

func DeleteCircularBuffer(buffer *CircularBuffer)

func DeleteOfflineRecognizer ¶

func DeleteOfflineRecognizer(recognizer *OfflineRecognizer)

Frees the internal pointer of the recognizer to avoid a memory leak.

func DeleteOfflineStream ¶

func DeleteOfflineStream(stream *OfflineStream)

Frees the internal pointer of the stream to avoid memory leak.

func DeleteOfflineTts ¶ added in v1.9.1

func DeleteOfflineTts(tts *OfflineTts)

Free the internal pointer inside the tts to avoid memory leak.

func DeleteOnlineRecognizer ¶

func DeleteOnlineRecognizer(recognizer *OnlineRecognizer)

Free the internal pointer inside the recognizer to avoid memory leak.

func DeleteOnlineStream ¶

func DeleteOnlineStream(stream *OnlineStream)

Delete the internal pointer inside the stream to avoid memory leak.

func DeleteSpeakerEmbeddingExtractor ¶ added in v1.9.15

func DeleteSpeakerEmbeddingExtractor(ex *SpeakerEmbeddingExtractor)

func DeleteSpeakerEmbeddingManager ¶ added in v1.9.15

func DeleteSpeakerEmbeddingManager(m *SpeakerEmbeddingManager)

func DeleteSpokenLanguageIdentification ¶ added in v1.9.15

func DeleteSpokenLanguageIdentification(slid *SpokenLanguageIdentification)

func DeleteVoiceActivityDetector ¶ added in v1.9.15

func DeleteVoiceActivityDetector(vad *VoiceActivityDetector)

Types ¶

type CircularBuffer ¶ added in v1.9.15

type CircularBuffer struct {
	// contains filtered or unexported fields
}

func NewCircularBuffer ¶ added in v1.9.15

func NewCircularBuffer(capacity int) *CircularBuffer

func (*CircularBuffer) Get ¶ added in v1.9.15

func (buffer *CircularBuffer) Get(start int, n int) []float32

func (*CircularBuffer) Head ¶ added in v1.9.15

func (buffer *CircularBuffer) Head() int

func (*CircularBuffer) Pop ¶ added in v1.9.15

func (buffer *CircularBuffer) Pop(n int)

func (*CircularBuffer) Push ¶ added in v1.9.15

func (buffer *CircularBuffer) Push(samples []float32)

func (*CircularBuffer) Reset ¶ added in v1.9.15

func (buffer *CircularBuffer) Reset()

func (*CircularBuffer) Size ¶ added in v1.9.15

func (buffer *CircularBuffer) Size() int

type FeatureConfig ¶

type FeatureConfig struct {
	// Sample rate expected by the model. It is 16000 for all
	// pre-trained models provided by us
	SampleRate int
	// Feature dimension expected by the model. It is 80 for all
	// pre-trained models provided by us
	FeatureDim int
}

Configuration for the feature extractor

type GeneratedAudio ¶ added in v1.9.1

type GeneratedAudio struct {
	// Normalized samples in the range [-1, 1]
	Samples []float32

	SampleRate int
}

func (*GeneratedAudio) Save ¶ added in v1.9.1

func (audio *GeneratedAudio) Save(filename string) bool

type OfflineLMConfig ¶

type OfflineLMConfig struct {
	Model string  // Path to the model
	Scale float32 // scale for LM score
}

Configuration for offline LM.

type OfflineModelConfig ¶

type OfflineModelConfig struct {
	Transducer OfflineTransducerModelConfig
	Paraformer OfflineParaformerModelConfig
	NemoCTC    OfflineNemoEncDecCtcModelConfig
	Whisper    OfflineWhisperModelConfig
	Tdnn       OfflineTdnnModelConfig
	Tokens     string // Path to tokens.txt

	// Number of threads to use for neural network computation
	NumThreads int

	// 1 to print model meta information while loading
	Debug int

	// Optional. Valid values: cpu, cuda, coreml
	Provider string

	// Optional. Specify it for faster model initialization.
	ModelType string
}

type OfflineNemoEncDecCtcModelConfig ¶

type OfflineNemoEncDecCtcModelConfig struct {
	Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}

Configuration for offline/non-streaming NeMo CTC models.

Please refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html to download pre-trained models

type OfflineParaformerModelConfig ¶

type OfflineParaformerModelConfig struct {
	Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}

Configuration for offline/non-streaming paraformer.

Please refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html to download pre-trained models

type OfflineRecognizer ¶

type OfflineRecognizer struct {
	// contains filtered or unexported fields
}

It wraps a pointer from C

func NewOfflineRecognizer ¶

func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer

The user is responsible for invoking DeleteOfflineRecognizer() to free the returned recognizer to avoid a memory leak.

func (*OfflineRecognizer) Decode ¶

func (recognizer *OfflineRecognizer) Decode(s *OfflineStream)

Decode the offline stream.

func (*OfflineRecognizer) DecodeStreams ¶

func (recognizer *OfflineRecognizer) DecodeStreams(s []*OfflineStream)

Decode multiple streams in parallel, i.e., in batch.

type OfflineRecognizerConfig ¶

type OfflineRecognizerConfig struct {
	FeatConfig  FeatureConfig
	ModelConfig OfflineModelConfig
	LmConfig    OfflineLMConfig

	// Valid decoding method: greedy_search, modified_beam_search
	DecodingMethod string

	// Used only when DecodingMethod is modified_beam_search.
	MaxActivePaths int
}

Configuration for the offline/non-streaming recognizer.

type OfflineRecognizerResult ¶

type OfflineRecognizerResult struct {
	Text string
}

It contains recognition result of an offline stream.

type OfflineStream ¶

type OfflineStream struct {
	// contains filtered or unexported fields
}

It wraps a pointer from C

func NewOfflineStream ¶

func NewOfflineStream(recognizer *OfflineRecognizer) *OfflineStream

The user is responsible for invoking DeleteOfflineStream() to free the returned stream to avoid a memory leak.

func (*OfflineStream) AcceptWaveform ¶

func (s *OfflineStream) AcceptWaveform(sampleRate int, samples []float32)

Input audio samples for the offline stream. Please only call it once. That is, input all samples at once.

sampleRate is the sample rate of the input audio samples. If it is different from the value expected by the feature extractor, we will do resampling inside.

samples contains the actual audio samples. Each sample is in the range [-1, 1].

func (*OfflineStream) GetResult ¶

func (s *OfflineStream) GetResult() *OfflineRecognizerResult

Get the recognition result of the offline stream.

type OfflineTdnnModelConfig ¶ added in v1.7.8

type OfflineTdnnModelConfig struct {
	Model string
}

type OfflineTransducerModelConfig ¶

type OfflineTransducerModelConfig struct {
	Encoder string // Path to the encoder model, i.e., encoder.onnx or encoder.int8.onnx
	Decoder string // Path to the decoder model
	Joiner  string // Path to the joiner model
}

Configuration for offline/non-streaming transducer.

Please refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html to download pre-trained models

type OfflineTts ¶ added in v1.9.1

type OfflineTts struct {
	// contains filtered or unexported fields
}

The offline tts class. It wraps a pointer from C.

func NewOfflineTts ¶ added in v1.9.1

func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts

The user is responsible for invoking DeleteOfflineTts() to free the returned tts to avoid a memory leak.

func (*OfflineTts) Generate ¶ added in v1.9.1

func (tts *OfflineTts) Generate(text string, sid int, speed float32) *GeneratedAudio

type OfflineTtsConfig ¶ added in v1.9.1

type OfflineTtsConfig struct {
	Model           OfflineTtsModelConfig
	RuleFsts        string
	RuleFars        string
	MaxNumSentences int
}

type OfflineTtsModelConfig ¶ added in v1.9.1

type OfflineTtsModelConfig struct {
	Vits OfflineTtsVitsModelConfig

	// Number of threads to use for neural network computation
	NumThreads int

	// 1 to print model meta information while loading
	Debug int

	// Optional. Valid values: cpu, cuda, coreml
	Provider string
}

type OfflineTtsVitsModelConfig ¶ added in v1.9.1

type OfflineTtsVitsModelConfig struct {
	Model       string  // Path to the VITS onnx model
	Lexicon     string  // Path to lexicon.txt
	Tokens      string  // Path to tokens.txt
	DataDir     string  // Path to the espeak-ng-data directory
	NoiseScale  float32 // noise scale for vits models. Please use 0.667 in general
	NoiseScaleW float32 // noise scale for vits models. Please use 0.8 in general
	LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
}

Configuration for offline/non-streaming text-to-speech (TTS).

Please refer to https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html to download pre-trained models

type OfflineWhisperModelConfig ¶ added in v1.7.8

type OfflineWhisperModelConfig struct {
	Encoder  string
	Decoder  string
	Language string
	Task     string
}

type OnlineCtcFstDecoderConfig ¶ added in v1.9.16

type OnlineCtcFstDecoderConfig struct {
	Graph     string
	MaxActive int
}

type OnlineModelConfig ¶ added in v1.7.6

type OnlineModelConfig struct {
	Transducer    OnlineTransducerModelConfig
	Paraformer    OnlineParaformerModelConfig
	Zipformer2Ctc OnlineZipformer2CtcModelConfig
	Tokens        string // Path to tokens.txt
	NumThreads    int    // Number of threads to use for neural network computation
	Provider      string // Optional. Valid values are: cpu, cuda, coreml
	Debug         int    // 1 to show model meta information while loading it.
	ModelType     string // Optional. You can specify it for faster model initialization
}

Configuration for online/streaming models

Please refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html and https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html to download pre-trained models

type OnlineParaformerModelConfig ¶ added in v1.7.6

type OnlineParaformerModelConfig struct {
	Encoder string // Path to the encoder model, e.g., encoder.onnx or encoder.int8.onnx
	Decoder string // Path to the decoder model.
}

Configuration for online/streaming paraformer models

Please refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html to download pre-trained models

type OnlineRecognizer ¶

type OnlineRecognizer struct {
	// contains filtered or unexported fields
}

The online recognizer class. It wraps a pointer from C.

func NewOnlineRecognizer ¶

func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer

The user is responsible for invoking DeleteOnlineRecognizer() to free the returned recognizer to avoid a memory leak.

func (*OnlineRecognizer) Decode ¶

func (recognizer *OnlineRecognizer) Decode(s *OnlineStream)

Decode the stream. Before calling this function, you have to ensure that recognizer.IsReady(s) returns true. Otherwise, you will be SAD.

You usually use it like below:

for recognizer.IsReady(s) {
  recognizer.Decode(s)
}

func (*OnlineRecognizer) DecodeStreams ¶

func (recognizer *OnlineRecognizer) DecodeStreams(s []*OnlineStream)

Decode multiple streams in parallel, i.e., in batch. You have to ensure that each stream is ready for decoding. Otherwise, you will be SAD.

func (*OnlineRecognizer) GetResult ¶

func (recognizer *OnlineRecognizer) GetResult(s *OnlineStream) *OnlineRecognizerResult

Get the current result of the stream since the last invocation of Reset().

func (*OnlineRecognizer) IsEndpoint ¶

func (recognizer *OnlineRecognizer) IsEndpoint(s *OnlineStream) bool

Return true if an endpoint is detected.

You usually use it like below:

if recognizer.IsEndpoint(s) {
   // do your own stuff after detecting an endpoint

   recognizer.Reset(s)
}

func (*OnlineRecognizer) IsReady ¶

func (recognizer *OnlineRecognizer) IsReady(s *OnlineStream) bool

Check whether the stream has enough feature frames for decoding. Return true if this stream is ready for decoding. Return false otherwise.

You will usually use it like below:

for recognizer.IsReady(s) {
   recognizer.Decode(s)
}

func (*OnlineRecognizer) Reset ¶

func (recognizer *OnlineRecognizer) Reset(s *OnlineStream)

After calling this function, the internal neural network model states are reset and IsEndpoint(s) would return false. GetResult(s) would also return an empty string.

type OnlineRecognizerConfig ¶

type OnlineRecognizerConfig struct {
	FeatConfig  FeatureConfig
	ModelConfig OnlineModelConfig

	// Valid decoding methods: greedy_search, modified_beam_search
	DecodingMethod string

	// Used only when DecodingMethod is modified_beam_search. It specifies
	// the maximum number of paths to keep during the search
	MaxActivePaths int

	EnableEndpoint int // 1 to enable endpoint detection.

	// Please see
	// https://k2-fsa.github.io/sherpa/ncnn/endpoint.html
	// for the meaning of Rule1MinTrailingSilence, Rule2MinTrailingSilence
	// and Rule3MinUtteranceLength.
	Rule1MinTrailingSilence float32
	Rule2MinTrailingSilence float32
	Rule3MinUtteranceLength float32
	CtcFstDecoderConfig     OnlineCtcFstDecoderConfig
}

Configuration for the online/streaming recognizer.

type OnlineRecognizerResult ¶

type OnlineRecognizerResult struct {
	Text string
}

It contains the recognition result for an online stream.

type OnlineStream ¶

type OnlineStream struct {
	// contains filtered or unexported fields
}

The online stream class. It wraps a pointer from C.

func NewOnlineStream ¶

func NewOnlineStream(recognizer *OnlineRecognizer) *OnlineStream

The user is responsible for invoking DeleteOnlineStream() to free the returned stream to avoid a memory leak.

func (*OnlineStream) AcceptWaveform ¶

func (s *OnlineStream) AcceptWaveform(sampleRate int, samples []float32)

Input audio samples for the stream.

sampleRate is the actual sample rate of the input audio samples. If it is different from the sample rate expected by the feature extractor, we will do resampling inside.

samples contains audio samples. Each sample is in the range [-1, 1]

func (*OnlineStream) InputFinished ¶

func (s *OnlineStream) InputFinished()

Signal that there will be no incoming audio samples. After calling this function, you cannot call OnlineStream.AcceptWaveform any longer.

The main purpose of this function is to flush the remaining audio samples buffered inside for feature extraction.

type OnlineTransducerModelConfig ¶

type OnlineTransducerModelConfig struct {
	Encoder string // Path to the encoder model, e.g., encoder.onnx or encoder.int8.onnx
	Decoder string // Path to the decoder model.
	Joiner  string // Path to the joiner model.
}

Configuration for online/streaming transducer models

Please refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html to download pre-trained models

type OnlineZipformer2CtcModelConfig ¶ added in v1.9.7

type OnlineZipformer2CtcModelConfig struct {
	Model string // Path to the onnx model
}

Please refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-ctc/index.html to download pre-trained models

type SileroVadModelConfig ¶ added in v1.9.15

type SileroVadModelConfig struct {
	Model              string
	Threshold          float32
	MinSilenceDuration float32
	MinSpeechDuration  float32
	WindowSize         int
}

Configuration for the Silero model used for voice activity detection (VAD).

type SpeakerEmbeddingExtractor ¶ added in v1.9.15

type SpeakerEmbeddingExtractor struct {
	// contains filtered or unexported fields
}

func NewSpeakerEmbeddingExtractor ¶ added in v1.9.15

func NewSpeakerEmbeddingExtractor(config *SpeakerEmbeddingExtractorConfig) *SpeakerEmbeddingExtractor

The user has to invoke DeleteSpeakerEmbeddingExtractor() to free the returned value to avoid memory leak

func (*SpeakerEmbeddingExtractor) Compute ¶ added in v1.9.15

func (ex *SpeakerEmbeddingExtractor) Compute(stream *OnlineStream) []float32

func (*SpeakerEmbeddingExtractor) CreateStream ¶ added in v1.9.15

func (ex *SpeakerEmbeddingExtractor) CreateStream() *OnlineStream

The user is responsible for invoking DeleteOnlineStream() to free the returned stream to avoid a memory leak.

func (*SpeakerEmbeddingExtractor) Dim ¶ added in v1.9.15

func (ex *SpeakerEmbeddingExtractor) Dim() int

func (*SpeakerEmbeddingExtractor) IsReady ¶ added in v1.9.15

func (ex *SpeakerEmbeddingExtractor) IsReady(stream *OnlineStream) bool

type SpeakerEmbeddingExtractorConfig ¶ added in v1.9.15

type SpeakerEmbeddingExtractorConfig struct {
	Model      string
	NumThreads int
	Debug      int
	Provider   string
}

type SpeakerEmbeddingManager ¶ added in v1.9.15

type SpeakerEmbeddingManager struct {
	// contains filtered or unexported fields
}

func NewSpeakerEmbeddingManager ¶ added in v1.9.15

func NewSpeakerEmbeddingManager(dim int) *SpeakerEmbeddingManager

The user has to invoke DeleteSpeakerEmbeddingManager() to free the returned value to avoid memory leak

func (*SpeakerEmbeddingManager) AllSpeakers ¶ added in v1.9.15

func (m *SpeakerEmbeddingManager) AllSpeakers() []string

func (*SpeakerEmbeddingManager) Contains ¶ added in v1.9.15

func (m *SpeakerEmbeddingManager) Contains(name string) bool

func (*SpeakerEmbeddingManager) NumSpeakers ¶ added in v1.9.15

func (m *SpeakerEmbeddingManager) NumSpeakers() int

func (*SpeakerEmbeddingManager) Register ¶ added in v1.9.15

func (m *SpeakerEmbeddingManager) Register(name string, embedding []float32) bool

func (*SpeakerEmbeddingManager) RegisterV ¶ added in v1.9.15

func (m *SpeakerEmbeddingManager) RegisterV(name string, embeddings [][]float32) bool

func (*SpeakerEmbeddingManager) Remove ¶ added in v1.9.15

func (m *SpeakerEmbeddingManager) Remove(name string) bool

func (*SpeakerEmbeddingManager) Search ¶ added in v1.9.15

func (m *SpeakerEmbeddingManager) Search(embedding []float32, threshold float32) string

func (*SpeakerEmbeddingManager) Verify ¶ added in v1.9.15

func (m *SpeakerEmbeddingManager) Verify(name string, embedding []float32, threshold float32) bool

type SpeechSegment ¶ added in v1.9.15

type SpeechSegment struct {
	Start   int
	Samples []float32
}

type SpokenLanguageIdentification ¶ added in v1.9.15

type SpokenLanguageIdentification struct {
	// contains filtered or unexported fields
}

func NewSpokenLanguageIdentification ¶ added in v1.9.15

func NewSpokenLanguageIdentification(config *SpokenLanguageIdentificationConfig) *SpokenLanguageIdentification

func (*SpokenLanguageIdentification) Compute ¶ added in v1.9.15

func (slid *SpokenLanguageIdentification) Compute(stream *OfflineStream) *SpokenLanguageIdentificationResult

func (*SpokenLanguageIdentification) CreateStream ¶ added in v1.9.15

func (slid *SpokenLanguageIdentification) CreateStream() *OfflineStream

The user has to invoke DeleteOfflineStream() to free the returned value to avoid memory leak

type SpokenLanguageIdentificationConfig ¶ added in v1.9.15

type SpokenLanguageIdentificationConfig struct {
	Whisper    SpokenLanguageIdentificationWhisperConfig
	NumThreads int
	Debug      int
	Provider   string
}

type SpokenLanguageIdentificationResult ¶ added in v1.9.15

type SpokenLanguageIdentificationResult struct {
	Lang string
}

type SpokenLanguageIdentificationWhisperConfig ¶ added in v1.9.15

type SpokenLanguageIdentificationWhisperConfig struct {
	Encoder      string
	Decoder      string
	TailPaddings int
}

type VadModelConfig ¶ added in v1.9.15

type VadModelConfig struct {
	SileroVad  SileroVadModelConfig
	SampleRate int
	NumThreads int
	Provider   string
	Debug      int
}

type VoiceActivityDetector ¶ added in v1.9.15

type VoiceActivityDetector struct {
	// contains filtered or unexported fields
}

func NewVoiceActivityDetector ¶ added in v1.9.15

func NewVoiceActivityDetector(config *VadModelConfig, bufferSizeInSeconds float32) *VoiceActivityDetector

func (*VoiceActivityDetector) AcceptWaveform ¶ added in v1.9.15

func (vad *VoiceActivityDetector) AcceptWaveform(samples []float32)

func (*VoiceActivityDetector) Clear ¶ added in v1.9.15

func (vad *VoiceActivityDetector) Clear()

func (*VoiceActivityDetector) Front ¶ added in v1.9.15

func (vad *VoiceActivityDetector) Front() *SpeechSegment

func (*VoiceActivityDetector) IsEmpty ¶ added in v1.9.15

func (vad *VoiceActivityDetector) IsEmpty() bool

func (*VoiceActivityDetector) IsSpeech ¶ added in v1.9.15

func (vad *VoiceActivityDetector) IsSpeech() bool

func (*VoiceActivityDetector) Pop ¶ added in v1.9.15

func (vad *VoiceActivityDetector) Pop()

func (*VoiceActivityDetector) Reset ¶ added in v1.9.15

func (vad *VoiceActivityDetector) Reset()

type Wave ¶ added in v1.9.15

type Wave = GeneratedAudio

Wave represents a single-channel wave.

func ReadWave ¶ added in v1.9.15

func ReadWave(filename string) *Wave

Source Files ¶

View all Source files

sherpa_onnx.go

?	: This menu
/	: Search site
f or F	: Jump to
y or Y	: Canonical URL