speech

package
v0.2.4 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 15, 2023 | License: MIT | Imports: 4 | Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func SetLogLevel

func SetLogLevel(level LogLevel)

func SetLogrusLogLevel added in v0.2.3

func SetLogrusLogLevel(level LogLevel)

Types

type Detector

// Detector is the type returned by NewDetector. It exposes no exported
// fields; callers interact with it through its methods (Detect, Reset,
// Destroy and ChangeLogLevel).
type Detector struct {
	// contains filtered or unexported fields
}

func NewDetector

func NewDetector(cfg DetectorConfig) (*Detector, error)

func (*Detector) ChangeLogLevel

func (sd *Detector) ChangeLogLevel(logLevel LogLevel)

func (*Detector) Destroy

func (sd *Detector) Destroy() error

func (*Detector) Detect

func (sd *Detector) Detect(pcm []float32) ([]Segment, error)

func (*Detector) Reset

func (sd *Detector) Reset() error

type DetectorConfig

// DetectorConfig holds the settings passed to NewDetector.
// Its IsValid method returns an error value for the configuration.
type DetectorConfig struct {
	// The path to the ONNX Silero VAD model file to load.
	ModelPath string
	// The sampling rate of the input audio samples. Supported values are 8000 and 16000.
	// NOTE(review): presumably expressed in Hz — confirm.
	SampleRate int
	// The number of samples to process in each inference step.
	WindowSize int
	// The probability threshold above which we detect speech. A good default is 0.5.
	Threshold float32
	// The duration of silence to wait for each speech segment before separating it.
	// NOTE(review): the "Ms" suffix suggests milliseconds — confirm.
	MinSilenceDurationMs int
	// The padding to add to speech segments to avoid aggressive cutting.
	// NOTE(review): the "Ms" suffix suggests milliseconds — confirm.
	SpeechPadMs int
}

func (DetectorConfig) IsValid

func (c DetectorConfig) IsValid() error

type LogLevel

// LogLevel is the integer type accepted by SetLogLevel, SetLogrusLogLevel
// and Detector.ChangeLogLevel.
type LogLevel int
// The LogLevel values are enumerated with iota, so they run from 0
// (LogLevelFatal) to 4 (LogLevelVerbose) in declaration order.
// NOTE(review): presumably ordered from least to most verbose output — confirm.
const (
	LogLevelFatal LogLevel = iota // 0
	LogLevelError                 // 1
	LogLevelWarning               // 2
	LogLevelInfo                  // 3
	LogLevelVerbose               // 4
)

type Segment

// Segment contains timing information of a speech segment.
// Detector.Detect returns a slice of these.
type Segment struct {
	// The relative timestamp in seconds of when a speech segment begins.
	// NOTE(review): the reference point for "relative" is not stated here;
	// presumably the start of the processed audio — confirm.
	SpeechStartAt float64
	// The relative timestamp in seconds of when a speech segment ends.
	// NOTE(review): same reference point as SpeechStartAt, presumably — confirm.
	SpeechEndAt float64
}

Segment contains timing information of a speech segment.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL