goEagi

package module
v0.2.1-mind Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 28, 2022 License: MIT Imports: 17 Imported by: 0

README

GoEagi

Package GoEagi provides some fundamental functionalities that work with Asterisk's EAGI. It has the following features:

  • Audio Streaming
  • Google's Speech to Text
  • Vosk server Speech to Text
  • Voice Activity Detection
  • Speech File Generation
  • Commands to Asterisk

Example Usage

  • Asterisk audio streaming + Google's speech to text
package main

import (
	"fmt"
	"github.com/andrewyang17/goEagi"
	"golang.org/x/net/context"
)

// main bridges the audio received from Asterisk's EAGI into Google's
// streaming speech-to-text service and hands every transcription to
// eagi.Verbose. The loop ends on the first error reported by any stream.
func main() {
	eagi, err := goEagi.New()
	if err != nil {
		panic(err)
	}

	// NewGoogleService takes three arguments: the private key path, the
	// language code and a speech context. This example needs no phrase hints,
	// so a nil slice is passed.
	// NOTE(review): confirm the service accepts an empty speech context.
	googleService, err := goEagi.NewGoogleService("<GoogleSpeechToTextPrivateKey>", "en-GB", nil)
	if err != nil {
		panic(err)
	}

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// bridgeStream carries audio frames from the EAGI reader to the Google
	// streaming client.
	bridgeStream := make(chan []byte)
	defer close(bridgeStream)

	audioStream := goEagi.AudioStreaming(ctx)
	errStream := googleService.StartStreaming(ctx, bridgeStream)
	googleStream := googleService.SpeechToTextResponse(ctx)

loop:
	for {
		select {
		case <-errStream:
			cancel()
			break loop

		case a := <-audioStream:
			if a.Error != nil {
				cancel()
				break loop
			}

			// Skip empty reads; only real audio is forwarded.
			if len(a.Stream) != 0 {
				bridgeStream <- a.Stream
			}

		case g := <-googleStream:
			if g.Error != nil {
				cancel()
				break loop
			}

			// Do whatever you want with the returned transcription;
			// in this case it is passed to eagi.Verbose.
			if err := eagi.Verbose(fmt.Sprintf("Transcription: %v\n", g.Transcription)); err != nil {
				panic(err)
			}
		}
	}
}

Vosk Example Usage

prerequisite - run the vosk server

docker run -d -p 2700:2700 alphacep/kaldi-en:latest
package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"

	"github.com/andrewyang17/goEagi"
	"golang.org/x/net/context"
)

// main bridges the audio received from Asterisk's EAGI into a Vosk server
// and hands every recognized text to eagi.Verbose. The loop ends on a stream
// error or when SIGHUP is received.
func main() {
	// Address of the Vosk server. The port matches the one published by the
	// docker command in the prerequisite step; adjust both to your setup.
	const (
		voskHost = "localhost"
		voskPort = "2700"
	)

	eagi, err := goEagi.New()
	if err != nil {
		panic(err)
	}

	// phraseList lists the valid phrases/words.
	// Notes:
	//   * with a phrase list, Vosk only detects these words and ignores any other word
	//   * some Vosk models don't support phrase lists (observed with Spanish)
	//   * to disable the phrase list, leave phraseList empty
	phraseList := []string{"hello world"}
	voskService, err := goEagi.NewVoskService(voskHost, voskPort, phraseList)
	if err != nil {
		panic(err)
	}
	defer voskService.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// bridgeStream carries audio frames from the EAGI reader to the Vosk
	// streaming client.
	bridgeStream := make(chan []byte)
	defer close(bridgeStream)

	audioStream := goEagi.AudioStreaming(ctx)
	errStream := voskService.StartStreaming(ctx, bridgeStream)
	voskStream := voskService.SpeechToTextResponse(ctx)

	// Detect SIGHUP from Asterisk. Once Notify is registered the signal is
	// delivered to sigs instead of terminating the process, so the loop below
	// must receive from sigs to stop.
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGHUP)

loop:
	for {
		select {
		case <-sigs:
			cancel()
			break loop

		case <-errStream:
			cancel()
			break loop

		case a := <-audioStream:
			if a.Error != nil {
				cancel()
				break loop
			}

			// Skip empty reads; only real audio is forwarded.
			if len(a.Stream) != 0 {
				bridgeStream <- a.Stream
			}

		case v := <-voskStream:
			// Do whatever you want with the returned transcription;
			// in this case it is passed to eagi.Verbose.
			// Partial data arrives in v.Partial and, once the full text has
			// been recognized, it arrives in v.Text.
			if err := eagi.Verbose(fmt.Sprintf("Transcription: %v\n", v.Text)); err != nil {
				panic(err)
			}
		}
	}
}

Commands to Asterisk

Documentation

Overview

Package goEagi (vosk.go) provides a simplified interface for calling the Vosk server's speech-to-text service. It gives callers the flexibility to set their desired configuration.

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func AudioStreaming

func AudioStreaming(ctx context.Context) <-chan AudioResult

AudioStreaming launches a new goroutine for audio streaming via file descriptor 3.

func ComputeAmplitude

func ComputeAmplitude(sample []byte) (float64, error)

ComputeAmplitude analyzes the amplitude of a sample slice of bytes.

func GenerateAudio

func GenerateAudio(sample []byte, audioDirectory string, audioName string) (string, error)

GenerateAudio writes a sample slice of bytes into an audio file. It returns the location path of the audio file, as determined by the function parameters. Please note that only the wav extension is supported.

Types

type AudioResult

// AudioResult is the element type of the channel returned by AudioStreaming.
type AudioResult struct {
	// Error is checked by the example programs, which stop streaming as soon
	// as it is non-nil.
	Error  error
	// Stream holds the audio bytes; the example programs forward it to the
	// speech service only when it is non-empty.
	Stream []byte
}

type Eagi

// Eagi is the handle returned by New. It embeds *agi.Session, so the
// session's methods are promoted onto Eagi.
type Eagi struct {
	*agi.Session
}

func New

func New() (*Eagi, error)

type GoogleResult

// GoogleResult is the element type of the channel returned by
// GoogleService.SpeechToTextResponse.
type GoogleResult struct {
	// Error is checked by the example program, which stops streaming as soon
	// as it is non-nil.
	Error         error
	// Transcription is the transcription text from Google's speech to text.
	Transcription string
}

type GoogleService

// GoogleService provides information to Google Speech Recognizer and speech
// to text methods. Instances are created with NewGoogleService.
type GoogleService struct {
	// contains filtered or unexported fields
}

GoogleService provides information to Google Speech Recognizer and speech to text methods.

func NewGoogleService

func NewGoogleService(privateKeyPath string, languageCode string, speechContext []string) (*GoogleService, error)

NewGoogleService is a constructor of GoogleService, it takes a privateKeyPath to set it in environment with key GOOGLE_APPLICATION_CREDENTIALS, a languageCode, example ["en-GB", "en-US", "ch", ...], see (https://cloud.google.com/speech-to-text/docs/languages), and a speech context, see (https://cloud.google.com/speech-to-text/docs/speech-adaptation).

func (*GoogleService) SpeechToTextResponse

func (g *GoogleService) SpeechToTextResponse(ctx context.Context) <-chan GoogleResult

SpeechToTextResponse sends the transcription response from Google's SpeechToText.

func (*GoogleService) StartStreaming

func (g *GoogleService) StartStreaming(ctx context.Context, stream <-chan []byte) <-chan error

StartStreaming takes a reading channel of audio stream and sends it as a gRPC request to Google service through the initialized client. Caller should run it in a goroutine.

type Vad

// Vad is the voice activity detector; Detect is its analysis method.
type Vad struct {
	// AmplitudeDetectionThreshold is initialized by NewVad to the package's
	// defaultAmplitudeDetectionThreshold.
	// NOTE(review): presumably the amplitude level Detect compares frames
	// against — confirm against the implementation.
	AmplitudeDetectionThreshold float64
}

func NewVad

func NewVad() *Vad

NewVad is a constructor of Vad. The initialization will use the defaultAmplitudeDetectionThreshold.

func (*Vad) Detect

func (v *Vad) Detect(done <-chan interface{}, stream <-chan []byte) <-chan VadResult

Detect analyzes voice activity for a given slice of bytes.

type VadResult

// VadResult is the element type of the channel returned by Vad.Detect.
// NOTE(review): the field meanings below are inferred from the field names
// and from Detect's description — confirm against the implementation.
type VadResult struct {
	// Error presumably reports a failure while analyzing the frame.
	Error     error
	// Detected presumably tells whether voice activity was found.
	Detected  bool
	// Amplitude is presumably the amplitude computed for Frame.
	Amplitude float64
	// Frame is presumably the slice of audio bytes that was analyzed.
	Frame     []byte
}

type VoskResult

// VoskResult represents a partial or complete response from the Vosk server.
type VoskResult struct {
	// Result lists per-word entries.
	// NOTE(review): presumably Conf is the confidence and Start/End are the
	// word timings — confirm units and when the server populates this.
	Result []struct {
		Conf  float64
		End   float64
		Start float64
		Word  string
	}
	// Text carries the full text once it has been recognized.
	Text    string
	// Partial carries partial recognition data.
	Partial string
}

VoskResult represents a partial or complete response from the Vosk server.

type VoskService

// VoskService provides information to Vosk Speech Recognizer as well as
// methods on calling speech to text. Instances are created with
// NewVoskService and released with Close.
type VoskService struct {
	// PhraseList is the list of valid phrases/words; it is encoded under the
	// JSON key "phrase_list".
	PhraseList []string        `json:"phrase_list"`
	// Words is encoded under the JSON key "words".
	// NOTE(review): presumably enables per-word output in VoskResult.Result — confirm.
	Words      bool            `json:"words"`
	// Client is the websocket connection; the "-" tag keeps it out of the
	// JSON encoding.
	Client     *websocket.Conn `json:"-"`
	// contains filtered or unexported fields
}

VoskService provides information to Vosk Speech Recognizer as well as methods on calling speech to text.

func NewVoskService

func NewVoskService(host string, port string, phraseList []string) (*VoskService, error)

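NewVoskService is a constructor of VoskService. It takes the host and port of the Vosk server and a phraseList of valid phrases/words; leave phraseList empty to disable the phrase list.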

func (*VoskService) Close

func (v *VoskService) Close() error

Close closes the Vosk service connection.

func (*VoskService) SpeechToTextResponse

func (v *VoskService) SpeechToTextResponse(ctx context.Context) <-chan VoskResult

SpeechToTextResponse sends the transcription response from Vosk's SpeechToText.

func (*VoskService) StartStreaming

func (v *VoskService) StartStreaming(ctx context.Context, stream <-chan []byte) <-chan error

StartStreaming takes a reading channel of audio stream and sends it to the Vosk server through the initialized websocket client. Caller should run it in a goroutine.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL