govpr

package module
Version: v0.0.0-...-6d3268d Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 31, 2020 License: Apache-2.0 Imports: 8 Imported by: 2

README

声纹识别

来自于阿里聚安全对声纹识别的介绍:探秘身份认证利器——声纹识别

简介

govpr 是用 Go 语言实现的基于 GMM-UBM 的说话人识别(声纹识别)引擎,可用于语音验证,身份识别等场景. 目前仅支持汉语数字语音,语音格式须为 wav 格式(采样率 16000 Hz,16 位,单声道).

安装

go get -v -u github.com/liuxp0827/govpr

cd $GOPATH/src/github.com/liuxp0827/govpr/example

go run main.go

示例

如下是一个简单的示例,详细的例子可跳转至 example 查看. 示例中的语音内容均为8位纯数字. 语音验证后会得到一个得分,可设置阈值来判断验证语音是否为注册训练者本人: 示例中预设阈值为1.0,语音验证得分>=1.0时可认定为本人语音,得分<1.0时则认定为非本人语音.

得分

(注:阈值设为1.0并非最优值,仅是给出一个示例.另女性声纹得分相对较低,理论上应对不同性别给出不同阈值等级,govpr暂未实现通过声音分辨性别,后续会开发该功能)

注意

示例中,使用了五组完全不同的语音内容进行训练和验证,但实际上 govpr 更适合于文本相关的说话人识别,采用五组训练语音和验证语音内容相同的语音数据,可得到更好的识别效果.

package main

import (
	"github.com/liuxp0827/govpr"
	"github.com/liuxp0827/govpr/log"
	"github.com/liuxp0827/govpr/waveIO"
	"io/ioutil"
)

// engine is a thin wrapper around a *govpr.VPREngine; its methods forward
// to the wrapped engine.
type engine struct {
	vprEngine *govpr.VPREngine // wrapped engine; DestroyEngine sets it to nil
}

// NewEngine builds an engine around a freshly created govpr.VPREngine.
//
// sampleRate, delSilRange, ubmFile and userModelFile are forwarded unchanged
// to govpr.NewVPREngine; the remaining boolean argument of that constructor
// is always passed as false. If construction fails, the error is returned
// as-is together with a nil engine.
func NewEngine(sampleRate, delSilRange int, ubmFile, userModelFile string) (*engine, error) {
	var (
		wrapped = new(engine)
		err     error
	)

	wrapped.vprEngine, err = govpr.NewVPREngine(sampleRate, delSilRange, false, ubmFile, userModelFile)
	if err != nil {
		return nil, err
	}
	return wrapped, nil
}

// DestroyEngine drops the wrapper's reference to the underlying
// govpr.VPREngine by setting the field to nil.
//
// NOTE(review): the other methods call through that field, so they
// presumably must not be used after DestroyEngine — confirm.
func (e *engine) DestroyEngine() {
	e.vprEngine = nil
}

// TrainSpeech adds every element of buffers to the wrapped engine as
// training data, in order, and then trains the model.
//
// The first error from AddTrainBuffer or TrainModel is logged and returned;
// nil is returned when training succeeds. ClearAllBuffer and
// ClearTrainBuffer are always invoked on the wrapped engine before the call
// returns, on both the success and the failure paths.
func (e *engine) TrainSpeech(buffers [][]byte) error {
	// Register the cleanup before anything is added, so that a failure
	// part-way through the loop does not leave the earlier buffers queued in
	// the engine for the next call. Deferred calls run last-in-first-out:
	// ClearAllBuffer first, then ClearTrainBuffer.
	defer e.vprEngine.ClearTrainBuffer()
	defer e.vprEngine.ClearAllBuffer()

	for _, buf := range buffers {
		if err := e.vprEngine.AddTrainBuffer(buf); err != nil {
			log.Error(err)
			return err
		}
	}

	if err := e.vprEngine.TrainModel(); err != nil {
		log.Error(err)
		return err
	}

	return nil
}

// RecSpeech adds buffer to the wrapped engine as verification data, runs
// verification and returns the engine's score.
//
// If adding the buffer or verifying fails, the error is logged and returned
// together with a score of -1.0. ClearVerifyBuffer is deferred right after
// the buffer is added, so it runs on every return path.
func (e *engine) RecSpeech(buffer []byte) (float64, error) {
	// Score reported alongside any error.
	const failedScore = -1.0

	addErr := e.vprEngine.AddVerifyBuffer(buffer)
	defer e.vprEngine.ClearVerifyBuffer()
	if addErr != nil {
		log.Error(addErr)
		return failedScore, addErr
	}

	if verifyErr := e.vprEngine.VerifyModel(); verifyErr != nil {
		log.Error(verifyErr)
		return failedScore, verifyErr
	}

	return e.vprEngine.GetScore(), nil
}

// main runs the demo end to end: it creates an engine, trains it on five
// recordings, then scores two verification recordings and logs whether each
// score reaches the threshold.
func main() {
	// Scores greater than or equal to this value are logged as a pass.
	const threshold float64 = 1.0

	log.SetLevel(log.LevelDebug)

	vprEngine, err := NewEngine(16000, 50, "../ubm/ubm", "model/test.dat")
	if err != nil {
		log.Fatal(err)
	}

	trainFiles := [...]string{
		"wav/train/01_32468975.wav",
		"wav/train/02_58769423.wav",
		"wav/train/03_59682734.wav",
		"wav/train/04_64958273.wav",
		"wav/train/05_65432978.wav",
	}

	// Load every training recording up front; stop at the first failure.
	trainBuffer := make([][]byte, 0, len(trainFiles))
	for i := range trainFiles {
		samples, loadErr := loadWaveData(trainFiles[i])
		if loadErr != nil {
			log.Error(loadErr)
			return
		}
		trainBuffer = append(trainBuffer, samples)
	}

	if err = vprEngine.TrainSpeech(trainBuffer); err != nil {
		log.Fatal(err)
	}

	// scoreFile loads one verification recording through waveIO.WaveLoad and
	// returns the score RecSpeech gives it.
	scoreFile := func(path string) float64 {
		verifyBuffer, loadErr := waveIO.WaveLoad(path)
		if loadErr != nil {
			log.Fatal(loadErr)
		}

		score, recErr := vprEngine.RecSpeech(verifyBuffer)
		if recErr != nil {
			log.Fatal(recErr)
		}
		return score
	}

	selfScore := scoreFile("wav/verify/self_34986527.wav")
	log.Infof("self score %f, pass? %v", selfScore, selfScore >= threshold)

	otherScore := scoreFile("wav/verify/other_38974652.wav")
	log.Infof("other score %f, pass? %v", otherScore, otherScore >= threshold)
}

// waveDataError is the error type loadWaveData returns when a file is too
// small to contain the header it is supposed to strip.
type waveDataError string

// Error implements the error interface.
func (e waveDataError) Error() string { return string(e) }

// loadWaveData reads the whole file and returns its contents with the
// leading 44 bytes removed.
//
// It returns the error from ioutil.ReadFile unchanged if the file cannot be
// read, and a waveDataError if the file holds fewer than 44 bytes (slicing
// such data would otherwise panic). A file of exactly 44 bytes yields an
// empty, non-nil slice.
//
// NOTE(review): this assumes the recording starts with the canonical 44-byte
// PCM WAV header; files with extra chunks have a longer header — confirm the
// inputs are always plain PCM.
func loadWaveData(file string) ([]byte, error) {
	// Size, in bytes, of the header that is skipped.
	const wavHeaderSize = 44

	data, err := ioutil.ReadFile(file)
	if err != nil {
		return nil, err
	}
	if len(data) < wavHeaderSize {
		return nil, waveDataError("wav file is shorter than the 44-byte header: " + file)
	}
	return data[wavHeaderSize:], nil
}

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Package-level error values. Each one is created once with fmt.Errorf from
// a fixed message and no formatting arguments.
var (
	LSV_ERR_ENGINE_NOT_INIT   = fmt.Errorf("engine not init")
	LSV_ERR_TIMEOUT           = fmt.Errorf("timeout")
	LSV_ERR_NEED_MORE_SAMPLE  = fmt.Errorf("need more sample ")
	LSV_ERR_ILLEGAL_HANDLE    = fmt.Errorf("illegal handle")
	LSV_ERR_FILE_ERROR        = fmt.Errorf("file error")
	LSV_ERR_NO_AVAILABLE_DATA = fmt.Errorf("no available data")
	LSV_ERR_VOICE_TOO_SHORT   = fmt.Errorf("voice too short")
	LSV_ERR_TRAINING_FAILED   = fmt.Errorf("train failed")
	LSV_ERR_VERIFY_FAILED     = fmt.Errorf("verify failed")
	LSV_ERR_MODEL_NOT_FOUND   = fmt.Errorf("model not found")
	LSV_ERR_MODEL_LOAD_FAILED = fmt.Errorf("model load failed")
	LSV_ERR_MEM_INSUFFICIENT  = fmt.Errorf("memory insufficient")
	LSV_ERR_CONF_PARAM        = fmt.Errorf("conf param error")
	LSV_ERR_NO_ACTIVE_SPEECH  = fmt.Errorf("no active speech")
	LSV_ERR_INVALID_PARAM     = fmt.Errorf("invalid param")
)

Functions

func NewError

func NewError(err error, e string) error

Types

type VPREngine

type VPREngine struct {
	// contains filtered or unexported fields
}

func NewVPREngine

func NewVPREngine(sampleRate, delSilRange int, deleteSil bool, ubmFile, userModelFile string) (*VPREngine, error)

func (*VPREngine) AddTrainBuffer

func (this *VPREngine) AddTrainBuffer(buf []byte) error

func (*VPREngine) AddVerifyBuffer

func (this *VPREngine) AddVerifyBuffer(buf []byte) error

func (*VPREngine) ClearAllBuffer

func (this *VPREngine) ClearAllBuffer()

func (*VPREngine) ClearTrainBuffer

func (this *VPREngine) ClearTrainBuffer()

func (*VPREngine) ClearVerifyBuffer

func (this *VPREngine) ClearVerifyBuffer()

func (*VPREngine) GetScore

func (this *VPREngine) GetScore() float64

func (*VPREngine) TrainModel

func (this *VPREngine) TrainModel() error

func (*VPREngine) VerifyModel

func (this *VPREngine) VerifyModel() error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
t or T : Toggle theme light dark auto