diff --git a/go/example/test_simple.go b/go/example/test_simple.go
index afab8d33..361de08c 100644
--- a/go/example/test_simple.go
+++ b/go/example/test_simple.go
@@ -1,36 +1,63 @@
-package test_simple
+package main
 
 import (
-    "flag"
-    "os"
-    "github.com/alphacep/vosk-api/go"
+	"bufio"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"os"
+
+	vosk "github.com/alphacep/vosk-api/go"
 )
 
+// main feeds the file named by the -f flag to a recognizer in fixed-size
+// chunks, printing each intermediate result and then the final result.
 func main() {
-    var filename string
-    flag.StringVar(&filename, "f", "", "file to transcribe")
-    flag.Parse()
-    model, err := vosk.NewModel("model")
-    rec, err := vosk.NewRecognizer(model)
-
-    file, err := os.Open(filename)
-    if err != nil {
-        panic(err)
-    }
-    defer file.Close()
-
-    fileinfo, err := file.Stat()
-    if err != nil {
-        panic(err)
-    }
-
-    filesize := fileinfo.Size()
-    buffer := make([]byte, filesize)
-
-    _, err = file.Read(buffer)
-    if err != nil {
-        panic(err)
-    }
-
-    println(vosk.VoskFinalResult(rec, buffer))
+	var filename string
+	flag.StringVar(&filename, "f", "", "file to transcribe")
+	flag.Parse()
+
+	model, err := vosk.NewModel("model")
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	spkModel, err := vosk.NewSpkModel("model-spk")
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	sampleRate := 16000.0
+	rec, err := vosk.NewRecognizerSpk(model, sampleRate, spkModel)
+	if err != nil {
+		log.Fatal(err)
+	}
+	rec.SetWords(1)
+
+	file, err := os.Open(filename)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer file.Close()
+
+	reader := bufio.NewReader(file)
+	buf := make([]byte, 4096)
+
+	for {
+		// Read may return fewer than len(buf) bytes, so pass only buf[:n];
+		// data is handled before the error, as io.Reader callers should.
+		n, err := reader.Read(buf)
+		if n > 0 && rec.AcceptWaveform(buf[:n]) != 0 {
+			fmt.Println(string(rec.Result()))
+		}
+		if err != nil {
+			if err != io.EOF {
+				log.Fatal(err)
+			}
+			break
+		}
+	}
+
+	fmt.Println(string(rec.FinalResult()))
 }
diff --git a/go/vosk.go b/go/vosk.go
index 25008093..93baec74 100644
--- a/go/vosk.go
+++ b/go/vosk.go
@@ -8,55 +8,166 @@ import "C"
 
+import (
+	"errors"
+	"unsafe"
+)
+
 // VoskModel contains a reference to the C VoskModel
 type VoskModel struct {
-    model *C.struct_VoskModel
+	model *C.struct_VoskModel
+}
+
+// NewModel creates a new VoskModel instance from the model at modelPath.
+// It returns an error if the underlying library cannot load the model.
+func NewModel(modelPath string) (*VoskModel, error) {
+	// C.CString allocates with malloc, so release it once the C call returns.
+	cpath := C.CString(modelPath)
+	defer C.free(unsafe.Pointer(cpath))
+
+	internal := C.vosk_model_new(cpath)
+	if internal == nil {
+		return nil, errors.New("vosk: cannot load model from " + modelPath)
+	}
+	return &VoskModel{model: internal}, nil
+}
+
+// freeModel releases the C model referenced by model.
+func freeModel(model *VoskModel) {
+	C.vosk_model_free(model.model)
+}
+
+// FindWord checks if a word can be recognized by the model.
+// Returns the word symbol if the word exists inside the model or
+// -1 otherwise.
+func (m *VoskModel) FindWord(word []byte) int {
+	// The C API expects a NUL-terminated string; C.CString appends the
+	// terminator that C.CBytes would leave out.
+	cword := C.CString(string(word))
+	defer C.free(unsafe.Pointer(cword))
+	return int(C.vosk_model_find_word(m.model, cword))
 }
 
 // VoskSpkModel contains a reference to the C VoskSpkModel
 type VoskSpkModel struct {
-    spkModel *C.struct_VoskSpkModel
+	spkModel *C.struct_VoskSpkModel
+}
+
+// NewSpkModel creates a new VoskSpkModel instance from the model at
+// spkModelPath. It returns an error if the underlying library cannot load it.
+func NewSpkModel(spkModelPath string) (*VoskSpkModel, error) {
+	cpath := C.CString(spkModelPath)
+	defer C.free(unsafe.Pointer(cpath))
+
+	internal := C.vosk_spk_model_new(cpath)
+	if internal == nil {
+		return nil, errors.New("vosk: cannot load speaker model from " + spkModelPath)
+	}
+	return &VoskSpkModel{spkModel: internal}, nil
+}
+
+// freeSpkModel releases the C speaker model referenced by model.
+func freeSpkModel(model *VoskSpkModel) {
+	C.vosk_spk_model_free(model.spkModel)
 }
 
 // VoskRecognizer contains a reference to the C VoskRecognizer
 type VoskRecognizer struct {
-    rec *C.struct_VoskRecognizer
+	rec *C.struct_VoskRecognizer
 }
 
-func VoskFinalResult(recognizer *VoskRecognizer, buffer []byte) string {
-    cbuf := C.CBytes(buffer)
-    defer C.free(cbuf)
-    _ = C.vosk_recognizer_accept_waveform(recognizer.rec, (*C.char)(cbuf), C.int(len(buffer)))
-    result := C.GoString(C.vosk_recognizer_final_result(recognizer.rec))
-    return result
+// freeRecognizer releases the C recognizer referenced by recognizer.
+func freeRecognizer(recognizer *VoskRecognizer) {
+	C.vosk_recognizer_free(recognizer.rec)
 }
 
-// NewModel creates a new VoskModel instance
-func NewModel(modelPath string) (*VoskModel, error) {
-    var internal *C.struct_VoskModel
-    internal = C.vosk_model_new(C.CString(modelPath))
-    model := &VoskModel{model: internal}
-    return model, nil
+// NewRecognizer creates a new VoskRecognizer instance for the given sample
+// rate. It returns an error if the recognizer cannot be created.
+func NewRecognizer(model *VoskModel, sampleRate float64) (*VoskRecognizer, error) {
+	internal := C.vosk_recognizer_new(model.model, C.float(sampleRate))
+	if internal == nil {
+		return nil, errors.New("vosk: cannot create recognizer")
+	}
+	return &VoskRecognizer{rec: internal}, nil
 }
 
-// NewRecognizer creates a new VoskRecognizer instance
-func NewRecognizer(model *VoskModel) (*VoskRecognizer, error) {
-    var internal *C.struct_VoskRecognizer
-    internal = C.vosk_recognizer_new(model.model, 16000.0)
-    rec := &VoskRecognizer{rec: internal}
-    return rec, nil
+// NewRecognizerSpk creates a new VoskRecognizer instance with a speaker model.
+// It returns an error if the recognizer cannot be created.
+func NewRecognizerSpk(model *VoskModel, sampleRate float64, spkModel *VoskSpkModel) (*VoskRecognizer, error) {
+	internal := C.vosk_recognizer_new_spk(model.model, C.float(sampleRate), spkModel.spkModel)
+	if internal == nil {
+		return nil, errors.New("vosk: cannot create recognizer")
+	}
+	return &VoskRecognizer{rec: internal}, nil
 }
 
-func freeModel(model *VoskModel) {
-    C.vosk_model_free(model.model)
+// NewRecognizerGrm creates a new VoskRecognizer instance with the phrase list.
+// It returns an error if the recognizer cannot be created.
+func NewRecognizerGrm(model *VoskModel, sampleRate float64, grammar []byte) (*VoskRecognizer, error) {
+	// The C API expects a NUL-terminated string, which C.CString guarantees.
+	cgrammar := C.CString(string(grammar))
+	defer C.free(unsafe.Pointer(cgrammar))
+
+	internal := C.vosk_recognizer_new_grm(model.model, C.float(sampleRate), cgrammar)
+	if internal == nil {
+		return nil, errors.New("vosk: cannot create recognizer")
+	}
+	return &VoskRecognizer{rec: internal}, nil
 }
 
-func freeRecognizer(recognizer *VoskRecognizer) {
-    C.vosk_recognizer_free(recognizer.rec)
+// SetSpkModel adds a speaker model to an already initialized recognizer.
+func (r *VoskRecognizer) SetSpkModel(spkModel *VoskSpkModel) {
+	C.vosk_recognizer_set_spk_model(r.rec, spkModel.spkModel)
 }
 
-// NewSpkModel creates a new VoskSpkModel instance
-func NewSpkModel(spkModelPath string) (*VoskSpkModel, error) {
-    var internal *C.struct_VoskSpkModel
-    internal = C.vosk_spk_model_new(C.CString(spkModelPath))
-    spkModel := &VoskSpkModel{spkModel: internal}
-    return spkModel, nil
+// SetMaxAlternatives configures the recognizer to output n-best results.
+func (r *VoskRecognizer) SetMaxAlternatives(maxAlternatives int) {
+	C.vosk_recognizer_set_max_alternatives(r.rec, C.int(maxAlternatives))
+}
+
+// SetWords enables words with times in the output.
+func (r *VoskRecognizer) SetWords(words int) {
+	C.vosk_recognizer_set_words(r.rec, C.int(words))
+}
+
+// AcceptWaveform accepts and processes a new chunk of the voice data.
+func (r *VoskRecognizer) AcceptWaveform(buffer []byte) int {
+	cbuf := C.CBytes(buffer)
+	defer C.free(cbuf)
+	i := C.vosk_recognizer_accept_waveform(r.rec, (*C.char)(cbuf), C.int(len(buffer)))
+	return int(i)
+}
+
+// Result returns a speech recognition result.
+func (r *VoskRecognizer) Result() []byte {
+	return []byte(C.GoString(C.vosk_recognizer_result(r.rec)))
+}
+
+// PartialResult returns a partial speech recognition result.
+func (r *VoskRecognizer) PartialResult() []byte {
+	return []byte(C.GoString(C.vosk_recognizer_partial_result(r.rec)))
+}
+
+// FinalResult returns a speech recognition result. Same as result, but doesn't wait
+// for silence.
+func (r *VoskRecognizer) FinalResult() []byte {
+	return []byte(C.GoString(C.vosk_recognizer_final_result(r.rec)))
+}
+
+// Reset resets the recognizer.
+func (r *VoskRecognizer) Reset() {
+	C.vosk_recognizer_reset(r.rec)
+}
+
+// SetLogLevel sets the log level for Kaldi messages.
+func SetLogLevel(logLevel int) {
+	C.vosk_set_log_level(C.int(logLevel))
+}
+
+// GPUInit automatically selects a CUDA device and allows multithreading.
+func GPUInit() {
+	C.vosk_gpu_init()
+}
+
+// GPUThreadInit inits CUDA device in a multi-threaded environment.
+func GPUThreadInit() {
+	C.vosk_gpu_thread_init()
 }