From 4e20020a0656165cf00a940c4372423719f4af70 Mon Sep 17 00:00:00 2001 From: Cem Aydin Date: Tue, 31 Jan 2023 15:21:35 +0100 Subject: [PATCH] Vosk api: allow selecting different models and automatic model download recognize_vosk function: - Added function parameter 'model', to select a model based on the model directory. - Also make use of Vosk capability to download the model by itself, by providing language parameters. (the language parameter was previously present but not used) Also see the updated README. --- README.rst | 11 +++++++---- speech_recognition/__init__.py | 29 ++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index f9bde14e..947c337c 100644 --- a/README.rst +++ b/README.rst @@ -57,8 +57,6 @@ The `library reference `__ for information about installing languages, compiling PocketSphinx, and building language packs from online resources. This document is also included under ``reference/pocketsphinx.rst``. -You have to install Vosk models for using Vosk. `Here `__ are models avaiable. You have to place them in models folder of your project, like "your-project-folder/models/your-vosk-model" - Examples -------- @@ -143,9 +141,14 @@ Vosk API is **required if and only if you want to use Vosk recognizer** (``recog You can install it with ``python3 -m pip install vosk``. -You also have to install Vosk Models: +Languages can be selected with the language parameter e.g. ``recognizer_instance.recognize_vosk(language='de')``. +Vosk will attempt to download the respective model from https://alphacephei.com/vosk/models automatically. +Language defaults to english ``'en-us'``. + +It is possible to manually download a model and place it in a directory in your project folder. +Reference this folder with the model parameter ``model='folder-name'``. This will take precedence over the language parameter. -`Here `__ are models avaiable for download. 
You have to place them in models folder of your project, like "your-project-folder/models/your-vosk-model" +Models are available for download `here <https://alphacephei.com/vosk/models>`__. Google Cloud Speech Library for Python (for Google Cloud Speech API users) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py index 66ebc04c..a3044d1c 100644 --- a/speech_recognition/__init__.py +++ b/speech_recognition/__init__.py @@ -1684,16 +1684,31 @@ def recognize_whisper(self, audio_data, model="base", show_dict=False, load_opti return result["text"] - def recognize_vosk(self, audio_data, language='en'): + def recognize_vosk(self, audio_data, model='', language='en-us'): from vosk import Model, KaldiRecognizer - + assert isinstance(audio_data, AudioData), "Data must be audio data" - + if not hasattr(self, 'vosk_model'): - if not os.path.exists("model"): - return "Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as 'model' in the current folder." - exit (1) - self.vosk_model = Model("model") + if model: + if not os.path.exists(model): + raise RequestError(f"Please download the model from https://github.com/alphacep/vosk-api/blob/master/doc/models.md and unpack as '{model}' in the current folder.") + self.vosk_model = Model(model) + else: + try: + import requests + except ImportError: + raise RequestError("requests module is required to download model data") + # verify this language is available via api + response = requests.get('https://alphacephei.com/vosk/models/model-list.json', timeout=10) + # raise error if bad response + response.raise_for_status() + + models = response.json() + languages = { m["lang"] for m in models } + if language not in languages: + raise RequestError(f"Language '{language}' not available. Available language codes are: {languages}") + self.vosk_model = Model(lang=language) rec = KaldiRecognizer(self.vosk_model, 16000);