-
Notifications
You must be signed in to change notification settings - Fork 162
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add Python ASR example with alsa (#324)
- Loading branch information
1 parent
3c7724c
commit 87899c2
Showing
11 changed files
with
227 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
111 changes: 111 additions & 0 deletions
111
python-api-examples/speech-recognition-from-microphone-with-endpoint-detection-alsa.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# Real-time speech recognition from a microphone with sherpa-ncnn Python API | ||
# with endpoint detection. | ||
# | ||
# Note: This script uses ALSA and works only on Linux systems. It is especially | ||
# useful on embedded Linux systems and when running Linux on Windows via WSL. | ||
# | ||
# Please refer to | ||
# https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html | ||
# to download pre-trained models | ||
|
||
import argparse | ||
import sys | ||
|
||
import sherpa_ncnn | ||
|
||
|
||
def get_args():
    """Parse the command-line options of this example.

    Returns:
      The ``argparse.Namespace`` produced from ``sys.argv``. Its only
      attribute is ``device_name``, the (required) ALSA capture device
      given via ``--device-name``.
    """
    device_name_help = """
The device name specifies which microphone to use in case there are several
on your system. You can use
arecord -l
to find all available microphones on your computer. For instance, if it outputs
**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
Subdevices: 1/1
Subdevice #0: subdevice #0
and if you want to select card 3 and the device 0 on that card, please use:
plughw:3,0
as the device_name.
"""

    arg_parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    arg_parser.add_argument(
        "--device-name",
        type=str,
        required=True,
        help=device_name_help,
    )

    parsed = arg_parser.parse_args()
    return parsed
|
||
|
||
def create_recognizer():
    """Build the streaming recognizer used by this example.

    The model files are looked up relative to the current working directory.
    Please replace the model files if needed. See
    https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
    for download links.

    Returns:
      A ``sherpa_ncnn.Recognizer`` configured for modified beam search with
      endpoint detection enabled.
    """
    model_dir = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06"

    # All model artifacts live in the same directory.
    model_files = {
        "tokens": f"{model_dir}/tokens.txt",
        "encoder_param": f"{model_dir}/encoder_jit_trace-pnnx.ncnn.param",
        "encoder_bin": f"{model_dir}/encoder_jit_trace-pnnx.ncnn.bin",
        "decoder_param": f"{model_dir}/decoder_jit_trace-pnnx.ncnn.param",
        "decoder_bin": f"{model_dir}/decoder_jit_trace-pnnx.ncnn.bin",
        "joiner_param": f"{model_dir}/joiner_jit_trace-pnnx.ncnn.param",
        "joiner_bin": f"{model_dir}/joiner_jit_trace-pnnx.ncnn.bin",
    }

    return sherpa_ncnn.Recognizer(
        **model_files,
        num_threads=4,
        decoding_method="modified_beam_search",
        enable_endpoint_detection=True,
        rule1_min_trailing_silence=2.4,
        rule2_min_trailing_silence=1.2,
        rule3_min_utterance_length=300,
        hotwords_file="",  # empty: no hotwords file is passed
        hotwords_score=1.5,
    )
|
||
|
||
def main():
    """Recognize speech from an ALSA microphone until interrupted.

    Reads the capture device named by ``--device-name`` in 100 ms chunks,
    feeds every chunk to the recognizer and prints the text of the current
    segment in place. When the recognizer reports an endpoint, the segment
    is finalized on its own line and the recognizer is reset.

    This function never returns normally; it loops until an exception
    (e.g. ``KeyboardInterrupt``) propagates out of it.
    """
    args = get_args()
    device_name = args.device_name
    print(f"device_name: {device_name}")
    alsa = sherpa_ncnn.Alsa(device_name)

    recognizer = create_recognizer()
    print("Started! Please speak")

    # NOTE(review): assumes alsa.read() delivers samples at
    # recognizer.sample_rate -- confirm against alsa.expected_sample_rate.
    sample_rate = recognizer.sample_rate
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms
    last_result = ""
    segment_id = 0

    while True:
        samples = alsa.read(samples_per_read)  # a blocking read
        recognizer.accept_waveform(sample_rate, samples)

        # Query the endpoint flag before reading the text, as the original
        # example does.
        is_endpoint = recognizer.is_endpoint

        result = recognizer.text
        if result and (last_result != result):
            # Redraw the current line only when the partial text changed.
            last_result = result
            print(f"\r{segment_id}:{result}", end="", flush=True)

        if is_endpoint:
            if result:
                # Finalize the segment: same text, but terminate the line.
                print(f"\r{segment_id}:{result}", flush=True)
                segment_id += 1
            recognizer.reset()
            # Forget the finished segment. Otherwise a new segment whose
            # first partial text equals the previous final text would not
            # be displayed until its text changes.
            last_result = ""
|
||
|
||
if __name__ == "__main__":
    # main() loops forever, so Ctrl + C is the expected way to stop the
    # script; report it instead of dumping a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("\nCaught Ctrl + C. Exiting")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
#include "sherpa-ncnn/csrc/stream.h" | ||
|
||
#include <iostream> | ||
#include <utility> | ||
|
||
namespace sherpa_ncnn { | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// sherpa-ncnn/python/csrc/alsa.cc | ||
// | ||
// Copyright (c) 2024 Xiaomi Corporation | ||
|
||
#include "sherpa-ncnn/python/csrc/alsa.h" | ||
|
||
#include <vector> | ||
|
||
#include "sherpa-ncnn/csrc/alsa.h" | ||
|
||
namespace sherpa_ncnn { | ||
|
||
void PybindAlsa(py::module *m) { | ||
using PyClass = Alsa; | ||
py::class_<PyClass>(*m, "Alsa") | ||
.def(py::init<const char *>(), py::arg("device_name"), | ||
py::call_guard<py::gil_scoped_release>()) | ||
.def( | ||
"read", | ||
[](PyClass &self, int32_t num_samples) -> std::vector<float> { | ||
return self.Read(num_samples); | ||
}, | ||
py::arg("num_samples"), py::call_guard<py::gil_scoped_release>()) | ||
.def_property_readonly("expected_sample_rate", | ||
&PyClass::GetExpectedSampleRate) | ||
.def_property_readonly("actual_sample_rate", | ||
&PyClass::GetActualSampleRate); | ||
} | ||
|
||
} // namespace sherpa_ncnn |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// sherpa-ncnn/python/csrc/alsa.h | ||
// | ||
// Copyright (c) 2024 Xiaomi Corporation | ||
|
||
#ifndef SHERPA_NCNN_PYTHON_CSRC_ALSA_H_ | ||
#define SHERPA_NCNN_PYTHON_CSRC_ALSA_H_ | ||
|
||
#include "sherpa-ncnn/python/csrc/sherpa-ncnn.h" | ||
|
||
namespace sherpa_ncnn {

// Registers the Python class "Alsa" on the pybind11 module `m`.
//
// NOTE(review): definitions exist in both alsa.cc (wrapping the real Alsa
// class) and faked-alsa.cc (wrapping a stub); presumably the build links
// exactly one of them -- confirm in the build scripts.
void PybindAlsa(py::module *m);

}  // namespace sherpa_ncnn
|
||
#endif // SHERPA_NCNN_PYTHON_CSRC_ALSA_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
// sherpa-ncnn/python/csrc/faked-alsa.cc | ||
// | ||
// Copyright (c) 2024 Xiaomi Corporation | ||
|
||
#include "sherpa-ncnn/csrc/macros.h" | ||
#include "sherpa-ncnn/python/csrc/alsa.h" | ||
|
||
namespace sherpa_ncnn { | ||
|
||
class FakedAlsa { | ||
public: | ||
explicit FakedAlsa(const char *) { | ||
SHERPA_NCNN_LOGE("This function is for Linux only."); | ||
#if (SHERPA_NCNN_ENABLE_ALSA == 0) && (defined(__unix__) || defined(__unix)) | ||
SHERPA_NCNN_LOGE(R"doc( | ||
sherpa-ncnn is compiled without alsa support. To enable that, please run | ||
(1) sudo apt-get install alsa-utils libasound2-dev | ||
(2) rebuild sherpa-ncnn | ||
)doc"); | ||
#endif | ||
exit(-1); | ||
} | ||
|
||
std::vector<float> Read(int32_t) const { return {}; } | ||
int32_t GetExpectedSampleRate() const { return -1; } | ||
int32_t GetActualSampleRate() const { return -1; } | ||
}; | ||
|
||
void PybindAlsa(py::module *m) { | ||
using PyClass = FakedAlsa; | ||
py::class_<PyClass>(*m, "Alsa") | ||
.def(py::init<const char *>(), py::arg("device_name")) | ||
.def( | ||
"read", | ||
[](PyClass &self, int32_t num_samples) -> std::vector<float> { | ||
return self.Read(num_samples); | ||
}, | ||
py::arg("num_samples"), py::call_guard<py::gil_scoped_release>()) | ||
.def_property_readonly("expected_sample_rate", | ||
&PyClass::GetExpectedSampleRate) | ||
.def_property_readonly("actual_sample_rate", | ||
&PyClass::GetActualSampleRate); | ||
} | ||
|
||
} // namespace sherpa_ncnn | ||
|
||
#endif // SHERPA_NCNN_PYTHON_CSRC_FAKED_ALSA_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
from _sherpa_ncnn import Alsa, Display | ||
|
||
from .recognizer import Recognizer | ||
from _sherpa_ncnn import Display |