Skip to content

Commit 87899c2

Browse files
authored
Add Python ASR example with alsa (#324)
1 parent 3c7724c commit 87899c2

11 files changed

+227
-4
lines changed

CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
22
project(sherpa-ncnn)
33

4-
set(SHERPA_NCNN_VERSION "2.1.9")
4+
set(SHERPA_NCNN_VERSION "2.1.10")
55

66
# Disable warning about
77
#
@@ -106,6 +106,7 @@ if(SHERPA_NCNN_ENABLE_BINARY AND UNIX AND NOT APPLE)
106106
include(CheckIncludeFileCXX)
107107
check_include_file_cxx(alsa/asoundlib.h SHERPA_NCNN_HAS_ALSA)
108108
if(SHERPA_NCNN_HAS_ALSA)
109+
message(STATUS "With Alsa")
109110
add_definitions(-DSHERPA_NCNN_ENABLE_ALSA=1)
110111
elseif(UNIX AND NOT APPLE)
111112
message(WARNING "\
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
#!/usr/bin/env python3
2+
3+
# Real-time speech recognition from a microphone with sherpa-ncnn Python API
4+
# with endpoint detection.
5+
#
6+
# Note: This script uses ALSA and works only on Linux systems, especially
7+
# for embedded Linux systems and for running Linux on Windows using WSL.
8+
#
9+
# Please refer to
10+
# https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
11+
# to download pre-trained models
12+
13+
import argparse
14+
import sys
15+
16+
import sherpa_ncnn
17+
18+
19+
def get_args(argv=None):
    """Parse the command-line options of the ALSA microphone example.

    Args:
      argv: Optional list of argument strings to parse. When it is ``None``
        (the default), ``argparse`` falls back to ``sys.argv[1:]``, which is
        what the script entry point relies on.

    Returns:
      The parsed ``argparse.Namespace``; its ``device_name`` attribute holds
      the value given via ``--device-name``.
    """

    class _HelpFormatter(
        argparse.ArgumentDefaultsHelpFormatter,
        argparse.RawTextHelpFormatter,
    ):
        """Show defaults while keeping the line breaks of the help text.

        ArgumentDefaultsHelpFormatter alone re-wraps the help string, which
        merges the ``arecord -l`` walkthrough below into a single paragraph.
        """

    parser = argparse.ArgumentParser(formatter_class=_HelpFormatter)

    parser.add_argument(
        "--device-name",
        type=str,
        required=True,
        help="""
The device name specifies which microphone to use in case there are several
on your system. You can use

arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
Subdevices: 1/1
Subdevice #0: subdevice #0

and if you want to select card 3 and the device 0 on that card, please use:

plughw:3,0

as the device_name.
""",
    )

    return parser.parse_args(argv)
50+
51+
52+
def create_recognizer():
    """Create the sherpa-ncnn recognizer configured for this example.

    All model files are looked up in one directory relative to the current
    working directory. Endpoint detection is enabled and decoding uses
    ``modified_beam_search``.

    Returns:
      The constructed ``sherpa_ncnn.Recognizer``.
    """
    # Please replace the model files if needed.
    # See https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
    # for download links.
    model_dir = "./sherpa-ncnn-conv-emformer-transducer-2022-12-06"

    def model_file(name):
        # Every model artifact lives directly inside model_dir.
        return f"{model_dir}/{name}"

    options = {
        "tokens": model_file("tokens.txt"),
        "encoder_param": model_file("encoder_jit_trace-pnnx.ncnn.param"),
        "encoder_bin": model_file("encoder_jit_trace-pnnx.ncnn.bin"),
        "decoder_param": model_file("decoder_jit_trace-pnnx.ncnn.param"),
        "decoder_bin": model_file("decoder_jit_trace-pnnx.ncnn.bin"),
        "joiner_param": model_file("joiner_jit_trace-pnnx.ncnn.param"),
        "joiner_bin": model_file("joiner_jit_trace-pnnx.ncnn.bin"),
        "num_threads": 4,
        "decoding_method": "modified_beam_search",
        "enable_endpoint_detection": True,
        "rule1_min_trailing_silence": 2.4,
        "rule2_min_trailing_silence": 1.2,
        "rule3_min_utterance_length": 300,
        "hotwords_file": "",
        "hotwords_score": 1.5,
    }
    return sherpa_ncnn.Recognizer(**options)
74+
75+
76+
def main():
    """Recognize speech from an ALSA capture device until interrupted.

    Reads 100 ms chunks from the device selected with ``--device-name``,
    feeds them to the recognizer and prints the partial text of the current
    segment on one line, rewriting it in place. When the recognizer reports
    an endpoint, the line is finalized, the segment counter advances and the
    recognizer is reset. The loop never returns on its own.
    """
    args = get_args()
    device_name = args.device_name
    print(f"device_name: {device_name}")
    alsa = sherpa_ncnn.Alsa(device_name)

    recognizer = create_recognizer()
    print("Started! Please speak")
    sample_rate = recognizer.sample_rate
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms
    last_result = ""
    segment_id = 0

    while True:
        samples = alsa.read(samples_per_read)  # a blocking read
        recognizer.accept_waveform(sample_rate, samples)

        is_endpoint = recognizer.is_endpoint

        result = recognizer.text
        if result and (last_result != result):
            last_result = result
            # "\r" + end="" rewrites the current line with the newer text.
            print("\r{}:{}".format(segment_id, result), end="", flush=True)

        if is_endpoint:
            if result:
                # Finalize the line (with a newline) and start a new segment.
                print("\r{}:{}".format(segment_id, result), flush=True)
                segment_id += 1
            recognizer.reset()
            # Forget the finished segment's text; otherwise a new segment
            # whose first partial result equals the previous text would not
            # be displayed until it changes.
            last_result = ""


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl + C is the expected way to stop the endless capture loop.
        print("\nCaught Ctrl + C. Exiting")

sherpa-ncnn/csrc/alsa.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
6262
6363
and if you want to select card 3 and the device 0 on that card, please use:
6464
65-
hw:3,0
65+
plughw:3,0
6666
6767
)";
6868

sherpa-ncnn/csrc/sherpa-ncnn-alsa.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
6767
6868
and if you want to select card 3 and the device 0 on that card, please use:
6969
70-
hw:3,0
70+
plughw:3,0
7171
7272
as the device_name.
7373
)usage";

sherpa-ncnn/csrc/stream.cc

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "sherpa-ncnn/csrc/stream.h"
2020

2121
#include <iostream>
22+
#include <utility>
2223

2324
namespace sherpa_ncnn {
2425

sherpa-ncnn/python/csrc/CMakeLists.txt

+13
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,12 @@ set(srcs
1111
stream.cc
1212
)
1313

14+
if(SHERPA_NCNN_HAS_ALSA)
15+
list(APPEND srcs ${CMAKE_SOURCE_DIR}/sherpa-ncnn/csrc/alsa.cc alsa.cc)
16+
else()
17+
list(APPEND srcs faked-alsa.cc)
18+
endif()
19+
1420
pybind11_add_module(_sherpa_ncnn ${srcs})
1521
target_link_libraries(_sherpa_ncnn PRIVATE sherpa-ncnn-core)
1622

@@ -28,6 +34,13 @@ if(NOT WIN32)
2834
target_link_libraries(_sherpa_ncnn PRIVATE "-Wl,-rpath,${SHERPA_NCNN_RPATH_ORIGIN}/sherpa_ncnn/lib")
2935
endif()
3036

37+
if(SHERPA_NCNN_HAS_ALSA)
38+
if(DEFINED ENV{SHERPA_NCNN_ALSA_LIB_DIR})
39+
target_link_libraries(_sherpa_ncnn PRIVATE -L$ENV{SHERPA_NCNN_ALSA_LIB_DIR} -lasound)
40+
else()
41+
target_link_libraries(_sherpa_ncnn PRIVATE asound)
42+
endif()
43+
endif()
3144

3245
install(TARGETS _sherpa_ncnn
3346
DESTINATION ../

sherpa-ncnn/python/csrc/alsa.cc

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// sherpa-ncnn/python/csrc/alsa.cc
2+
//
3+
// Copyright (c) 2024 Xiaomi Corporation
4+
5+
#include "sherpa-ncnn/python/csrc/alsa.h"
6+
7+
#include <vector>
8+
9+
#include "sherpa-ncnn/csrc/alsa.h"
10+
11+
namespace sherpa_ncnn {
12+
13+
void PybindAlsa(py::module *m) {
14+
using PyClass = Alsa;
15+
py::class_<PyClass>(*m, "Alsa")
16+
.def(py::init<const char *>(), py::arg("device_name"),
17+
py::call_guard<py::gil_scoped_release>())
18+
.def(
19+
"read",
20+
[](PyClass &self, int32_t num_samples) -> std::vector<float> {
21+
return self.Read(num_samples);
22+
},
23+
py::arg("num_samples"), py::call_guard<py::gil_scoped_release>())
24+
.def_property_readonly("expected_sample_rate",
25+
&PyClass::GetExpectedSampleRate)
26+
.def_property_readonly("actual_sample_rate",
27+
&PyClass::GetActualSampleRate);
28+
}
29+
30+
} // namespace sherpa_ncnn

sherpa-ncnn/python/csrc/alsa.h

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// sherpa-ncnn/python/csrc/alsa.h
2+
//
3+
// Copyright (c) 2024 Xiaomi Corporation
4+
5+
#ifndef SHERPA_NCNN_PYTHON_CSRC_ALSA_H_
6+
#define SHERPA_NCNN_PYTHON_CSRC_ALSA_H_
7+
8+
#include "sherpa-ncnn/python/csrc/sherpa-ncnn.h"
9+
10+
namespace sherpa_ncnn {
11+
12+
// Registers the Python class `Alsa` on the module `m`.
void PybindAlsa(py::module *m);
13+
14+
} // namespace sherpa_ncnn
15+
16+
#endif // SHERPA_NCNN_PYTHON_CSRC_ALSA_H_

sherpa-ncnn/python/csrc/faked-alsa.cc

+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// sherpa-ncnn/python/csrc/faked-alsa.cc
2+
//
3+
// Copyright (c) 2024 Xiaomi Corporation
4+
5+
#include "sherpa-ncnn/csrc/macros.h"
6+
#include "sherpa-ncnn/python/csrc/alsa.h"
7+
8+
namespace sherpa_ncnn {
9+
10+
class FakedAlsa {
11+
public:
12+
explicit FakedAlsa(const char *) {
13+
SHERPA_NCNN_LOGE("This function is for Linux only.");
14+
#if (SHERPA_NCNN_ENABLE_ALSA == 0) && (defined(__unix__) || defined(__unix))
15+
SHERPA_NCNN_LOGE(R"doc(
16+
sherpa-ncnn is compiled without alsa support. To enable that, please run
17+
(1) sudo apt-get install alsa-utils libasound2-dev
18+
(2) rebuild sherpa-ncnn
19+
)doc");
20+
#endif
21+
exit(-1);
22+
}
23+
24+
std::vector<float> Read(int32_t) const { return {}; }
25+
int32_t GetExpectedSampleRate() const { return -1; }
26+
int32_t GetActualSampleRate() const { return -1; }
27+
};
28+
29+
void PybindAlsa(py::module *m) {
30+
using PyClass = FakedAlsa;
31+
py::class_<PyClass>(*m, "Alsa")
32+
.def(py::init<const char *>(), py::arg("device_name"))
33+
.def(
34+
"read",
35+
[](PyClass &self, int32_t num_samples) -> std::vector<float> {
36+
return self.Read(num_samples);
37+
},
38+
py::arg("num_samples"), py::call_guard<py::gil_scoped_release>())
39+
.def_property_readonly("expected_sample_rate",
40+
&PyClass::GetExpectedSampleRate)
41+
.def_property_readonly("actual_sample_rate",
42+
&PyClass::GetActualSampleRate);
43+
}
44+
45+
} // namespace sherpa_ncnn
46+
47+
#endif // SHERPA_NCNN_PYTHON_CSRC_FAKED_ALSA_H_

sherpa-ncnn/python/csrc/sherpa-ncnn.cc

+3
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
#include "sherpa-ncnn/python/csrc/sherpa-ncnn.h"
2020

21+
#include "sherpa-ncnn/python/csrc/alsa.h"
2122
#include "sherpa-ncnn/python/csrc/decoder.h"
2223
#include "sherpa-ncnn/python/csrc/display.h"
2324
#include "sherpa-ncnn/python/csrc/endpoint.h"
@@ -39,6 +40,8 @@ PYBIND11_MODULE(_sherpa_ncnn, m) {
3940
PybindRecognizer(&m);
4041

4142
PybindDisplay(&m);
43+
44+
PybindAlsa(&m);
4245
}
4346

4447
} // namespace sherpa_ncnn

sherpa-ncnn/python/sherpa_ncnn/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
1+
from _sherpa_ncnn import Alsa, Display
2+
13
from .recognizer import Recognizer
2-
from _sherpa_ncnn import Display

0 commit comments

Comments
 (0)