|
| 1 | +#!/usr/bin/env python3 |
| 2 | + |
| 3 | +# Real-time speech recognition from a microphone with sherpa-ncnn Python API |
| 4 | +# with endpoint detection. |
| 5 | +# |
| 6 | +# Note: This script uses ALSA and works only on Linux systems, especially |
| 7 | +# for embedded Linux systems and for running Linux on Windows using WSL. |
| 8 | +# |
| 9 | +# Please refer to |
| 10 | +# https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html |
| 11 | +# to download pre-trained models |
| 12 | + |
| 13 | +import argparse |
| 14 | +import sys |
| 15 | + |
| 16 | +import sherpa_ncnn |
| 17 | + |
| 18 | + |
class _HelpFormatter(
    argparse.ArgumentDefaultsHelpFormatter, argparse.RawTextHelpFormatter
):
    """Show argument defaults while keeping the help text's own line breaks.

    The ``--device-name`` help embeds an example command and sample
    ``arecord -l`` output; the default formatter would re-flow all of that
    into a single paragraph.
    """


def get_args():
    """Parse the command-line arguments of this script.

    Returns:
      The ``argparse.Namespace`` produced by ``parse_args()``. Its
      ``device_name`` attribute holds the value of the required
      ``--device-name`` option.
    """
    parser = argparse.ArgumentParser(formatter_class=_HelpFormatter)

    parser.add_argument(
        "--device-name",
        type=str,
        required=True,
        # strip() drops the leading newline and trailing indentation of the
        # literal so that the verbatim help output has no stray blank lines.
        help="""
The device name specifies which microphone to use in case there are several
on your system. You can use

  arecord -l

to find all available microphones on your computer. For instance, if it outputs

**** List of CAPTURE Hardware Devices ****
card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  Subdevices: 1/1
  Subdevice #0: subdevice #0

and if you want to select card 3 and the device 0 on that card, please use:

  plughw:3,0

as the device_name.
        """.strip(),
    )

    return parser.parse_args()
| 50 | + |
| 51 | + |
def create_recognizer():
    """Construct the ``sherpa_ncnn.Recognizer`` used by this example.

    Please replace the model files if needed.
    See https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
    for download links.

    Returns:
      The recognizer object returned by ``sherpa_ncnn.Recognizer``.
    """
    # Paths of the pre-trained model files, relative to the current
    # working directory.
    model_files = dict(
        tokens="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/tokens.txt",
        encoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.param",
        encoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/encoder_jit_trace-pnnx.ncnn.bin",
        decoder_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.param",
        decoder_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/decoder_jit_trace-pnnx.ncnn.bin",
        joiner_param="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.param",
        joiner_bin="./sherpa-ncnn-conv-emformer-transducer-2022-12-06/joiner_jit_trace-pnnx.ncnn.bin",
    )

    # Endpoint detection settings.
    # NOTE(review): the units of the rule thresholds are presumably seconds;
    # confirm against the sherpa-ncnn documentation.
    endpoint_options = dict(
        enable_endpoint_detection=True,
        rule1_min_trailing_silence=2.4,
        rule2_min_trailing_silence=1.2,
        rule3_min_utterance_length=300,
    )

    return sherpa_ncnn.Recognizer(
        **model_files,
        num_threads=4,
        decoding_method="modified_beam_search",
        **endpoint_options,
        hotwords_file="",
        hotwords_score=1.5,
    )
| 74 | + |
| 75 | + |
def main():
    """Recognize speech from an ALSA capture device until interrupted.

    Reads audio from the device given by ``--device-name`` in chunks of
    0.1 second, feeds each chunk to the recognizer and prints the current
    text of the segment on one console line (rewritten in place with a
    carriage return). When the recognizer reports an endpoint, a non-empty
    segment is finalized with a newline and the recognizer is reset.

    This function never returns on its own; it loops until an exception
    (for example ``KeyboardInterrupt``) propagates out of it.
    """
    args = get_args()
    device_name = args.device_name
    print(f"device_name: {device_name}")
    alsa = sherpa_ncnn.Alsa(device_name)

    recognizer = create_recognizer()
    print("Started! Please speak")
    sample_rate = recognizer.sample_rate
    samples_per_read = int(0.1 * sample_rate)  # 0.1 second = 100 ms
    last_result = ""
    segment_id = 0

    while True:
        samples = alsa.read(samples_per_read)  # a blocking read
        recognizer.accept_waveform(sample_rate, samples)

        is_endpoint = recognizer.is_endpoint

        result = recognizer.text
        # Only redraw the line when the text actually changed.
        if result and last_result != result:
            last_result = result
            print(f"\r{segment_id}:{result}", end="", flush=True)

        if is_endpoint:
            if result:
                # Finalize the segment: reprint it and move to a new line.
                print(f"\r{segment_id}:{result}", flush=True)
                segment_id += 1
            recognizer.reset()
            # Forget the text of the finished segment. Otherwise a new
            # segment whose first partial text equals the previous one
            # would not be displayed until its text changes.
            last_result = ""
| 105 | + |
| 106 | + |
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # main() loops forever, so Ctrl + C is the expected way to stop the
        # script; print a short message instead of a traceback.
        print("\nCaught Ctrl + C. Exiting")
0 commit comments