From 1e4d5e0c413711022ab5895b4e6bb53f6d7e783f Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 23 Feb 2024 22:51:34 +0800 Subject: [PATCH 01/12] WIP: Add WebAssembly + NodeJS example. --- CMakeLists.txt | 9 ++ build-wasm-simd-for-nodejs.sh | 76 +++++++++++++++ nodejs-wasm-examples/decode-file.js | 75 +++++++++++++++ wasm/CMakeLists.txt | 26 ++++- wasm/app.js | 2 +- wasm/sherpa-ncnn.js | 142 +++++++++++++++------------- 6 files changed, 258 insertions(+), 72 deletions(-) create mode 100755 build-wasm-simd-for-nodejs.sh create mode 100644 nodejs-wasm-examples/decode-file.js diff --git a/CMakeLists.txt b/CMakeLists.txt index bf4d4358..2da51588 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,7 @@ option(SHERPA_NCNN_ENABLE_BINARY "Whether to build the binary sherpa-ncnn" ON) option(SHERPA_NCNN_ENABLE_TEST "Whether to build tests" OFF) option(SHERPA_NCNN_ENABLE_C_API "Whether to build C API" ON) option(SHERPA_NCNN_ENABLE_WASM "Whether to enable WASM" OFF) +option(SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS "Whether to enable WASM for NodeJS" OFF) option(SHERPA_NCNN_ENABLE_GENERATE_INT8_SCALE_TABLE "Whether to generate-int8-scale-table" ON) option(SHERPA_NCNN_ENABLE_FFMPEG_EXAMPLES "Whether to enable ffmpeg-examples" OFF) @@ -77,6 +78,14 @@ message(STATUS "SHERPA_NCNN_ENABLE_TEST ${SHERPA_NCNN_ENABLE_TEST}") message(STATUS "SHERPA_NCNN_ENABLE_C_API ${SHERPA_NCNN_ENABLE_C_API}") message(STATUS "SHERPA_NCNN_ENABLE_GENERATE_INT8_SCALE_TABLE ${SHERPA_NCNN_ENABLE_GENERATE_INT8_SCALE_TABLE}") message(STATUS "SHERPA_NCNN_ENABLE_FFMPEG_EXAMPLES ${SHERPA_NCNN_ENABLE_FFMPEG_EXAMPLES}") +message(STATUS "SHERPA_NCNN_ENABLE_WASM ${SHERPA_NCNN_ENABLE_WASM}") +message(STATUS "SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS ${SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS}") + +if(SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS) + if(NOT SHERPA_NCNN_ENABLE_WASM) + message(FATAL_ERROR "Please set SHERPA_NCNN_ENABLE_WASM to ON if you enable WASM for NodeJS") + endif() +endif() if(NOT CMAKE_BUILD_TYPE) 
message(STATUS "No CMAKE_BUILD_TYPE given, default to Release") diff --git a/build-wasm-simd-for-nodejs.sh b/build-wasm-simd-for-nodejs.sh new file mode 100755 index 00000000..e19f6f00 --- /dev/null +++ b/build-wasm-simd-for-nodejs.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# Copyright (c) 2024 Xiaomi Corporation +# +# This script is to build sherpa-ncnn for WebAssembly (NodeJS) +# +# See also +# https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-webassembly +# +# Please refer to +# https://k2-fsa.github.io/sherpa/ncnn/wasm/index.html +# for more details. + +set -ex + +if [ x"$EMSCRIPTEN" == x"" ]; then + if ! command -v emcc &> /dev/null; then + echo "Please install emscripten first" + echo "" + echo "You can use the following commands to install it:" + echo "" + echo "git clone https://github.com/emscripten-core/emsdk.git" + echo "cd emsdk" + echo "git pull" + echo "./emsdk install latest" + echo "./emsdk activate latest" + echo "source ./emsdk_env.sh" + exit 1 + else + EMSCRIPTEN=$(dirname $(realpath $(which emcc))) + fi +fi + +export EMSCRIPTEN=$EMSCRIPTEN +echo "EMSCRIPTEN: $EMSCRIPTEN" +if [ ! 
-f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then + echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" + echo "Please make sure you have installed emsdk correctly" + exit 1 +fi + +mkdir -p build-wasm-simd-for-nodejs +pushd build-wasm-simd-for-nodejs + +export SHERPA_NCNN_IS_USING_BUILD_WASM_SH=ON + +cmake \ + -DCMAKE_INSTALL_PREFIX=./install \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \ + -DNCNN_THREADS=OFF \ + -DNCNN_OPENMP=OFF \ + -DNCNN_SIMPLEOMP=OFF \ + -DNCNN_RUNTIME_CPU=OFF \ + -DNCNN_SSE2=ON \ + -DNCNN_AVX2=OFF \ + -DNCNN_AVX=OFF \ + -DNCNN_BUILD_TOOLS=OFF \ + -DNCNN_BUILD_EXAMPLES=OFF \ + -DNCNN_BUILD_BENCHMARK=OFF \ + \ + -DSHERPA_NCNN_ENABLE_WASM=ON \ + -DSHERPA_NCNN_ENABLE_WASM_FOR_NODEJS=ON \ + -DBUILD_SHARED_LIBS=OFF \ + -DSHERPA_NCNN_ENABLE_PYTHON=OFF \ + -DSHERPA_NCNN_ENABLE_PORTAUDIO=OFF \ + -DSHERPA_NCNN_ENABLE_JNI=OFF \ + -DSHERPA_NCNN_ENABLE_BINARY=OFF \ + -DSHERPA_NCNN_ENABLE_TEST=OFF \ + -DSHERPA_NCNN_ENABLE_C_API=ON \ + -DSHERPA_NCNN_ENABLE_GENERATE_INT8_SCALE_TABLE=OFF \ + -DSHERPA_NCNN_ENABLE_FFMPEG_EXAMPLES=OFF \ + .. 
+ +make -j2 +make install +ls -lh install/bin/wasm diff --git a/nodejs-wasm-examples/decode-file.js b/nodejs-wasm-examples/decode-file.js new file mode 100644 index 00000000..0cff2337 --- /dev/null +++ b/nodejs-wasm-examples/decode-file.js @@ -0,0 +1,75 @@ +const fs = require('fs'); +const wav = require('wav'); +const {Readable} = require('stream'); + +let Module = require('./sherpa-ncnn-wasm-main.js')() +let b = require('./sherpa-ncnn.js'); + +let recognizer = b.createRecognizer(Module); +let stream = recognizer.createStream(); + +console.log(recognizer.config); + +const waveFilename = './0.wav'; + +const reader = new wav.Reader(); +const readable = new Readable().wrap(reader); +const buf = []; + +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => { + if (sampleRate != recognizer.config.featConfig.samplingRate) { + throw new Error(`Only support sampleRate ${ + recognizer.config.featConfig.samplingRate}. Given ${sampleRate}`); + } + + if (audioFormat != 1) { + throw new Error(`Only support PCM format. Given ${audioFormat}`); + } + + if (channels != 1) { + throw new Error(`Only a single channel. Given ${channels}`); + } + + if (bitDepth != 16) { + throw new Error(`Only support 16-bit samples. 
Given ${bitDepth}`); + } +}); + +fs.createReadStream(waveFilename, {'highWaterMark': 4096}) + .pipe(reader) + .on('finish', function(err) { + // tail padding + const floatSamples = + new Float32Array(recognizer.config.featConfig.samplingRate * 0.5); + + buf.push(floatSamples); + const flattened = + Float32Array.from(buf.reduce((a, b) => [...a, ...b], [])); + + stream.acceptWaveform( + recognizer.config.featConfig.samplingRate, flattened); + while (recognizer.isReady(stream)) { + recognizer.decode(stream); + } + const r = recognizer.getResult(stream); + console.log('result', r); + + stream.free(); + recognizer.free(); + }); + +readable.on('readable', function() { + let chunk; + while ((chunk = readable.read()) != null) { + const int16Samples = new Int16Array( + chunk.buffer, chunk.byteOffset, + chunk.length / Int16Array.BYTES_PER_ELEMENT); + + const floatSamples = new Float32Array(int16Samples.length); + for (let i = 0; i < floatSamples.length; i++) { + floatSamples[i] = int16Samples[i] / 32768.0; + } + + buf.push(floatSamples); + } +}); diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index 4cc93276..578b5a20 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -31,8 +31,20 @@ list(JOIN mangled_exported_functions "," all_exported_functions) include_directories(${CMAKE_SOURCE_DIR}) set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB ") string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ") -string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ") -string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue'] ") + +if(SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS) + string(APPEND MY_FLAGS " -sNODERAWFS=1 ") +else() + string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. 
") +endif() + +string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ") + +if(SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS) + string(APPEND MY_FLAGS " -sMODULARIZE=1 -sWASM_ASYNC_COMPILATION=0 ") +endif() + + message(STATUS "MY_FLAGS: ${MY_FLAGS}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}") @@ -50,7 +62,15 @@ install( "index.html" "$/sherpa-ncnn-wasm-main.js" "$/sherpa-ncnn-wasm-main.wasm" - "$/sherpa-ncnn-wasm-main.data" DESTINATION bin/wasm ) + +if(NOT SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS) + install( + FILES + "$/sherpa-ncnn-wasm-main.data" + DESTINATION + bin/wasm + ) +endif() diff --git a/wasm/app.js b/wasm/app.js index cb27db97..963fcc4a 100644 --- a/wasm/app.js +++ b/wasm/app.js @@ -45,7 +45,7 @@ Module.onRuntimeInitialized = function() { startBtn.disabled = false; - recognizer = createRecognizer(); + recognizer = createRecognizer(Module); console.log('recognizer is created!', recognizer); }; diff --git a/wasm/sherpa-ncnn.js b/wasm/sherpa-ncnn.js index 6f3f8b2c..93b349e1 100644 --- a/wasm/sherpa-ncnn.js +++ b/wasm/sherpa-ncnn.js @@ -1,51 +1,51 @@ -function freeConfig(config) { +function freeConfig(config, Module) { if ('buffer' in config) { - _free(config.buffer); + Module._free(config.buffer); } - _free(config.ptr); + Module._free(config.ptr); } // The user should free the returned pointers -function initSherpaNcnnModelConfig(config) { - let encoderParamLen = lengthBytesUTF8(config.encoderParam) + 1; - let decoderParamLen = lengthBytesUTF8(config.decoderParam) + 1; - let joinerParamLen = lengthBytesUTF8(config.joinerParam) + 1; +function initSherpaNcnnModelConfig(config, Module) { + let encoderParamLen = Module.lengthBytesUTF8(config.encoderParam) + 1; + let decoderParamLen = Module.lengthBytesUTF8(config.decoderParam) + 1; + let joinerParamLen = Module.lengthBytesUTF8(config.joinerParam) + 1; - let encoderBinLen = lengthBytesUTF8(config.encoderBin) + 1; - let decoderBinLen = 
lengthBytesUTF8(config.decoderBin) + 1; - let joinerBinLen = lengthBytesUTF8(config.joinerBin) + 1; + let encoderBinLen = Module.lengthBytesUTF8(config.encoderBin) + 1; + let decoderBinLen = Module.lengthBytesUTF8(config.decoderBin) + 1; + let joinerBinLen = Module.lengthBytesUTF8(config.joinerBin) + 1; - let tokensLen = lengthBytesUTF8(config.tokens) + 1; + let tokensLen = Module.lengthBytesUTF8(config.tokens) + 1; let n = encoderParamLen + decoderParamLen + joinerParamLen; n += encoderBinLen + decoderBinLen + joinerBinLen; n += tokensLen; - let buffer = _malloc(n); - let ptr = _malloc(4 * 9); + let buffer = Module._malloc(n); + let ptr = Module._malloc(4 * 9); let offset = 0; - stringToUTF8(config.encoderParam, buffer + offset, encoderParamLen); + Module.stringToUTF8(config.encoderParam, buffer + offset, encoderParamLen); offset += encoderParamLen; - stringToUTF8(config.encoderBin, buffer + offset, encoderBinLen); + Module.stringToUTF8(config.encoderBin, buffer + offset, encoderBinLen); offset += encoderBinLen; - stringToUTF8(config.decoderParam, buffer + offset, decoderParamLen); + Module.stringToUTF8(config.decoderParam, buffer + offset, decoderParamLen); offset += decoderParamLen; - stringToUTF8(config.decoderBin, buffer + offset, decoderBinLen); + Module.stringToUTF8(config.decoderBin, buffer + offset, decoderBinLen); offset += decoderBinLen; - stringToUTF8(config.joinerParam, buffer + offset, joinerParamLen); + Module.stringToUTF8(config.joinerParam, buffer + offset, joinerParamLen); offset += joinerParamLen; - stringToUTF8(config.joinerBin, buffer + offset, joinerBinLen); + Module.stringToUTF8(config.joinerBin, buffer + offset, joinerBinLen); offset += joinerBinLen; - stringToUTF8(config.tokens, buffer + offset, tokensLen); + Module.stringToUTF8(config.tokens, buffer + offset, tokensLen); offset += tokensLen; offset = 0; @@ -78,12 +78,12 @@ function initSherpaNcnnModelConfig(config) { } } -function initSherpaNcnnDecoderConfig(config) { - let n = 
lengthBytesUTF8(config.decodingMethod) + 1; - let buffer = _malloc(n); - let ptr = _malloc(4 * 2); +function initSherpaNcnnDecoderConfig(config, Module) { + let n = Module.lengthBytesUTF8(config.decodingMethod) + 1; + let buffer = Module._malloc(n); + let ptr = Module._malloc(4 * 2); - stringToUTF8(config.decodingMethod, buffer, n); + Module.stringToUTF8(config.decodingMethod, buffer, n); Module.setValue(ptr, buffer, 'i8*'); Module.setValue(ptr + 4, config.numActivePaths, 'i32'); @@ -93,8 +93,8 @@ function initSherpaNcnnDecoderConfig(config) { } } -function initSherpaNcnnFeatureExtractorConfig(config) { - let ptr = _malloc(4 * 2); +function initSherpaNcnnFeatureExtractorConfig(config, Module) { + let ptr = Module._malloc(4 * 2); Module.setValue(ptr, config.samplingRate, 'float'); Module.setValue(ptr + 4, config.featureDim, 'i32'); return { @@ -102,23 +102,24 @@ function initSherpaNcnnFeatureExtractorConfig(config) { } } -function initSherpaNcnnRecognizerConfig(config) { - let featConfig = initSherpaNcnnFeatureExtractorConfig(config.featConfig); - let modelConfig = initSherpaNcnnModelConfig(config.modelConfig); - let decoderConfig = initSherpaNcnnDecoderConfig(config.decoderConfig); +function initSherpaNcnnRecognizerConfig(config, Module) { + let featConfig = + initSherpaNcnnFeatureExtractorConfig(config.featConfig, Module); + let modelConfig = initSherpaNcnnModelConfig(config.modelConfig, Module); + let decoderConfig = initSherpaNcnnDecoderConfig(config.decoderConfig, Module); let numBytes = featConfig.len + modelConfig.len + decoderConfig.len + 4 * 4 + 4 * 2; - let ptr = _malloc(numBytes); + let ptr = Module._malloc(numBytes); let offset = 0; - _CopyHeap(featConfig.ptr, featConfig.len, ptr + offset); + Module._CopyHeap(featConfig.ptr, featConfig.len, ptr + offset); offset += featConfig.len; - _CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset) + Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset) offset += modelConfig.len; - 
_CopyHeap(decoderConfig.ptr, decoderConfig.len, ptr + offset) + Module._CopyHeap(decoderConfig.ptr, decoderConfig.len, ptr + offset) offset += decoderConfig.len; Module.setValue(ptr + offset, config.enableEndpoint, 'i32'); @@ -146,17 +147,18 @@ function initSherpaNcnnRecognizerConfig(config) { } class Stream { - constructor(handle) { + constructor(handle, Module) { this.handle = handle; this.pointer = null; - this.n = 0 + this.n = 0; + this.Module = Module; } free() { if (this.handle) { - _DestroyStream(this.handle); + this.Module._DestroyStream(this.handle); this.handle = null; - _free(this.pointer); + this.Module._free(this.pointer); this.pointer = null; this.n = 0; } @@ -168,13 +170,15 @@ class Stream { */ acceptWaveform(sampleRate, samples) { if (this.n < samples.length) { - _free(this.pointer); - this.pointer = _malloc(samples.length * samples.BYTES_PER_ELEMENT); + this.Module._free(this.pointer); + this.pointer = + this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT); this.n = samples.length } - Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT); - _AcceptWaveform(this.handle, sampleRate, this.pointer, samples.length); + this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT); + this.Module._AcceptWaveform( + this.handle, sampleRate, this.pointer, samples.length); } inputFinished() { @@ -183,59 +187,56 @@ class Stream { }; class Recognizer { - constructor(configObj, borrowedHandle) { - if (borrowedHandle) { - this.handle = borrowedHandle; - return; - } - - let config = initSherpaNcnnRecognizerConfig(configObj) - let handle = _CreateRecognizer(config.ptr); + constructor(configObj, Module) { + this.config = configObj; + let config = initSherpaNcnnRecognizerConfig(configObj, Module) + let handle = Module._CreateRecognizer(config.ptr); - freeConfig(config.featConfig); - freeConfig(config.modelConfig); - freeConfig(config.decoderConfig); - freeConfig(config); + freeConfig(config.featConfig, Module); + 
freeConfig(config.modelConfig, Module); + freeConfig(config.decoderConfig, Module); + freeConfig(config, Module); this.handle = handle; + this.Module = Module; } free() { - _DestroyRecognizer(this.handle); + this.Module._DestroyRecognizer(this.handle); this.handle = 0 } createStream() { - let handle = _CreateStream(this.handle); - return new Stream(handle); + let handle = this.Module._CreateStream(this.handle); + return new Stream(handle, this.Module); } isReady(stream) { - return _IsReady(this.handle, stream.handle) == 1; + return this.Module._IsReady(this.handle, stream.handle) == 1; } isEndpoint(stream) { - return _IsEndpoint(this.handle, stream.handle) == 1; + return this.Module._IsEndpoint(this.handle, stream.handle) == 1; } decode(stream) { - return _Decode(this.handle, stream.handle); + return this.Module._Decode(this.handle, stream.handle); } reset(stream) { - _Reset(this.handle, stream.handle); + this.Module._Reset(this.handle, stream.handle); } getResult(stream) { - let r = _GetResult(this.handle, stream.handle); - let textPtr = getValue(r, 'i8*'); - let text = UTF8ToString(textPtr); - _DestroyResult(r); + let r = this.Module._GetResult(this.handle, stream.handle); + let textPtr = this.Module.getValue(r, 'i8*'); + let text = this.Module.UTF8ToString(textPtr); + this.Module._DestroyResult(r); return text; } } -function createRecognizer() { +function createRecognizer(Module) { let modelConfig = { encoderParam: './encoder_jit_trace-pnnx.ncnn.param', encoderBin: './encoder_jit_trace-pnnx.ncnn.bin', @@ -268,5 +269,10 @@ function createRecognizer() { rule3MinUtternceLength: 20, }; - return new Recognizer(configObj); + return new Recognizer(configObj, Module); } + +module.exports = { + createRecognizer, + +}; From 3805669463a81cd3eff7bc7bf9c4dfe403c8037f Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 24 Feb 2024 11:30:52 +0800 Subject: [PATCH 02/12] Publish npm package --- .github/workflows/npm.yaml | 71 +++++++++++++++++++++++++++++++++++++ 
scripts/nodejs/README.md | 9 +++++ scripts/nodejs/index.js | 14 ++++++++ scripts/nodejs/package.json | 36 +++++++++++++++++++ wasm/sherpa-ncnn.js | 16 ++++++--- 5 files changed, 141 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/npm.yaml create mode 100644 scripts/nodejs/README.md create mode 100644 scripts/nodejs/index.js create mode 100644 scripts/nodejs/package.json diff --git a/.github/workflows/npm.yaml b/.github/workflows/npm.yaml new file mode 100644 index 00000000..e8c7fa81 --- /dev/null +++ b/.github/workflows/npm.yaml @@ -0,0 +1,71 @@ +name: npm + +on: + push: + branches: + - wasm-nodejs-2 + workflow_dispatch: + +concurrency: + group: npm-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + nodejs: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: ["3.8"] + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions/setup-node@v4 + with: + registry-url: 'https://registry.npmjs.org' + + - name: Display node version + shell: bash + run: | + node --version + npm --version + + cd nodejs-examples + npm install npm + npm --version + + - name: Build nodejs package + shell: bash + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + run: | + ./build-wasm-simd-for-nodejs.sh + + cp -v build-wasm-simd-for-nodejs/install/bin/wasm/sherpa-ncnn-wasm-main.js ./scripts/nodejs + cp -v build-wasm-simd-for-nodejs/install/bin/wasm/sherpa-ncnn-wasm-main.wasm ./scripts/nodejs + cp -v build-wasm-simd-for-nodejs/install/bin/wasm/sherpa-ncnn.js ./scripts/nodejs + + SHERPA_NCNN_VERSION=$(grep "SHERPA_NCNN_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + echo "SHERPA_NCNN_VERSION $SHERPA_NCNN_VERSION" + + cd scripts/nodejs + + sed -i.bak s/SHERPA_NCNN_VERSION/$SHERPA_NCNN_VERSION/g 
./package.json + + git diff + + npm install + npm ci + npm publish --provenance --access public diff --git a/scripts/nodejs/README.md b/scripts/nodejs/README.md new file mode 100644 index 00000000..e1cca5e5 --- /dev/null +++ b/scripts/nodejs/README.md @@ -0,0 +1,9 @@ +# Introduction + +Real-time speech-to-text with [Next-gen Kaldi](https://github.com/k2-fsa/). + +It processes everything locally without accessing the Internet. + +Please refer to +https://github.com/k2-fsa/sherpa-ncnn/tree/master/nodejs-examples +for examples. diff --git a/scripts/nodejs/index.js b/scripts/nodejs/index.js new file mode 100644 index 00000000..66a1eec7 --- /dev/null +++ b/scripts/nodejs/index.js @@ -0,0 +1,14 @@ +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang) +// +'use strict' + +const wasmModule = require('./sherpa-ncnn-wasm-main.js')(); +const sherpa_ncnn = require('./sherpa-ncnn.js'); + +function createRecognizer(config) { + sherpa_ncnn.createRecognizer(wasmModule, config); +} + +module.exports = { + createRecognizer, +}; diff --git a/scripts/nodejs/package.json b/scripts/nodejs/package.json new file mode 100644 index 00000000..e1e59520 --- /dev/null +++ b/scripts/nodejs/package.json @@ -0,0 +1,36 @@ +{ + "name": "sherpa-ncnn", + "version": "SHERPA_NCNN_VERSION", + "description": "Real-time speech recognition with Next-gen Kaldi", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/k2-fsa/sherpa-ncnn.git" + }, + "keywords": [ + "speech-to-text", + "real-time speech recognition", + "without internet connection", + "embedded systems", + "open source", + "zipformer", + "asr", + "speech", + "WebAssembly", + "wasm", + "local", + "privacy", + "ncnn" + ], + "author": "The next-gen Kaldi team", + "license": "Apache-2.0", + "bugs": { + "url": "https://github.com/k2-fsa/sherpa-ncnn/issues" + }, + "homepage": "https://github.com/k2-fsa/sherpa-ncnn#readme", + 
"dependencies": { + } +} diff --git a/wasm/sherpa-ncnn.js b/wasm/sherpa-ncnn.js index 93b349e1..2619b151 100644 --- a/wasm/sherpa-ncnn.js +++ b/wasm/sherpa-ncnn.js @@ -236,7 +236,7 @@ class Recognizer { } } -function createRecognizer(Module) { +function createRecognizer(Module, myConfig) { let modelConfig = { encoderParam: './encoder_jit_trace-pnnx.ncnn.param', encoderBin: './encoder_jit_trace-pnnx.ncnn.bin', @@ -269,10 +269,16 @@ function createRecognizer(Module) { rule3MinUtternceLength: 20, }; + if (myConfig) { + configObj = myConfig; + } + return new Recognizer(configObj, Module); } -module.exports = { - createRecognizer, - -}; +if (typeof process == 'object' && typeof process.versions == 'object' && + typeof process.versions.node == 'string') { + module.exports = { + createRecognizer, + }; +} From 9a24aea9a8dc4ea16c23a8991ac605ecb893a526 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 24 Feb 2024 11:32:26 +0800 Subject: [PATCH 03/12] insall emscripten in CI --- .github/workflows/npm.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/npm.yaml b/.github/workflows/npm.yaml index e8c7fa81..eb14559c 100644 --- a/.github/workflows/npm.yaml +++ b/.github/workflows/npm.yaml @@ -23,10 +23,20 @@ jobs: python-version: ["3.8"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Install emsdk + uses: mymindstorm/setup-emsdk@v14 + + - name: View emsdk version + shell: bash + run: | + emcc -v + echo "--------------------" + emcc --check + - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: From c6ebf209fcbe621381a307dd411af801a102fa5c Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 24 Feb 2024 11:34:34 +0800 Subject: [PATCH 04/12] disable file check for nodejs --- .github/workflows/npm.yaml | 9 ++++++++- scripts/nodejs/package.json | 2 +- wasm/CMakeLists.txt | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git 
a/.github/workflows/npm.yaml b/.github/workflows/npm.yaml index eb14559c..ee0d4474 100644 --- a/.github/workflows/npm.yaml +++ b/.github/workflows/npm.yaml @@ -12,6 +12,7 @@ concurrency: permissions: contents: read + id-token: write jobs: nodejs: @@ -69,13 +70,19 @@ jobs: SHERPA_NCNN_VERSION=$(grep "SHERPA_NCNN_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_NCNN_VERSION $SHERPA_NCNN_VERSION" + SHERPA_NCNN_VERSION="2.1.0" cd scripts/nodejs + owner=${{ github.repository_owner }} + echo "owner: $owner" + sed -i.bak s/SHERPA_NCNN_VERSION/$SHERPA_NCNN_VERSION/g ./package.json + sed -i.bak s/k2-fsa/$owner/g ./package.json - git diff + git diff . npm install npm ci + # see https://docs.npmjs.com/generating-provenance-statements npm publish --provenance --access public diff --git a/scripts/nodejs/package.json b/scripts/nodejs/package.json index e1e59520..274fdee0 100644 --- a/scripts/nodejs/package.json +++ b/scripts/nodejs/package.json @@ -1,5 +1,5 @@ { - "name": "sherpa-ncnn", + "name": "sherpa-ncnn0", "version": "SHERPA_NCNN_VERSION", "description": "Real-time speech recognition with Next-gen Kaldi", "main": "index.js", diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt index 578b5a20..09a4e0ee 100644 --- a/wasm/CMakeLists.txt +++ b/wasm/CMakeLists.txt @@ -2,7 +2,7 @@ if(NOT $ENV{SHERPA_NCNN_IS_USING_BUILD_WASM_SH}) message(FATAL_ERROR "Please use ./build-wasm.sh to build for wasm") endif() -if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder_jit_trace-pnnx.ncnn.bin") +if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder_jit_trace-pnnx.ncnn.bin" AND NOT SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS) message(WARNING "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder_jit_trace-pnnx.ncnn.bin does not exist") message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue") endif() From 7c313cc28875f3284f3b3c37168bda64fdb0a1ba Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 24 Feb 2024 12:19:29 +0800 
Subject: [PATCH 05/12] Add examples for nodejs with WebAssembly --- .github/workflows/npm.yaml | 2 + .gitignore | 1 + nodejs-wasm-examples/README.md | 41 +++++++ nodejs-wasm-examples/decode-file.js | 58 ++++++++-- nodejs-wasm-examples/package.json | 7 ++ ...real-time-speech-recognition-microphone.js | 100 ++++++++++++++++++ scripts/nodejs/index.js | 2 +- 7 files changed, 203 insertions(+), 8 deletions(-) create mode 100644 nodejs-wasm-examples/README.md create mode 100644 nodejs-wasm-examples/package.json create mode 100644 nodejs-wasm-examples/real-time-speech-recognition-microphone.js diff --git a/.github/workflows/npm.yaml b/.github/workflows/npm.yaml index ee0d4474..c6d6ad75 100644 --- a/.github/workflows/npm.yaml +++ b/.github/workflows/npm.yaml @@ -80,6 +80,8 @@ jobs: sed -i.bak s/SHERPA_NCNN_VERSION/$SHERPA_NCNN_VERSION/g ./package.json sed -i.bak s/k2-fsa/$owner/g ./package.json + rm package.json.bak + git diff . npm install diff --git a/.gitignore b/.gitignore index d274a211..98a8cd56 100644 --- a/.gitignore +++ b/.gitignore @@ -116,3 +116,4 @@ generate-int8-*.sh cmake-build-release cmake-build-debug +node_modules diff --git a/nodejs-wasm-examples/README.md b/nodejs-wasm-examples/README.md new file mode 100644 index 00000000..a2d3f401 --- /dev/null +++ b/nodejs-wasm-examples/README.md @@ -0,0 +1,41 @@ +# Introduction + +This folder contains examples about how to use the sherpa-ncnn WebAssembly module +with nodejs for speech recognition. + +- [decode-file.js](./decode-file.js) it shows how to decode a file + +## Usage + +### Install dependencies + +```bash +cd ./nodejs-wasm-examples +npm i +``` + +### Download a model + +Please visit to +select more models. 
+ +The following is an example: + +```bash +cd ./nodejs-wasm-examples +wget https://github.com/k2-fsa/sherpa-ncnn/releases/download/models/sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 +tar xvf sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 +rm sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 +``` + +### Decode a file + +```bash +node ./decode-file.js +``` + +### Real-time speech recognition from a microphone + +```bash +node ./real-time-speech-recognition-microphone.js +``` diff --git a/nodejs-wasm-examples/decode-file.js b/nodejs-wasm-examples/decode-file.js index 0cff2337..aa6b87c0 100644 --- a/nodejs-wasm-examples/decode-file.js +++ b/nodejs-wasm-examples/decode-file.js @@ -1,16 +1,60 @@ +// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang) const fs = require('fs'); const wav = require('wav'); const {Readable} = require('stream'); -let Module = require('./sherpa-ncnn-wasm-main.js')() -let b = require('./sherpa-ncnn.js'); - -let recognizer = b.createRecognizer(Module); -let stream = recognizer.createStream(); +const sherpa_ncnn = require('sherpa-ncnn0'); + +function createRecognizer() { + let modelConfig = { + encoderParam: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/encoder_jit_trace-pnnx.ncnn.param', + encoderBin: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/encoder_jit_trace-pnnx.ncnn.bin', + decoderParam: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/decoder_jit_trace-pnnx.ncnn.param', + decoderBin: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/decoder_jit_trace-pnnx.ncnn.bin', + joinerParam: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/joiner_jit_trace-pnnx.ncnn.param', + joinerBin: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/joiner_jit_trace-pnnx.ncnn.bin', + tokens: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/tokens.txt', + useVulkanCompute: 0, + 
numThreads: 1, + }; + + let decoderConfig = { + decodingMethod: 'greedy_search', + numActivePaths: 4, + }; + + let featConfig = { + samplingRate: 16000, + featureDim: 80, + }; + + let config = { + featConfig: featConfig, + modelConfig: modelConfig, + decoderConfig: decoderConfig, + enableEndpoint: 1, + rule1MinTrailingSilence: 1.2, + rule2MinTrailingSilence: 2.4, + rule3MinUtternceLength: 20, + }; + + return sherpa_ncnn.createRecognizer(config); +} + +const recognizer = createRecognizer(); +const stream = recognizer.createStream(); console.log(recognizer.config); -const waveFilename = './0.wav'; +const waveFilename = + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/test_wavs/0.wav'; const reader = new wav.Reader(); const readable = new Readable().wrap(reader); @@ -52,7 +96,7 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096}) recognizer.decode(stream); } const r = recognizer.getResult(stream); - console.log('result', r); + console.log('result:', r); stream.free(); recognizer.free(); diff --git a/nodejs-wasm-examples/package.json b/nodejs-wasm-examples/package.json new file mode 100644 index 00000000..1fe5486e --- /dev/null +++ b/nodejs-wasm-examples/package.json @@ -0,0 +1,7 @@ +{ + "dependencies": { + "naudiodon2": "^2.4.0", + "sherpa-ncnn0": "*", + "wav": "^1.0.2" + } +} diff --git a/nodejs-wasm-examples/real-time-speech-recognition-microphone.js b/nodejs-wasm-examples/real-time-speech-recognition-microphone.js new file mode 100644 index 00000000..e81f9d8d --- /dev/null +++ b/nodejs-wasm-examples/real-time-speech-recognition-microphone.js @@ -0,0 +1,100 @@ +// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang) +// +const portAudio = require('naudiodon2'); +// console.log(portAudio.getDevices()); + +const sherpa_ncnn = require('sherpa-ncnn0'); + +function createRecognizer() { + let modelConfig = { + encoderParam: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/encoder_jit_trace-pnnx.ncnn.param', + encoderBin: + 
'./sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/encoder_jit_trace-pnnx.ncnn.bin', + decoderParam: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/decoder_jit_trace-pnnx.ncnn.param', + decoderBin: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/decoder_jit_trace-pnnx.ncnn.bin', + joinerParam: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/joiner_jit_trace-pnnx.ncnn.param', + joinerBin: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/joiner_jit_trace-pnnx.ncnn.bin', + tokens: + './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/tokens.txt', + useVulkanCompute: 0, + numThreads: 1, + }; + + let decoderConfig = { + decodingMethod: 'greedy_search', + numActivePaths: 4, + }; + + let featConfig = { + samplingRate: 16000, + featureDim: 80, + }; + + let config = { + featConfig: featConfig, + modelConfig: modelConfig, + decoderConfig: decoderConfig, + enableEndpoint: 1, + rule1MinTrailingSilence: 1.2, + rule2MinTrailingSilence: 2.4, + rule3MinUtternceLength: 20, + }; + + return sherpa_ncnn.createRecognizer(config); +} + +const recognizer = createRecognizer(); +const stream = recognizer.createStream(); + +let lastText = ''; +let segmentIndex = 0; + +const ai = new portAudio.AudioIO({ + inOptions: { + channelCount: 1, + closeOnError: true, // Close the stream if an audio error is detected, if + // set false then just log the error + deviceId: -1, // Use -1 or omit the deviceId to select the default device + sampleFormat: portAudio.SampleFormatFloat32, + sampleRate: recognizer.config.featConfig.samplingRate + } +}); + +ai.on('data', data => { + const samples = new Float32Array(data.buffer); + + stream.acceptWaveform(recognizer.config.featConfig.samplingRate, samples); + + while (recognizer.isReady(stream)) { + recognizer.decode(stream); + } + + const isEndpoint = recognizer.isEndpoint(stream); + const text = recognizer.getResult(stream); + + if (text.length > 0 && lastText != text) { + 
lastText = text; + console.log(segmentIndex, lastText); + } + if (isEndpoint) { + if (text.length > 0) { + lastText = text; + segmentIndex += 1; + } + recognizer.reset(stream) + } +}); + +ai.on('close', () => { + console.log('Free resources'); + stream.free(); + recognizer.free(); +}); + +ai.start(); +console.log('Started! Please speak') diff --git a/scripts/nodejs/index.js b/scripts/nodejs/index.js index 66a1eec7..b11c4cd7 100644 --- a/scripts/nodejs/index.js +++ b/scripts/nodejs/index.js @@ -6,7 +6,7 @@ const wasmModule = require('./sherpa-ncnn-wasm-main.js')(); const sherpa_ncnn = require('./sherpa-ncnn.js'); function createRecognizer(config) { - sherpa_ncnn.createRecognizer(wasmModule, config); + return sherpa_ncnn.createRecognizer(wasmModule, config); } module.exports = { From ac9c180a54e6c8e3d16d557460f286344ee61501 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 24 Feb 2024 12:19:59 +0800 Subject: [PATCH 06/12] Remove nodejs examples --- nodejs-examples/.gitignore | 4 - nodejs-examples/index.js | 181 ----------------------------------- nodejs-examples/package.json | 29 ------ nodejs-examples/run.sh | 34 ------- nodejs-examples/test.js | 98 ------------------- 5 files changed, 346 deletions(-) delete mode 100644 nodejs-examples/.gitignore delete mode 100644 nodejs-examples/index.js delete mode 100644 nodejs-examples/package.json delete mode 100755 nodejs-examples/run.sh delete mode 100644 nodejs-examples/test.js diff --git a/nodejs-examples/.gitignore b/nodejs-examples/.gitignore deleted file mode 100644 index 5bb28f68..00000000 --- a/nodejs-examples/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -install -node_modules -package.json -package-lock.json diff --git a/nodejs-examples/index.js b/nodejs-examples/index.js deleted file mode 100644 index f33bd676..00000000 --- a/nodejs-examples/index.js +++ /dev/null @@ -1,181 +0,0 @@ -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) -// -// Please use -// -// npm install ffi-napi ref-struct-napi 
-// -// before you use this file -// -// -// Please use node 13. node 16, 18, 20, and 21 are known not working. -// See also -// https://github.com/node-ffi-napi/node-ffi-napi/issues/244 -// and -// https://github.com/node-ffi-napi/node-ffi-napi/issues/97 -'use strict' - -const debug = require('debug')('sherpa-ncnn'); -const os = require('os'); -const path = require('path'); -const ffi = require('ffi-napi'); -const ref = require('ref-napi'); -const fs = require('fs'); - -const StructType = require('ref-struct-napi'); -const cstring = ref.types.CString; -const int32_t = ref.types.int32; -const float = ref.types.float; -const floatPtr = ref.refType(float); - -const RecognizerPtr = ref.refType(ref.types.void); -const StreamPtr = ref.refType(ref.types.void); -const SherpaNcnnModelConfig = StructType({ - 'encoderParam': cstring, - 'encoderBin': cstring, - 'decoderParam': cstring, - 'decoderBin': cstring, - 'joinerParam': cstring, - 'joinerBin': cstring, - 'tokens': cstring, - 'useVulkanCompute': int32_t, - 'numThreads': int32_t, -}); - -const SherpaNcnnDecoderConfig = StructType({ - 'decodingMethod': cstring, - 'numActivePaths': int32_t, -}); - -const SherpaNcnnFeatureExtractorConfig = StructType({ - 'sampleRate': float, - 'featureDim': int32_t, -}); - -const SherpaNcnnRecognizerConfig = StructType({ - 'featConfig': SherpaNcnnFeatureExtractorConfig, - 'modelConfig': SherpaNcnnModelConfig, - 'decoderConfig': SherpaNcnnDecoderConfig, - 'enableEndpoint': int32_t, - 'rule1MinTrailingSilence': float, - 'rule2MinTrailingSilence': float, - 'rule3MinUtteranceLength': float, - 'hotwordsFile': cstring, - 'hotwordsScore': cstring, -}); - -const SherpaNcnnResult = StructType({ - 'text': cstring, - 'tokens': cstring, - 'timestamps': floatPtr, - 'count': int32_t, -}); - - -const ResultPtr = ref.refType(SherpaNcnnResult); -const RecognizerConfigPtr = ref.refType(SherpaNcnnRecognizerConfig) - -let soname; -if (os.platform() == 'win32') { - soname = path.join(__dirname, 'install', 
'lib', 'sherpa-ncnn-c-api.dll'); -} else if (os.platform() == 'darwin') { - soname = path.join(__dirname, 'install', 'lib', 'libsherpa-ncnn-c-api.dylib'); -} else if (os.platform() == 'linux') { - soname = path.join(__dirname, 'install', 'lib', 'libsherpa-ncnn-c-api.so'); -} else { - throw new Error(`Unsupported platform ${os.platform()}`); -} -if (!fs.existsSync(soname)) { - throw new Error(`Cannot find file ${soname}. Please make sure you have run - ./build.sh`); -} - -debug('soname ', soname) - -const libsherpa_ncnn = ffi.Library(soname, { - 'CreateRecognizer': [RecognizerPtr, [RecognizerConfigPtr]], - 'DestroyRecognizer': ['void', [RecognizerPtr]], - 'CreateStream': [StreamPtr, [RecognizerPtr]], - 'DestroyStream': ['void', [StreamPtr]], - 'AcceptWaveform': ['void', [StreamPtr, float, floatPtr, int32_t]], - 'IsReady': [int32_t, [RecognizerPtr, StreamPtr]], - 'Decode': ['void', [RecognizerPtr, StreamPtr]], - 'GetResult': [ResultPtr, [RecognizerPtr, StreamPtr]], - 'DestroyResult': ['void', [ResultPtr]], - 'Reset': ['void', [RecognizerPtr, StreamPtr]], - 'InputFinished': ['void', [StreamPtr]], - 'IsEndpoint': [int32_t, [RecognizerPtr, StreamPtr]], -}); - -class Recognizer { - /** - * @param {SherpaNcnnRecognizerConfig} config Configuration for the recognizer - * - * The user has to invoke this.free() at the end to avoid memory leak. 
- */ - constructor(config) { - this.recognizer_handle = libsherpa_ncnn.CreateRecognizer(config.ref()); - this.stream_handle = libsherpa_ncnn.CreateStream(this.recognizer_handle); - } - - free() { - if (this.stream_handle) { - libsherpa_ncnn.DestroyStream(this.stream_handle); - this.stream_handle = null; - } - - libsherpa_ncnn.DestroyRecognizer(this.recognizer_handle); - this.handle = null; - } - - /** - * @param {bool} true to create a new stream - */ - reset(recreate) { - if (recreate) { - libsherpa_ncnn.DestroyStream(this.stream_handle); - this.stream_handle = libsherpa_ncnn.CreateStream(this.recognizer_handle); - return; - } - libsherpa_ncnn.Reset(this.recognizer_handle, this.stream_handle) - } - /** - * @param {float} Sample rate of the input data - * @param {float[]} A 1-d float array containing audio samples. It should be - * in the range [-1, 1]. - */ - acceptWaveform(sampleRate, samples) { - libsherpa_ncnn.AcceptWaveform( - this.stream_handle, sampleRate, samples, samples.length); - } - - isReady() { - return libsherpa_ncnn.IsReady(this.recognizer_handle, this.stream_handle); - } - - decode() { - libsherpa_ncnn.Decode(this.recognizer_handle, this.stream_handle); - } - - getResult() { - const h = - libsherpa_ncnn.GetResult(this.recognizer_handle, this.stream_handle); - const text = Buffer.from(h.deref().text, 'utf-8').toString(); - libsherpa_ncnn.DestroyResult(h); - return text; - } -}; - -// alias - -const ModelConfig = SherpaNcnnModelConfig; -const DecoderConfig = SherpaNcnnDecoderConfig; -const FeatureConfig = SherpaNcnnFeatureExtractorConfig; -const RecognizerConfig = SherpaNcnnRecognizerConfig; - -module.exports = { - FeatureConfig, - ModelConfig, - DecoderConfig, - Recognizer, - RecognizerConfig, -}; diff --git a/nodejs-examples/package.json b/nodejs-examples/package.json deleted file mode 100644 index 7583d6b6..00000000 --- a/nodejs-examples/package.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "name": "sherpa-ncnn", - "version": "2.1.4", - "description": 
"real-time speech recognition with Next-gen Kaldi", - "main": "index.js", - "dependencies": { - "ffi-napi": "^4.0.3", - "ref-struct-napi": "^1.1.1", - "wav": "^1.0.2" - }, - "devDependencies": {}, - "scripts": { - "test": "echo \"Error: no test specified\" && exit 1" - }, - "repository": { - "type": "git", - "url": "git+ssh://git@github.com/k2-fsa/sherpa-ncnn.git" - }, - "keywords": [ - "speech-to-text;", - "ASR" - ], - "author": "The Next-gen Kaldi team", - "license": "Apache-2.0", - "bugs": { - "url": "https://github.com/k2-fsa/sherpa-ncnn/issues" - }, - "homepage": "https://github.com/k2-fsa/sherpa-ncnn#readme" -} diff --git a/nodejs-examples/run.sh b/nodejs-examples/run.sh deleted file mode 100755 index 76d57873..00000000 --- a/nodejs-examples/run.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) - -npm list | grep ffi-napi >/dev/null || npm install ffi-napi -npm list | grep ref-struct-napi >/dev/null || npm install ref-struct-napi -npm list | grep wav >/dev/null || npm install wav - -if [ ! -e ./install ]; then - cd .. - mkdir -p build - cd build - cmake -DBUILD_SHARED_LIBS=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=./install \ - -DSHERPA_NCNN_ENABLE_PORTAUDIO=OFF \ - -DSHERPA_NCNN_ENABLE_BINARY=OFF \ - -DSHERPA_NCNN_ENABLE_C_API=ON \ - -DSHERPA_NCNN_ENABLE_GENERATE_INT8_SCALE_TABLE=OFF \ - -DSHERPA_NCNN_ENABLE_PYTHON=OFF \ - .. - make -j3 - make install - cd ../nodejs-examples - ln -s $PWD/../build/install . -fi - -if [ ! 
-d ./sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13 ]; then - echo "Please refer to" - echo "https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/zipformer-transucer-models.html#csukuangfj-sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13-bilingual-chinese-english" - echo "to download the models" - exit 0 -fi - -node ./test.js diff --git a/nodejs-examples/test.js b/nodejs-examples/test.js deleted file mode 100644 index 119dff7d..00000000 --- a/nodejs-examples/test.js +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) -// -const fs = require('fs'); -const {Readable} = require('stream'); -const wav = require('wav'); - -sherpa_ncnn = require('./index.js') - -const featConfig = new sherpa_ncnn.FeatureConfig(); -featConfig.sampleRate = 16000; -featConfig.featureDim = 80; - -const decoderConfig = new sherpa_ncnn.DecoderConfig(); -decoderConfig.decodingMethod = 'greedy_search'; -decoderConfig.numActivePaths = 4; - -const modelConfig = new sherpa_ncnn.ModelConfig(); -modelConfig.encoderParam = - './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/encoder_jit_trace-pnnx.ncnn.param'; -modelConfig.encoderBin = - './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/encoder_jit_trace-pnnx.ncnn.bin'; - -modelConfig.decoderParam = - './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/decoder_jit_trace-pnnx.ncnn.param'; -modelConfig.decoderBin = - './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/decoder_jit_trace-pnnx.ncnn.bin'; - -modelConfig.joinerParam = - './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/joiner_jit_trace-pnnx.ncnn.param'; -modelConfig.joinerBin = - './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/joiner_jit_trace-pnnx.ncnn.bin'; - -modelConfig.tokens = - './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/tokens.txt'; -modelConfig.useVulkanCompute = 0; -modelConfig.numThreads = 1; - -const recognizerConfig = new 
sherpa_ncnn.RecognizerConfig(); -recognizerConfig.featConfig = featConfig; -recognizerConfig.modelConfig = modelConfig; -recognizerConfig.decoderConfig = decoderConfig; - -const recognizer = new sherpa_ncnn.Recognizer(recognizerConfig); - -const waveFilename = - './sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13/test_wavs/2.wav' - -const reader = new wav.Reader(); -const readable = new Readable().wrap(reader); - -function decode(samples) { - recognizer.acceptWaveform(recognizerConfig.featConfig.sampleRate, samples); - - while (recognizer.isReady()) { - recognizer.decode(); - } - const text = recognizer.getResult(); - console.log(text); -} - -reader.on('format', ({audioFormat, sampleRate, channels, bitDepth}) => { - if (audioFormat != 1) { - throw new Error(`Only support PCM format. Given ${audioFormat}`); - } - if (channels != 1) { - throw new Error(`Only a single channel. Given ${channel}`); - } - - if (bitDepth != 16) { - throw new Error(`Only support 16-bit samples. Given ${bitDepth}`); - } -}); - -fs.createReadStream(waveFilename, {'highWaterMark': 4096}) - .pipe(reader) - .on('finish', function(err) { - // tail padding - const floatSamples = - new Float32Array(recognizerConfig.featConfig.sampleRate * 0.5); - decode(floatSamples); - recognizer.free() - }); - - -readable.on('readable', function() { - let chunk; - while ((chunk = readable.read()) != null) { - const int16Samples = new Int16Array( - chunk.buffer, chunk.byteOffset, - chunk.length / Int16Array.BYTES_PER_ELEMENT); - - let floatSamples = new Float32Array(int16Samples.length); - for (let i = 0; i < floatSamples.length; i++) { - floatSamples[i] = int16Samples[i] / 32768.0; - } - decode(floatSamples); - } -}); From 2dcd94545f4ce0336f0783f58e3077609b3369c5 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 24 Feb 2024 12:20:23 +0800 Subject: [PATCH 07/12] Rename --- {nodejs-wasm-examples => nodejs-examples}/README.md | 0 {nodejs-wasm-examples => nodejs-examples}/decode-file.js | 0 
{nodejs-wasm-examples => nodejs-examples}/package.json | 0 .../real-time-speech-recognition-microphone.js | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename {nodejs-wasm-examples => nodejs-examples}/README.md (100%) rename {nodejs-wasm-examples => nodejs-examples}/decode-file.js (100%) rename {nodejs-wasm-examples => nodejs-examples}/package.json (100%) rename {nodejs-wasm-examples => nodejs-examples}/real-time-speech-recognition-microphone.js (100%) diff --git a/nodejs-wasm-examples/README.md b/nodejs-examples/README.md similarity index 100% rename from nodejs-wasm-examples/README.md rename to nodejs-examples/README.md diff --git a/nodejs-wasm-examples/decode-file.js b/nodejs-examples/decode-file.js similarity index 100% rename from nodejs-wasm-examples/decode-file.js rename to nodejs-examples/decode-file.js diff --git a/nodejs-wasm-examples/package.json b/nodejs-examples/package.json similarity index 100% rename from nodejs-wasm-examples/package.json rename to nodejs-examples/package.json diff --git a/nodejs-wasm-examples/real-time-speech-recognition-microphone.js b/nodejs-examples/real-time-speech-recognition-microphone.js similarity index 100% rename from nodejs-wasm-examples/real-time-speech-recognition-microphone.js rename to nodejs-examples/real-time-speech-recognition-microphone.js From 0b54fd40231a38ff59e5322db63e66cdb28dc6f4 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 24 Feb 2024 12:21:16 +0800 Subject: [PATCH 08/12] Release sherpa-ncnn --- .github/workflows/npm.yaml | 2 +- scripts/nodejs/package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/npm.yaml b/.github/workflows/npm.yaml index c6d6ad75..8e99b35a 100644 --- a/.github/workflows/npm.yaml +++ b/.github/workflows/npm.yaml @@ -70,7 +70,7 @@ jobs: SHERPA_NCNN_VERSION=$(grep "SHERPA_NCNN_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_NCNN_VERSION $SHERPA_NCNN_VERSION" - SHERPA_NCNN_VERSION="2.1.0" + 
SHERPA_NCNN_VERSION="2.1.5" cd scripts/nodejs diff --git a/scripts/nodejs/package.json b/scripts/nodejs/package.json index 274fdee0..e1e59520 100644 --- a/scripts/nodejs/package.json +++ b/scripts/nodejs/package.json @@ -1,5 +1,5 @@ { - "name": "sherpa-ncnn0", + "name": "sherpa-ncnn", "version": "SHERPA_NCNN_VERSION", "description": "Real-time speech recognition with Next-gen Kaldi", "main": "index.js", From d5870699a01448c38b422689ae2936c5df5fb814 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 24 Feb 2024 12:28:17 +0800 Subject: [PATCH 09/12] minor fixes --- .github/workflows/npm.yaml | 4 ---- nodejs-examples/.gitignore | 1 + nodejs-examples/decode-file.js | 2 +- nodejs-examples/package.json | 2 +- nodejs-examples/real-time-speech-recognition-microphone.js | 2 +- 5 files changed, 4 insertions(+), 7 deletions(-) create mode 100644 nodejs-examples/.gitignore diff --git a/.github/workflows/npm.yaml b/.github/workflows/npm.yaml index 8e99b35a..81066ab7 100644 --- a/.github/workflows/npm.yaml +++ b/.github/workflows/npm.yaml @@ -1,9 +1,6 @@ name: npm on: - push: - branches: - - wasm-nodejs-2 workflow_dispatch: concurrency: @@ -70,7 +67,6 @@ jobs: SHERPA_NCNN_VERSION=$(grep "SHERPA_NCNN_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_NCNN_VERSION $SHERPA_NCNN_VERSION" - SHERPA_NCNN_VERSION="2.1.5" cd scripts/nodejs diff --git a/nodejs-examples/.gitignore b/nodejs-examples/.gitignore new file mode 100644 index 00000000..d8b83df9 --- /dev/null +++ b/nodejs-examples/.gitignore @@ -0,0 +1 @@ +package-lock.json diff --git a/nodejs-examples/decode-file.js b/nodejs-examples/decode-file.js index aa6b87c0..f24a60e7 100644 --- a/nodejs-examples/decode-file.js +++ b/nodejs-examples/decode-file.js @@ -3,7 +3,7 @@ const fs = require('fs'); const wav = require('wav'); const {Readable} = require('stream'); -const sherpa_ncnn = require('sherpa-ncnn0'); +const sherpa_ncnn = require('sherpa-ncnn'); function createRecognizer() { let modelConfig = { 
diff --git a/nodejs-examples/package.json b/nodejs-examples/package.json index 1fe5486e..b5c373ee 100644 --- a/nodejs-examples/package.json +++ b/nodejs-examples/package.json @@ -1,7 +1,7 @@ { "dependencies": { "naudiodon2": "^2.4.0", - "sherpa-ncnn0": "*", + "sherpa-ncnn": "*", "wav": "^1.0.2" } } diff --git a/nodejs-examples/real-time-speech-recognition-microphone.js b/nodejs-examples/real-time-speech-recognition-microphone.js index e81f9d8d..9801b978 100644 --- a/nodejs-examples/real-time-speech-recognition-microphone.js +++ b/nodejs-examples/real-time-speech-recognition-microphone.js @@ -3,7 +3,7 @@ const portAudio = require('naudiodon2'); // console.log(portAudio.getDevices()); -const sherpa_ncnn = require('sherpa-ncnn0'); +const sherpa_ncnn = require('sherpa-ncnn'); function createRecognizer() { let modelConfig = { From 325621dce04a08a1a9fcd918f77b35224f32dd86 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 24 Feb 2024 12:33:12 +0800 Subject: [PATCH 10/12] Update CI for nodejs --- .github/workflows/nodejs.yaml | 69 ++++++++++++----------------------- 1 file changed, 23 insertions(+), 46 deletions(-) diff --git a/.github/workflows/nodejs.yaml b/.github/workflows/nodejs.yaml index 6dadf78e..84925e39 100644 --- a/.github/workflows/nodejs.yaml +++ b/.github/workflows/nodejs.yaml @@ -1,24 +1,16 @@ name: nodejs on: - push: - branches: - - master - paths: - - '.github/workflows/nodejs.yaml' - - 'CMakeLists.txt' - - 'cmake/**' - - 'nodejs-examples/**' - - 'sherpa-ncnn/csrc/*' - pull_request: - branches: - - master - paths: - - '.github/workflows/nodejs.yaml' - - 'CMakeLists.txt' - - 'cmake/**' - - 'nodejs-examples/**' - - 'sherpa-ncnn/csrc/*' + workflow_dispatch: + + schedule: + # minute (0-59) + # hour (0-23) + # day of the month (1-31) + # month (1-12) + # day of the week (0-6) + # nightly test at 22:50 UTC time every day + - cron: "50 22 * * *" concurrency: group: nodejs-${{ github.ref }} @@ -33,63 +25,48 @@ jobs: strategy: fail-fast: false matrix: - os: 
[ubuntu-latest, macos-latest] + os: [ubuntu-latest, macos-latest, windows-latest] python-version: ["3.8"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: fetch-depth: 0 - name: Setup Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - uses: actions/setup-node@v3 + - uses: actions/setup-node@v4 with: - node-version: 13 + registry-url: 'https://registry.npmjs.org' - name: Display node version shell: bash run: | node --version - npm --version - cd nodejs-examples - - npm install npm@6.14.4 -g - npm install npm@6.14.4 - npm --version - name: Install npm packages shell: bash run: | cd nodejs-examples - npm install ffi-napi ref-struct-napi wav + npm install npm + npm --version npm list - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2 - with: - key: ${{ matrix.os }}-shared - - name: Download model shell: bash run: | cd nodejs-examples - GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13 - cd sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13 - git lfs pull --include "*.bin" - ls -lh + + curl -OL https://github.com/k2-fsa/sherpa-ncnn/releases/download/models/sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 + tar xvf sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 + rm sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 + ls -lh sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13 - name: Test shell: bash run: | - export CMAKE_CXX_COMPILER_LAUNCHER=ccache - export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" - cmake --version - cd nodejs-examples - ls -lh - - ./run.sh + node ./decode-file.js From 99aaa2a34e445192535c6b2937c6360b3cd181ae Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 24 Feb 2024 12:43:37 +0800 Subject: [PATCH 11/12] Release v2.1.9 --- CMakeLists.txt | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2da51588..d21a183f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(sherpa-ncnn) -set(SHERPA_NCNN_VERSION "2.1.8") +set(SHERPA_NCNN_VERSION "2.1.9") # Disable warning about # From 69a03d8a53b5bf3f3b17e96ec7637f43c55fa036 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 24 Feb 2024 12:45:54 +0800 Subject: [PATCH 12/12] Fix typos --- nodejs-examples/README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/nodejs-examples/README.md b/nodejs-examples/README.md index a2d3f401..f4cffd89 100644 --- a/nodejs-examples/README.md +++ b/nodejs-examples/README.md @@ -4,13 +4,15 @@ This folder contains examples about how to use the sherpa-ncnn WebAssembly modul with nodejs for speech recognition. - [decode-file.js](./decode-file.js) it shows how to decode a file +- [real-time-speech-recognition-microphone.js](./real-time-speech-recognition-microphone.js) it shows + how to do real-time speech recognition with a microphone ## Usage ### Install dependencies ```bash -cd ./nodejs-wasm-examples +cd ./nodejs-examples npm i ``` @@ -22,7 +24,7 @@ select more models. The following is an example: ```bash -cd ./nodejs-wasm-examples +cd ./nodejs-examples wget https://github.com/k2-fsa/sherpa-ncnn/releases/download/models/sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 tar xvf sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 rm sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 @@ -31,11 +33,15 @@ rm sherpa-ncnn-streaming-zipformer-bilingual-zh-en-2023-02-13.tar.bz2 ### Decode a file ```bash +cd ./nodejs-examples + node ./decode-file.js ``` ### Real-time speech recognition from a microphone ```bash +cd ./nodejs-examples + node ./real-time-speech-recognition-microphone.js ```