diff --git a/CMakeLists.txt b/CMakeLists.txt
index bf4d4358..2da51588 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -41,6 +41,7 @@ option(SHERPA_NCNN_ENABLE_BINARY "Whether to build the binary sherpa-ncnn" ON)
 option(SHERPA_NCNN_ENABLE_TEST "Whether to build tests" OFF)
 option(SHERPA_NCNN_ENABLE_C_API "Whether to build C API" ON)
 option(SHERPA_NCNN_ENABLE_WASM "Whether to enable WASM" OFF)
+option(SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS "Whether to enable WASM for NodeJS" OFF)
 option(SHERPA_NCNN_ENABLE_GENERATE_INT8_SCALE_TABLE "Whether to generate-int8-scale-table" ON)
 option(SHERPA_NCNN_ENABLE_FFMPEG_EXAMPLES "Whether to enable ffmpeg-examples" OFF)
 
@@ -77,6 +78,15 @@ message(STATUS "SHERPA_NCNN_ENABLE_TEST ${SHERPA_NCNN_ENABLE_TEST}")
 message(STATUS "SHERPA_NCNN_ENABLE_C_API ${SHERPA_NCNN_ENABLE_C_API}")
 message(STATUS "SHERPA_NCNN_ENABLE_GENERATE_INT8_SCALE_TABLE ${SHERPA_NCNN_ENABLE_GENERATE_INT8_SCALE_TABLE}")
 message(STATUS "SHERPA_NCNN_ENABLE_FFMPEG_EXAMPLES ${SHERPA_NCNN_ENABLE_FFMPEG_EXAMPLES}")
+message(STATUS "SHERPA_NCNN_ENABLE_WASM ${SHERPA_NCNN_ENABLE_WASM}")
+message(STATUS "SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS ${SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS}")
+
+# The NodeJS build is a variant of the WASM build, so WASM must be enabled too.
+if(SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS)
+  if(NOT SHERPA_NCNN_ENABLE_WASM)
+    message(FATAL_ERROR "Please set SHERPA_NCNN_ENABLE_WASM to ON if you enable WASM for NodeJS")
+  endif()
+endif()
 
 if(NOT CMAKE_BUILD_TYPE)
   message(STATUS "No CMAKE_BUILD_TYPE given, default to Release")
diff --git a/build-wasm-simd-for-nodejs.sh b/build-wasm-simd-for-nodejs.sh
new file mode 100755
index 00000000..e19f6f00
--- /dev/null
+++ b/build-wasm-simd-for-nodejs.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+# Copyright (c) 2024 Xiaomi Corporation
+#
+# This script is to build sherpa-ncnn for WebAssembly (NodeJS)
+#
+# See also
+# https://github.com/Tencent/ncnn/wiki/how-to-build#build-for-webassembly
+#
+# Please refer to
+# https://k2-fsa.github.io/sherpa/ncnn/wasm/index.html
+# for more details.
+
+set -ex
+
+if [ -z "$EMSCRIPTEN" ]; then
+  if ! command -v emcc &> /dev/null; then
+    echo "Please install emscripten first"
+    echo ""
+    echo "You can use the following commands to install it:"
+    echo ""
+    echo "git clone https://github.com/emscripten-core/emsdk.git"
+    echo "cd emsdk"
+    echo "git pull"
+    echo "./emsdk install latest"
+    echo "./emsdk activate latest"
+    echo "source ./emsdk_env.sh"
+    exit 1
+  else
+    EMSCRIPTEN="$(dirname "$(realpath "$(which emcc)")")"
+  fi
+fi
+
+export EMSCRIPTEN="$EMSCRIPTEN"
+echo "EMSCRIPTEN: $EMSCRIPTEN"
+if [ ! -f "$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" ]; then
+  echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
+  echo "Please make sure you have installed emsdk correctly"
+  exit 1
+fi
+
+mkdir -p build-wasm-simd-for-nodejs
+pushd build-wasm-simd-for-nodejs
+
+export SHERPA_NCNN_IS_USING_BUILD_WASM_SH=ON
+
+cmake \
+  -DCMAKE_INSTALL_PREFIX=./install \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_TOOLCHAIN_FILE="$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" \
+  -DNCNN_THREADS=OFF \
+  -DNCNN_OPENMP=OFF \
+  -DNCNN_SIMPLEOMP=OFF \
+  -DNCNN_RUNTIME_CPU=OFF \
+  -DNCNN_SSE2=ON \
+  -DNCNN_AVX2=OFF \
+  -DNCNN_AVX=OFF \
+  -DNCNN_BUILD_TOOLS=OFF \
+  -DNCNN_BUILD_EXAMPLES=OFF \
+  -DNCNN_BUILD_BENCHMARK=OFF \
+  \
+  -DSHERPA_NCNN_ENABLE_WASM=ON \
+  -DSHERPA_NCNN_ENABLE_WASM_FOR_NODEJS=ON \
+  -DBUILD_SHARED_LIBS=OFF \
+  -DSHERPA_NCNN_ENABLE_PYTHON=OFF \
+  -DSHERPA_NCNN_ENABLE_PORTAUDIO=OFF \
+  -DSHERPA_NCNN_ENABLE_JNI=OFF \
+  -DSHERPA_NCNN_ENABLE_BINARY=OFF \
+  -DSHERPA_NCNN_ENABLE_TEST=OFF \
+  -DSHERPA_NCNN_ENABLE_C_API=ON \
+  -DSHERPA_NCNN_ENABLE_GENERATE_INT8_SCALE_TABLE=OFF \
+  -DSHERPA_NCNN_ENABLE_FFMPEG_EXAMPLES=OFF \
+  ..
+
+make -j2
+make install
+ls -lh install/bin/wasm
diff --git a/nodejs-wasm-examples/decode-file.js b/nodejs-wasm-examples/decode-file.js
new file mode 100644
index 00000000..0cff2337
--- /dev/null
+++ b/nodejs-wasm-examples/decode-file.js
@@ -0,0 +1,84 @@
+// Decode a 16-bit, single-channel PCM wave file with the sherpa-ncnn
+// WebAssembly module running under NodeJS.
+const fs = require('fs');
+const wav = require('wav');
+const {Readable} = require('stream');
+
+let Module = require('./sherpa-ncnn-wasm-main.js')();
+let b = require('./sherpa-ncnn.js');
+
+let recognizer = b.createRecognizer(Module);
+let stream = recognizer.createStream();
+
+console.log(recognizer.config);
+
+const waveFilename = './0.wav';
+
+const reader = new wav.Reader();
+const readable = new Readable().wrap(reader);
+const buf = [];
+
+reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
+  if (sampleRate != recognizer.config.featConfig.samplingRate) {
+    throw new Error(`Only support sampleRate ${
+        recognizer.config.featConfig.samplingRate}. Given ${sampleRate}`);
+  }
+
+  if (audioFormat != 1) {
+    throw new Error(`Only support PCM format. Given ${audioFormat}`);
+  }
+
+  if (channels != 1) {
+    throw new Error(`Only a single channel. Given ${channels}`);
+  }
+
+  if (bitDepth != 16) {
+    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
+  }
+});
+
+fs.createReadStream(waveFilename, {'highWaterMark': 4096})
+    .pipe(reader)
+    .on('finish', function() {
+      // tail padding
+      const floatSamples =
+          new Float32Array(recognizer.config.featConfig.samplingRate * 0.5);
+
+      buf.push(floatSamples);
+
+      // Concatenate all chunks into one preallocated array in linear time.
+      const numSamples = buf.reduce((n, c) => n + c.length, 0);
+      const flattened = new Float32Array(numSamples);
+      let offset = 0;
+      for (const c of buf) {
+        flattened.set(c, offset);
+        offset += c.length;
+      }
+
+      stream.acceptWaveform(
+          recognizer.config.featConfig.samplingRate, flattened);
+      while (recognizer.isReady(stream)) {
+        recognizer.decode(stream);
+      }
+      const r = recognizer.getResult(stream);
+      console.log('result', r);
+
+      stream.free();
+      recognizer.free();
+    });
+
+readable.on('readable', function() {
+  let chunk;
+  while ((chunk = readable.read()) != null) {
+    const int16Samples = new Int16Array(
+        chunk.buffer, chunk.byteOffset,
+        chunk.length / Int16Array.BYTES_PER_ELEMENT);
+
+    const floatSamples = new Float32Array(int16Samples.length);
+    for (let i = 0; i < floatSamples.length; i++) {
+      floatSamples[i] = int16Samples[i] / 32768.0;
+    }
+
+    buf.push(floatSamples);
+  }
+});
diff --git a/wasm/CMakeLists.txt b/wasm/CMakeLists.txt
index 4cc93276..578b5a20 100644
--- a/wasm/CMakeLists.txt
+++ b/wasm/CMakeLists.txt
@@ -31,8 +31,23 @@ list(JOIN mangled_exported_functions "," all_exported_functions)
 include_directories(${CMAKE_SOURCE_DIR})
 set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB ")
 string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
-string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
-string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue'] ")
+
+if(SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS)
+  # NodeJS build: access files through the host file system directly.
+  string(APPEND MY_FLAGS " -sNODERAWFS=1 ")
+else()
+  # Browser build: package the assets directory into a preloaded .data file.
+  string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
+endif()
+
+string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
+
+if(SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS)
+  # Emit a factory function (called as require(...)() from NodeJS) and
+  # compile the wasm module synchronously.
+  string(APPEND MY_FLAGS " -sMODULARIZE=1 -sWASM_ASYNC_COMPILATION=0 ")
+endif()
+
 message(STATUS "MY_FLAGS: ${MY_FLAGS}")
 
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
@@ -50,7 +65,15 @@ install(
     "index.html"
     "$<TARGET_FILE_DIR:sherpa-ncnn-wasm-main>/sherpa-ncnn-wasm-main.js"
     "$<TARGET_FILE_DIR:sherpa-ncnn-wasm-main>/sherpa-ncnn-wasm-main.wasm"
-    "$<TARGET_FILE_DIR:sherpa-ncnn-wasm-main>/sherpa-ncnn-wasm-main.data"
   DESTINATION
     bin/wasm
 )
+
+if(NOT SHERPA_NCNN_ENABLE_WASM_FOR_NODEJS)
+  install(
+    FILES
+      "$<TARGET_FILE_DIR:sherpa-ncnn-wasm-main>/sherpa-ncnn-wasm-main.data"
+    DESTINATION
+      bin/wasm
+  )
+endif()
diff --git a/wasm/app.js b/wasm/app.js
index cb27db97..963fcc4a 100644
--- a/wasm/app.js
+++ b/wasm/app.js
@@ -45,7 +45,7 @@ Module.onRuntimeInitialized = function() {
 
   startBtn.disabled = false;
 
-  recognizer = createRecognizer();
+  recognizer = createRecognizer(Module);
   console.log('recognizer is created!', recognizer);
 };
 
diff --git a/wasm/sherpa-ncnn.js b/wasm/sherpa-ncnn.js
index 6f3f8b2c..93b349e1 100644
--- a/wasm/sherpa-ncnn.js
+++ b/wasm/sherpa-ncnn.js
@@ -1,51 +1,51 @@
 
 
-function freeConfig(config) {
+function freeConfig(config, Module) {
   if ('buffer' in config) {
-    _free(config.buffer);
+    Module._free(config.buffer);
   }
-  _free(config.ptr);
+  Module._free(config.ptr);
 }
 
 // The user should free the returned pointers
-function initSherpaNcnnModelConfig(config) {
-  let encoderParamLen = lengthBytesUTF8(config.encoderParam) + 1;
-  let decoderParamLen = lengthBytesUTF8(config.decoderParam) + 1;
-  let joinerParamLen = lengthBytesUTF8(config.joinerParam) + 1;
+function initSherpaNcnnModelConfig(config, Module) {
+  let encoderParamLen = Module.lengthBytesUTF8(config.encoderParam) + 1;
+  let decoderParamLen = Module.lengthBytesUTF8(config.decoderParam) + 1;
+  let joinerParamLen = Module.lengthBytesUTF8(config.joinerParam) + 1;
 
-  let encoderBinLen = lengthBytesUTF8(config.encoderBin) + 1;
-  let decoderBinLen = lengthBytesUTF8(config.decoderBin) + 1;
-  let joinerBinLen = lengthBytesUTF8(config.joinerBin) + 1;
+  let encoderBinLen = Module.lengthBytesUTF8(config.encoderBin) + 1;
+  let decoderBinLen = Module.lengthBytesUTF8(config.decoderBin) + 1;
+  let joinerBinLen = Module.lengthBytesUTF8(config.joinerBin) + 1;
 
-  let tokensLen = lengthBytesUTF8(config.tokens) + 1;
+  let tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
 
   let n = encoderParamLen + decoderParamLen + joinerParamLen;
   n += encoderBinLen + decoderBinLen + joinerBinLen;
   n += tokensLen;
 
-  let buffer = _malloc(n);
-  let ptr = _malloc(4 * 9);
+  let buffer = Module._malloc(n);
+  let ptr = Module._malloc(4 * 9);
 
   let offset = 0;
-  stringToUTF8(config.encoderParam, buffer + offset, encoderParamLen);
+  Module.stringToUTF8(config.encoderParam, buffer + offset, encoderParamLen);
   offset += encoderParamLen;
 
-  stringToUTF8(config.encoderBin, buffer + offset, encoderBinLen);
+  Module.stringToUTF8(config.encoderBin, buffer + offset, encoderBinLen);
   offset += encoderBinLen;
 
-  stringToUTF8(config.decoderParam, buffer + offset, decoderParamLen);
+  Module.stringToUTF8(config.decoderParam, buffer + offset, decoderParamLen);
   offset += decoderParamLen;
 
-  stringToUTF8(config.decoderBin, buffer + offset, decoderBinLen);
+  Module.stringToUTF8(config.decoderBin, buffer + offset, decoderBinLen);
   offset += decoderBinLen;
 
-  stringToUTF8(config.joinerParam, buffer + offset, joinerParamLen);
+  Module.stringToUTF8(config.joinerParam, buffer + offset, joinerParamLen);
   offset += joinerParamLen;
 
-  stringToUTF8(config.joinerBin, buffer + offset, joinerBinLen);
+  Module.stringToUTF8(config.joinerBin, buffer + offset, joinerBinLen);
   offset += joinerBinLen;
 
-  stringToUTF8(config.tokens, buffer + offset, tokensLen);
+  Module.stringToUTF8(config.tokens, buffer + offset, tokensLen);
   offset += tokensLen;
 
   offset = 0;
@@ -78,12 +78,12 @@ function initSherpaNcnnModelConfig(config) {
   }
 }
 
-function initSherpaNcnnDecoderConfig(config) {
-  let n = lengthBytesUTF8(config.decodingMethod) + 1;
-  let buffer = _malloc(n);
-  let ptr = _malloc(4 * 2);
+function initSherpaNcnnDecoderConfig(config, Module) {
+  let n = Module.lengthBytesUTF8(config.decodingMethod) + 1;
+  let buffer = Module._malloc(n);
+  let ptr = Module._malloc(4 * 2);
 
-  stringToUTF8(config.decodingMethod, buffer, n);
+  Module.stringToUTF8(config.decodingMethod, buffer, n);
 
   Module.setValue(ptr, buffer, 'i8*');
   Module.setValue(ptr + 4, config.numActivePaths, 'i32');
@@ -93,8 +93,8 @@ function initSherpaNcnnDecoderConfig(config) {
   }
 }
 
-function initSherpaNcnnFeatureExtractorConfig(config) {
-  let ptr = _malloc(4 * 2);
+function initSherpaNcnnFeatureExtractorConfig(config, Module) {
+  let ptr = Module._malloc(4 * 2);
   Module.setValue(ptr, config.samplingRate, 'float');
   Module.setValue(ptr + 4, config.featureDim, 'i32');
   return {
@@ -102,23 +102,24 @@ function initSherpaNcnnFeatureExtractorConfig(config) {
   }
 }
 
-function initSherpaNcnnRecognizerConfig(config) {
-  let featConfig = initSherpaNcnnFeatureExtractorConfig(config.featConfig);
-  let modelConfig = initSherpaNcnnModelConfig(config.modelConfig);
-  let decoderConfig = initSherpaNcnnDecoderConfig(config.decoderConfig);
+function initSherpaNcnnRecognizerConfig(config, Module) {
+  let featConfig =
+      initSherpaNcnnFeatureExtractorConfig(config.featConfig, Module);
+  let modelConfig = initSherpaNcnnModelConfig(config.modelConfig, Module);
+  let decoderConfig = initSherpaNcnnDecoderConfig(config.decoderConfig, Module);
 
   let numBytes =
       featConfig.len + modelConfig.len + decoderConfig.len + 4 * 4 + 4 * 2;
 
-  let ptr = _malloc(numBytes);
+  let ptr = Module._malloc(numBytes);
   let offset = 0;
-  _CopyHeap(featConfig.ptr, featConfig.len, ptr + offset);
+  Module._CopyHeap(featConfig.ptr, featConfig.len, ptr + offset);
   offset += featConfig.len;
 
-  _CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset)
+  Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset)
   offset += modelConfig.len;
 
-  _CopyHeap(decoderConfig.ptr, decoderConfig.len, ptr + offset)
+  Module._CopyHeap(decoderConfig.ptr, decoderConfig.len, ptr + offset)
   offset += decoderConfig.len;
 
   Module.setValue(ptr + offset, config.enableEndpoint, 'i32');
@@ -146,17 +147,18 @@ function initSherpaNcnnRecognizerConfig(config) {
 }
 
 class Stream {
-  constructor(handle) {
+  constructor(handle, Module) {
     this.handle = handle;
     this.pointer = null;
-    this.n = 0
+    this.n = 0;
+    this.Module = Module;
   }
 
   free() {
     if (this.handle) {
-      _DestroyStream(this.handle);
+      this.Module._DestroyStream(this.handle);
       this.handle = null;
-      _free(this.pointer);
+      this.Module._free(this.pointer);
       this.pointer = null;
       this.n = 0;
     }
@@ -168,13 +170,15 @@ class Stream {
    */
   acceptWaveform(sampleRate, samples) {
     if (this.n < samples.length) {
-      _free(this.pointer);
-      this.pointer = _malloc(samples.length * samples.BYTES_PER_ELEMENT);
+      this.Module._free(this.pointer);
+      this.pointer =
+          this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
       this.n = samples.length
     }
 
-    Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
-    _AcceptWaveform(this.handle, sampleRate, this.pointer, samples.length);
+    this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
+    this.Module._AcceptWaveform(
+        this.handle, sampleRate, this.pointer, samples.length);
   }
 
   inputFinished() {
@@ -183,59 +187,56 @@ class Stream {
 };
 
 class Recognizer {
-  constructor(configObj, borrowedHandle) {
-    if (borrowedHandle) {
-      this.handle = borrowedHandle;
-      return;
-    }
-
-    let config = initSherpaNcnnRecognizerConfig(configObj)
-    let handle = _CreateRecognizer(config.ptr);
+  constructor(configObj, Module) {
+    this.config = configObj;
+    let config = initSherpaNcnnRecognizerConfig(configObj, Module)
+    let handle = Module._CreateRecognizer(config.ptr);
 
-    freeConfig(config.featConfig);
-    freeConfig(config.modelConfig);
-    freeConfig(config.decoderConfig);
-    freeConfig(config);
+    freeConfig(config.featConfig, Module);
+    freeConfig(config.modelConfig, Module);
+    freeConfig(config.decoderConfig, Module);
+    freeConfig(config, Module);
 
     this.handle = handle;
+    this.Module = Module;
   }
 
   free() {
-    _DestroyRecognizer(this.handle);
+    this.Module._DestroyRecognizer(this.handle);
     this.handle = 0
   }
 
   createStream() {
-    let handle = _CreateStream(this.handle);
-    return new Stream(handle);
+    let handle = this.Module._CreateStream(this.handle);
+    return new Stream(handle, this.Module);
   }
 
   isReady(stream) {
-    return _IsReady(this.handle, stream.handle) == 1;
+    return this.Module._IsReady(this.handle, stream.handle) == 1;
   }
 
   isEndpoint(stream) {
-    return _IsEndpoint(this.handle, stream.handle) == 1;
+    return this.Module._IsEndpoint(this.handle, stream.handle) == 1;
   }
 
   decode(stream) {
-    return _Decode(this.handle, stream.handle);
+    return this.Module._Decode(this.handle, stream.handle);
   }
 
   reset(stream) {
-    _Reset(this.handle, stream.handle);
+    this.Module._Reset(this.handle, stream.handle);
   }
 
   getResult(stream) {
-    let r = _GetResult(this.handle, stream.handle);
-    let textPtr = getValue(r, 'i8*');
-    let text = UTF8ToString(textPtr);
-    _DestroyResult(r);
+    let r = this.Module._GetResult(this.handle, stream.handle);
+    let textPtr = this.Module.getValue(r, 'i8*');
+    let text = this.Module.UTF8ToString(textPtr);
+    this.Module._DestroyResult(r);
     return text;
   }
 }
 
-function createRecognizer() {
+function createRecognizer(Module) {
   let modelConfig = {
     encoderParam: './encoder_jit_trace-pnnx.ncnn.param',
     encoderBin: './encoder_jit_trace-pnnx.ncnn.bin',
@@ -268,5 +269,14 @@ function createRecognizer() {
     rule3MinUtternceLength: 20,
   };
 
-  return new Recognizer(configObj);
+  return new Recognizer(configObj, Module);
 }
+
+// Export createRecognizer only when a CommonJS `module` object exists
+// (NodeJS). When this file is loaded as a plain browser script, `module`
+// is undefined and an unguarded assignment would throw a ReferenceError.
+if (typeof module !== 'undefined' && module.exports) {
+  module.exports = {
+    createRecognizer,
+  };
+}