diff --git a/.github/scripts/test-offline-transducer.sh b/.github/scripts/test-offline-transducer.sh index 1bec7ec9b..7ac729860 100755 --- a/.github/scripts/test-offline-transducer.sh +++ b/.github/scripts/test-offline-transducer.sh @@ -15,6 +15,46 @@ echo "PATH: $PATH" which $EXE +log "------------------------------------------------------------------------" +log "Run zipformer transducer models (Russian) " +log "------------------------------------------------------------------------" +for type in small-zipformer zipformer; do + url=https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-$type-ru-2024-09-18.tar.bz2 + name=$(basename $url) + curl -SL -O $url + tar xvf $name + rm $name + repo=$(basename -s .tar.bz2 $name) + ls -lh $repo + + log "test $repo" + test_wavs=( + 0.wav + 1.wav + ) + + for w in ${test_wavs[@]}; do + time $EXE \ + --tokens=$repo/tokens.txt \ + --encoder=$repo/encoder.onnx \ + --decoder=$repo/decoder.onnx \ + --joiner=$repo/joiner.onnx \ + --debug=1 \ + $repo/test_wavs/$w + done + + for w in ${test_wavs[@]}; do + time $EXE \ + --tokens=$repo/tokens.txt \ + --encoder=$repo/encoder.int8.onnx \ + --decoder=$repo/decoder.onnx \ + --joiner=$repo/joiner.int8.onnx \ + --debug=1 \ + $repo/test_wavs/$w + done + rm -rf $repo +done + log "------------------------------------------------------------------------" log "Run zipformer transducer models (Japanese from ReazonSpeech) " log "------------------------------------------------------------------------" diff --git a/.github/workflows/export-russian-onnx-models.yaml b/.github/workflows/export-russian-onnx-models.yaml new file mode 100644 index 000000000..16ee4df44 --- /dev/null +++ b/.github/workflows/export-russian-onnx-models.yaml @@ -0,0 +1,108 @@ +name: export-russian-onnx-models + +on: + workflow_dispatch: + +concurrency: + group: export-russian-onnx-models-${{ github.ref }} + cancel-in-progress: true + +jobs: + export-russian-onnx-models: + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' + name: export Russian onnx models + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macos-latest] + python-version: ["3.8"] + + steps: + - uses: actions/checkout@v4 + + - name: vosk-model-ru (zipformer v1) + shell: bash + run: | + cat >README.md <README.md < #include -#define SHERPA_ONNX_ASSIGN_ATTR_STR(c_name, js_name) \ - do { \ - if (o.Has(#js_name) && o.Get(#js_name).IsString()) { \ - Napi::String _str = o.Get(#js_name).As(); \ - std::string s = _str.Utf8Value(); \ - char *p = new char[s.size() + 1]; \ - std::copy(s.begin(), s.end(), p); \ - p[s.size()] = 0; \ - \ - c.c_name = p; \ - } \ +#define SHERPA_ONNX_ASSIGN_ATTR_STR(c_name, js_name) \ + do { \ + if (o.Has(#js_name) && o.Get(#js_name).IsString()) { \ + Napi::String _str = o.Get(#js_name).As(); \ + std::string s = _str.Utf8Value(); \ + char *p = new char[s.size() + 1]; \ + std::copy(s.begin(), s.end(), p); \ + p[s.size()] = 0; \ + \ + c.c_name = p; \ + } else if (o.Has(#js_name) && o.Get(#js_name).IsTypedArray()) { \ + Napi::Uint8Array _array = o.Get(#js_name).As(); \ + char *p = new char[_array.ElementLength() + 1]; \ + std::copy(_array.Data(), _array.Data() + _array.ElementLength(), p); \ + p[_array.ElementLength()] = '\0'; \ + \ + c.c_name = p; \ + } \ } while (0) #define SHERPA_ONNX_ASSIGN_ATTR_INT32(c_name, js_name) \ diff --git a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt index f48cc3fd5..203278cb7 100644 --- a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt +++ b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt @@ -368,6 +368,32 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { modelType = "transducer", ) } + + 17 -> { + val modelDir = "sherpa-onnx-zipformer-ru-2024-09-18" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.onnx", + joiner = "$modelDir/joiner.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } + + 18 -> { + val modelDir = "sherpa-onnx-small-zipformer-ru-2024-09-18" + return OfflineModelConfig( + transducer = OfflineTransducerModelConfig( + encoder = "$modelDir/encoder.int8.onnx", + decoder = "$modelDir/decoder.onnx", + joiner = "$modelDir/joiner.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + modelType = "transducer", + ) + } } return null }