From b594a3875ba9bbaeea62500b3672f06d8fe59332 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Mon, 24 Jun 2024 16:20:46 +0800
Subject: [PATCH] Add CI for non-streaming zipformer about ksponspeech (#1667)

---
Review note: line structure of this patch was restored, and the redundant
second `cp -v $repo/exp/*.onnx $dst` in test_pretrained_non_streaming() was
dropped. Hunk headers and the diffstat below are adjusted to match.

 .github/scripts/ksponspeech/ASR/run.sh | 65 ++++++++++++++++++++++++--
 .github/workflows/ksponspeech.yml      | 61 +++++++++++++++++++++---
 2 files changed, 117 insertions(+), 9 deletions(-)

diff --git a/.github/scripts/ksponspeech/ASR/run.sh b/.github/scripts/ksponspeech/ASR/run.sh
index 068c22dfcb..5c7886463e 100755
--- a/.github/scripts/ksponspeech/ASR/run.sh
+++ b/.github/scripts/ksponspeech/ASR/run.sh
@@ -11,7 +11,65 @@ log() {
 
 cd egs/ksponspeech/ASR
 
-function test_pretrained() {
+function test_pretrained_non_streaming() {
+  git lfs install
+  git clone https://huggingface.co/johnBamma/icefall-asr-ksponspeech-zipformer-2024-06-24
+  repo=icefall-asr-ksponspeech-zipformer-2024-06-24
+  pushd $repo
+  mkdir test_wavs
+  cd test_wavs
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/0.wav
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/1.wav
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/2.wav
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/3.wav
+  curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/trans.txt
+  cd ../exp
+  ln -s pretrained.pt epoch-99.pt
+  ls -lh
+  popd
+
+  log 'test pretrained.py'
+  ./zipformer/pretrained.py \
+    --checkpoint $repo/exp/pretrained.pt \
+    --tokens $repo/data/lang_bpe_5000/tokens.txt \
+    --method greedy_search \
+    $repo/test_wavs/0.wav \
+    $repo/test_wavs/1.wav \
+    $repo/test_wavs/2.wav \
+    $repo/test_wavs/3.wav
+
+  log 'test export-onnx.py'
+
+  ./zipformer/export-onnx.py \
+    --tokens $repo/data/lang_bpe_5000/tokens.txt \
+    --use-averaged-model 0 \
+    --epoch 99 \
+    --avg 1 \
+    --exp-dir $repo/exp/
+
+  ls -lh $repo/exp
+
+  ls -lh $repo/data/lang_bpe_5000/
+
+  log 'test exported onnx models'
+  ./zipformer/onnx_pretrained.py \
+    --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
+    --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
+    --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
+    --tokens $repo/data/lang_bpe_5000/tokens.txt \
+    $repo/test_wavs/0.wav
+
+  dst=/tmp/model-2024-06-24
+  mkdir -p $dst
+
+  cp -av $repo/test_wavs $dst
+  cp -v $repo/exp/*.onnx $dst
+  cp -v $repo/data/lang_bpe_5000/tokens.txt $dst
+  cp -v $repo/data/lang_bpe_5000/bpe.model $dst
+  rm -rf $repo
+}
+
+function test_pretrained_streaming() {
   git lfs install
   git clone https://huggingface.co/johnBamma/icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
   repo=icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
@@ -59,7 +117,7 @@ function test_pretrained() {
     --tokens $repo/data/lang_bpe_5000/tokens.txt \
     $repo/test_wavs/0.wav
 
-  dst=/tmp/model1
+  dst=/tmp/model-2024-06-16
   mkdir -p $dst
 
   cp -v $repo/exp/*.onnx $dst
@@ -69,4 +127,5 @@ function test_pretrained() {
   rm -rf $repo
 }
 
-test_pretrained
+test_pretrained_non_streaming
+test_pretrained_streaming
diff --git a/.github/workflows/ksponspeech.yml b/.github/workflows/ksponspeech.yml
index 2e1441c066..6c4fc546d5 100644
--- a/.github/workflows/ksponspeech.yml
+++ b/.github/workflows/ksponspeech.yml
@@ -57,13 +57,19 @@ jobs:
 
           .github/scripts/ksponspeech/ASR/run.sh
 
-      - name: Show model files
+      - name: Show model files (2024-06-24)
         shell: bash
         run: |
-          src=/tmp/model1
+          src=/tmp/model-2024-06-24
           ls -lh $src
 
-      - name: Upload model to huggingface
+      - name: Show model files (2024-06-16)
+        shell: bash
+        run: |
+          src=/tmp/model-2024-06-16
+          ls -lh $src
+
+      - name: Upload model to huggingface (2024-06-24)
         env:
           HF_TOKEN: ${{ secrets.HF_TOKEN }}
         uses: nick-fields/retry@v3
@@ -72,7 +78,41 @@ jobs:
           timeout_seconds: 200
           shell: bash
           command: |
-            src=/tmp/model1
+            src=/tmp/model-2024-06-24
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+
+            rm -rf hf
+            export GIT_LFS_SKIP_SMUDGE=1
+            export GIT_CLONE_PROTECTION_ACTIVE=false
+
+            git clone https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24 hf
+            cd hf
+            git fetch
+            git pull
+            git merge -m "merge remote" --ff origin main
+            cp -av $src/* ./
+            ls -lh
+            git lfs track "bpe.model"
+            git lfs track "*.onnx"
+            git add .
+            git status
+            git commit -m "update models"
+            git status
+
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24 main || true
+            rm -rf hf
+
+      - name: Upload model to huggingface (2024-06-16)
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            src=/tmp/model-2024-06-16
             git config --global user.email "csukuangfj@gmail.com"
             git config --global user.name "Fangjun Kuang"
 
@@ -98,15 +138,24 @@ jobs:
             git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 main || true
             rm -rf hf
 
-      - name: Prepare for release
+      - name: Prepare for release (2024-06-16)
         shell: bash
         run: |
-          src=/tmp/model1
+          src=/tmp/model-2024-06-16
           d=sherpa-onnx-streaming-zipformer-korean-2024-06-16
           mv $src ./$d
           tar cjvf ${d}.tar.bz2 $d
           ls -lh
 
+      - name: Prepare for release (2024-06-24)
+        shell: bash
+        run: |
+          src=/tmp/model-2024-06-24
+          d=sherpa-onnx-zipformer-korean-2024-06-24
+          mv $src ./$d
+          tar cjvf ${d}.tar.bz2 $d
+          ls -lh
+
       - name: Release exported onnx models
         uses: svenstaro/upload-release-action@v2
         with: