From 34e40a86b33102576b3442329421178a487e3ea3 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 22 Sep 2023 09:57:15 +0800 Subject: [PATCH] Fix exporting decoder model to onnx (#1264) * Use torch.jit.script() to export the decoder model See also https://github.com/k2-fsa/sherpa-onnx/issues/327 --- egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py | 1 + egs/commonvoice/ASR/pruned_transducer_stateless7/export-onnx.py | 1 + .../ASR/conv_emformer_transducer_stateless2/export-onnx.py | 1 + egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx-zh.py | 1 + egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx.py | 1 + egs/librispeech/ASR/pruned_transducer_stateless/export-onnx.py | 1 + egs/librispeech/ASR/pruned_transducer_stateless3/export-onnx.py | 1 + .../ASR/pruned_transducer_stateless5/export-onnx-streaming.py | 1 + egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx.py | 1 + egs/librispeech/ASR/pruned_transducer_stateless7/export-onnx.py | 1 + .../ASR/pruned_transducer_stateless7_streaming/export-onnx-zh.py | 1 + .../ASR/pruned_transducer_stateless7_streaming/export-onnx.py | 1 + egs/librispeech/ASR/zipformer/export-onnx-streaming.py | 1 + egs/librispeech/ASR/zipformer/export-onnx.py | 1 + egs/wenetspeech/ASR/pruned_transducer_stateless2/export-onnx.py | 1 + .../ASR/pruned_transducer_stateless5/export-onnx-streaming.py | 1 + egs/wenetspeech/ASR/pruned_transducer_stateless5/export-onnx.py | 1 + 17 files changed, 17 insertions(+) diff --git a/egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py b/egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py index e8211500a0..2a9fc57d5f 100755 --- a/egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py +++ b/egs/aishell/ASR/pruned_transducer_stateless7/export-onnx.py @@ -322,6 +322,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/commonvoice/ASR/pruned_transducer_stateless7/export-onnx.py b/egs/commonvoice/ASR/pruned_transducer_stateless7/export-onnx.py index 0c98885acf..2b9f2293a8 100755 --- a/egs/commonvoice/ASR/pruned_transducer_stateless7/export-onnx.py +++ b/egs/commonvoice/ASR/pruned_transducer_stateless7/export-onnx.py @@ -330,6 +330,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py index cfd365207e..ab046557fb 100755 --- a/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py +++ b/egs/librispeech/ASR/conv_emformer_transducer_stateless2/export-onnx.py @@ -401,6 +401,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx-zh.py b/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx-zh.py index 89ced388ce..2a52e2eeca 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx-zh.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx-zh.py @@ -359,6 +359,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx.py b/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx.py index 6b6cb893f7..c543628ffd 100755 --- a/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx.py +++ b/egs/librispeech/ASR/lstm_transducer_stateless2/export-onnx.py @@ -356,6 +356,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless/export-onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless/export-onnx.py index 282238c13e..0a2132e565 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless/export-onnx.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless/export-onnx.py @@ -307,6 +307,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless3/export-onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless3/export-onnx.py index 26dea7e11f..2685ea95a7 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless3/export-onnx.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless3/export-onnx.py @@ -312,6 +312,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx-streaming.py b/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx-streaming.py index 549fb13c91..b90d81dcf5 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx-streaming.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx-streaming.py @@ -404,6 +404,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx.py index fff0fcdd5c..02aa24f2cd 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless5/export-onnx.py @@ -335,6 +335,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7/export-onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless7/export-onnx.py index 11c885f4df..b75548f8b4 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7/export-onnx.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7/export-onnx.py @@ -329,6 +329,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx-zh.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx-zh.py index 8653126dee..2de56837e6 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx-zh.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx-zh.py @@ -413,6 +413,7 @@ def export_decoder_model_onnx( context_size = decoder_model.decoder.context_size vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx.py b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx.py index 6f84d79b4d..d71080760f 100755 --- a/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx.py +++ b/egs/librispeech/ASR/pruned_transducer_stateless7_streaming/export-onnx.py @@ -401,6 +401,7 @@ def export_decoder_model_onnx( context_size = decoder_model.decoder.context_size vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py index a951aeef35..e2c7d7d95b 100755 --- a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py +++ b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py @@ -506,6 +506,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/librispeech/ASR/zipformer/export-onnx.py b/egs/librispeech/ASR/zipformer/export-onnx.py index e0d664009c..3682f0b625 100755 --- a/egs/librispeech/ASR/zipformer/export-onnx.py +++ b/egs/librispeech/ASR/zipformer/export-onnx.py @@ -353,6 +353,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless2/export-onnx.py b/egs/wenetspeech/ASR/pruned_transducer_stateless2/export-onnx.py index 760fad9744..140b1d37ff 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless2/export-onnx.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless2/export-onnx.py @@ -315,6 +315,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/export-onnx-streaming.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/export-onnx-streaming.py index 9a926d7e50..921766ad4a 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/export-onnx-streaming.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/export-onnx-streaming.py @@ -404,6 +404,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y, diff --git a/egs/wenetspeech/ASR/pruned_transducer_stateless5/export-onnx.py b/egs/wenetspeech/ASR/pruned_transducer_stateless5/export-onnx.py index 68c7cc352c..037c7adf1d 100755 --- a/egs/wenetspeech/ASR/pruned_transducer_stateless5/export-onnx.py +++ b/egs/wenetspeech/ASR/pruned_transducer_stateless5/export-onnx.py @@ -335,6 +335,7 @@ def export_decoder_model_onnx( vocab_size = decoder_model.decoder.vocab_size y = torch.zeros(10, context_size, dtype=torch.int64) + decoder_model = torch.jit.script(decoder_model) torch.onnx.export( decoder_model, y,