diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 3c3681fe..780769d4 100644 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -14,7 +14,7 @@ turbo-transformers Benchmark Utils Usage: - benchmark [--seq_len=] [--framework=] [--batch_size=] [-n ] [--enable-random] [--min_seq_len=] [--max_seq_len=] [--use_gpu] [--num_threads=] [--enable_mem_opt=] + benchmark [--seq_len=] [--framework=] [--batch_size=] [-n ] [--enable-random] [--min_seq_len=] [--max_seq_len=] [--use_gpu] [--num_threads=] [--enable_mem_opt] Options: --framework= The framework to test in (torch, torch_jit, turbo-transformers, @@ -27,7 +27,7 @@ --max_seq_len= Maximal sequence length generated when enable random [default: 50] --use_gpu Enable GPU. --num_threads= The number of CPU threads. [default: 4] - --enable_mem_opt= Use memory optimization for BERT. [default: False] + --enable_mem_opt Use model aware memory optimization for BERT. """ import json @@ -54,7 +54,8 @@ def main(): 'use_gpu': True if args['--use_gpu'] else False, 'enable_mem_opt': True if args['--enable_mem_opt'] else False, } - if (kwargs['model_name'] != 'bert'): + if (kwargs['model_name'] != 'bert' + or args['--framework'] != 'turbo-transformers'): kwargs['enable_mem_opt'] = False if args['--framework'] == 'turbo-transformers': benchmark_turbo_transformers(**kwargs) diff --git a/benchmark/onnx_benchmark_helper.py b/benchmark/onnx_benchmark_helper.py index c7dff6a5..fff7afe4 100644 --- a/benchmark/onnx_benchmark_helper.py +++ b/benchmark/onnx_benchmark_helper.py @@ -153,6 +153,7 @@ def _impl_(model_name: str, if enable_latency_plot: import time + import torch print(f"dump results to onnxrt_latency_{num_threads}.txt") result_list = [] with open(f"onnxrt_latency_{num_threads}.txt", "w") as of: diff --git a/benchmark/run_gpu_variable_benchmark.sh b/benchmark/run_gpu_variable_benchmark.sh index be86ecb3..31587099 100644 --- a/benchmark/run_gpu_variable_benchmark.sh +++ b/benchmark/run_gpu_variable_benchmark.sh @@ -17,7 +17,7 @@ set -e # FRAMEWORKS=("turbo-transformers" "torch" "onnxruntime") FRAMEWORKS=("turbo-transformers" "torch") # Note Onnx doese not supports Albert -# FRAMEWORKS=("onnxruntime") +# FRAMEWORKS=("onnxruntime-gpu") MAX_SEQ_LEN=(500)