From 16007ee14b1e1d6142c30b80ac423cf49b29a465 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 11 Jul 2024 20:27:50 +0800 Subject: [PATCH 1/5] Add cmake support for linux rocm onnxruntime lib --- CMakeLists.txt | 19 +++- cmake/onnxruntime-linux-x86_64-rocm.cmake | 101 ++++++++++++++++++++++ cmake/onnxruntime.cmake | 2 + sherpa-onnx/csrc/CMakeLists.txt | 7 ++ 4 files changed, 127 insertions(+), 2 deletions(-) create mode 100644 cmake/onnxruntime-linux-x86_64-rocm.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 203b8a569f..aad04ef66b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,7 +29,8 @@ option(SHERPA_ONNX_ENABLE_PORTAUDIO "Whether to build with portaudio" ON) option(SHERPA_ONNX_ENABLE_JNI "Whether to build JNI internface" OFF) option(SHERPA_ONNX_ENABLE_C_API "Whether to build C API" ON) option(SHERPA_ONNX_ENABLE_WEBSOCKET "Whether to build webscoket server/client" ON) -option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF) +option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime NVIDIA GPU support" OFF) +option(SHERPA_ONNX_ENABLE_ROCM "Enable ONNX Runtime AMD GPU support" OFF) option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF) option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF) option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF) @@ -82,6 +83,10 @@ if(SHERPA_ONNX_ENABLE_JNI AND NOT BUILD_SHARED_LIBS) endif() if(SHERPA_ONNX_ENABLE_GPU) + if(SHERPA_ONNX_ENABLE_ROCM) + message(FATAL_ERROR "Both SHERPA_ONNX_ENABLE_GPU and SHERPA_ONNX_ENABLE_ROCM are ON. Please set at most one of them to ON.") + endif() + message(WARNING "\ Compiling for NVIDIA GPU is enabled. Please make sure cudatoolkit is installed on your system. Otherwise, you will get errors at runtime. @@ -89,7 +94,16 @@ Hint: You don't need sudo permission to install CUDA toolkit. 
Please refer to https://k2-fsa.github.io/k2/installation/cuda-cudnn.html to install CUDA toolkit if you have not installed it.") if(NOT BUILD_SHARED_LIBS) - message(STATUS "Set BUILD_SHARED_LIBS to ON since SHERPA_ONNX_ENABLE_GPU is ON") + message(STATUS "Set BUILD_SHARED_LIBS to ON since SHERPA_ONNX_ENABLE_GPU is ON") set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) + endif() +endif() + +if(SHERPA_ONNX_ENABLE_ROCM) + message(WARNING "\ +Compiling for AMD GPU is enabled. Please make sure ROCm +is installed on your system. Otherwise, you will get errors at runtime.") + if(NOT BUILD_SHARED_LIBS) + message(STATUS "Set BUILD_SHARED_LIBS to ON since SHERPA_ONNX_ENABLE_ROCM is ON") set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) endif() endif() @@ -117,6 +131,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_JNI ${SHERPA_ONNX_ENABLE_JNI}") message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}") message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}") message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") +message(STATUS "SHERPA_ONNX_ENABLE_ROCM ${SHERPA_ONNX_ENABLE_ROCM}") message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}") message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}") diff --git a/cmake/onnxruntime-linux-x86_64-rocm.cmake b/cmake/onnxruntime-linux-x86_64-rocm.cmake new file mode 100644 index 0000000000..d51c2f7277 --- /dev/null +++ b/cmake/onnxruntime-linux-x86_64-rocm.cmake @@ -0,0 +1,101 @@ +# Copyright (c) 2022-2023 Xiaomi Corporation +message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}") +message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") + +if(NOT CMAKE_SYSTEM_NAME STREQUAL Linux) + message(FATAL_ERROR "This file is for Linux only. Given: ${CMAKE_SYSTEM_NAME}") +endif() + +if(NOT CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64) + message(FATAL_ERROR "This file is for x86_64 only. 
Given: ${CMAKE_SYSTEM_PROCESSOR}") +endif() + +if(NOT BUILD_SHARED_LIBS) + message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") +endif() + +if(NOT SHERPA_ONNX_ENABLE_ROCM) + message(FATAL_ERROR "This file is for AMD GPU only. Given SHERPA_ONNX_ENABLE_ROCM: ${SHERPA_ONNX_ENABLE_ROCM}") +endif() + +set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.18.1/onnxruntime-linux-x64-rocm-Release-1.18.1.zip") +set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.18.1/onnxruntime-linux-x64-rocm-Release-1.18.1.zip") +set(onnxruntime_HASH "SHA256=fe6674d0d4d72d9361667de2ada44a81591c3769c63a87421636317590be659e") + +# If you don't have access to the Internet, +# please download onnxruntime to one of the following locations. +# You can add more if you want. +set(possible_file_locations + $ENV{HOME}/Downloads/onnxruntime-linux-x64-rocm-Release-1.18.1.zip + ${CMAKE_SOURCE_DIR}/onnxruntime-linux-x64-rocm-Release-1.18.1.zip + ${CMAKE_BINARY_DIR}/onnxruntime-linux-x64-rocm-Release-1.18.1.zip + /tmp/onnxruntime-linux-x64-rocm-Release-1.18.1.zip + /star-fj/fangjun/download/github/onnxruntime-linux-x64-rocm-Release-1.18.1.zip +) + +foreach(f IN LISTS possible_file_locations) + if(EXISTS ${f}) + set(onnxruntime_URL "${f}") + file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL) + message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}") + set(onnxruntime_URL2) + break() + endif() +endforeach() + +FetchContent_Declare(onnxruntime + URL + ${onnxruntime_URL} + ${onnxruntime_URL2} + URL_HASH ${onnxruntime_HASH} +) + +FetchContent_GetProperties(onnxruntime) +if(NOT onnxruntime_POPULATED) + message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}") + FetchContent_Populate(onnxruntime) +endif() +message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}") + +find_library(location_onnxruntime onnxruntime + PATHS + 
"${onnxruntime_SOURCE_DIR}/lib" + NO_CMAKE_SYSTEM_PATH +) + +message(STATUS "location_onnxruntime: ${location_onnxruntime}") + +add_library(onnxruntime SHARED IMPORTED) + +set_target_properties(onnxruntime PROPERTIES + IMPORTED_LOCATION ${location_onnxruntime} + INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_SOURCE_DIR}/include" +) + +find_library(location_onnxruntime_rocm_lib onnxruntime_providers_rocm + PATHS + "${onnxruntime_SOURCE_DIR}/lib" + NO_CMAKE_SYSTEM_PATH +) + +add_library(onnxruntime_providers_rocm SHARED IMPORTED) +set_target_properties(onnxruntime_providers_rocm PROPERTIES + IMPORTED_LOCATION ${location_onnxruntime_rocm_lib} +) +message(STATUS "location_onnxruntime_rocm_lib: ${location_onnxruntime_rocm_lib}") + +# for libonnxruntime_providers_shared.so +find_library(location_onnxruntime_providers_shared_lib onnxruntime_providers_shared + PATHS + "${onnxruntime_SOURCE_DIR}/lib" + NO_CMAKE_SYSTEM_PATH +) +add_library(onnxruntime_providers_shared SHARED IMPORTED) +set_target_properties(onnxruntime_providers_shared PROPERTIES + IMPORTED_LOCATION ${location_onnxruntime_providers_shared_lib} +) +message(STATUS "location_onnxruntime_providers_shared_lib: ${location_onnxruntime_providers_shared_lib}") + +file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/libonnxruntime*") +message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}") +install(FILES ${onnxruntime_lib_files} DESTINATION lib) diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index d1c4dc851f..fc746b1a0c 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -27,6 +27,8 @@ function(download_onnxruntime) elseif(CMAKE_SYSTEM_NAME STREQUAL Linux AND CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64) if(SHERPA_ONNX_ENABLE_GPU) include(onnxruntime-linux-x86_64-gpu) + elseif(SHERPA_ONNX_ENABLE_ROCM) + include(onnxruntime-linux-x86_64-rocm) elseif(BUILD_SHARED_LIBS) include(onnxruntime-linux-x86_64) else() diff --git a/sherpa-onnx/csrc/CMakeLists.txt 
b/sherpa-onnx/csrc/CMakeLists.txt index b6bda8ba9b..12ece567aa 100644 --- a/sherpa-onnx/csrc/CMakeLists.txt +++ b/sherpa-onnx/csrc/CMakeLists.txt @@ -190,6 +190,13 @@ if(SHERPA_ONNX_ENABLE_GPU) ) endif() +if(SHERPA_ONNX_ENABLE_ROCM) + target_link_libraries(sherpa-onnx-core + onnxruntime_providers_rocm + onnxruntime_providers_shared + ) +endif() + if(BUILD_SHARED_LIBS) target_link_libraries(sherpa-onnx-core onnxruntime) else() From 706f939ca217ac5f9099a973b08a59659fa8e4d5 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 11 Jul 2024 20:36:22 +0800 Subject: [PATCH 2/5] Add C++ code for ROCM execution provider --- sherpa-onnx/csrc/provider.cc | 2 ++ sherpa-onnx/csrc/provider.h | 3 +- sherpa-onnx/csrc/session.cc | 67 ++++++++++++++++++++++-------------- 3 files changed, 45 insertions(+), 27 deletions(-) diff --git a/sherpa-onnx/csrc/provider.cc b/sherpa-onnx/csrc/provider.cc index 19d5859765..80fc5a671e 100644 --- a/sherpa-onnx/csrc/provider.cc +++ b/sherpa-onnx/csrc/provider.cc @@ -26,6 +26,8 @@ Provider StringToProvider(std::string s) { return Provider::kNNAPI; } else if (s == "trt") { return Provider::kTRT; + } else if (s == "rocm") { + return Provider::kRocm; } else { SHERPA_ONNX_LOGE("Unsupported string: %s. 
Fallback to cpu", s.c_str()); return Provider::kCPU; diff --git a/sherpa-onnx/csrc/provider.h b/sherpa-onnx/csrc/provider.h index 712006f2b7..8f03e695e0 100644 --- a/sherpa-onnx/csrc/provider.h +++ b/sherpa-onnx/csrc/provider.h @@ -19,7 +19,8 @@ enum class Provider { kCoreML = 2, // CoreMLExecutionProvider kXnnpack = 3, // XnnpackExecutionProvider kNNAPI = 4, // NnapiExecutionProvider - kTRT = 5, // TensorRTExecutionProvider + kTRT = 5, // TensorRTExecutionProvider + kRocm = 6, // ROCMExecutionProvider }; /** diff --git a/sherpa-onnx/csrc/session.cc b/sherpa-onnx/csrc/session.cc index b6fdaaa84e..b303614809 100644 --- a/sherpa-onnx/csrc/session.cc +++ b/sherpa-onnx/csrc/session.cc @@ -31,8 +31,8 @@ static void OrtStatusFailure(OrtStatus *status, const char *s) { api.ReleaseStatus(status); } -static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads, - const std::string &provider_str, +static Ort::SessionOptions GetSessionOptionsImpl( + int32_t num_threads, const std::string &provider_str, const ProviderConfig *provider_config = nullptr) { Provider p = StringToProvider(provider_str); @@ -60,15 +60,32 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads, "XnnpackExecutionProvider") != available_providers.end()) { sess_opts.AppendExecutionProvider("XNNPACK"); } else { - SHERPA_ONNX_LOGE("Available providers: %s. Fallback to cpu!", - os.str().c_str()); + SHERPA_ONNX_LOGE( + "Available providers: %s. " + "XNNPACK is not supported. Fallback to cpu!", + os.str().c_str()); + } + break; + } + case Provider::kRocm: { + if (std::find(available_providers.begin(), available_providers.end(), + "ROCMExecutionProvider") != available_providers.end()) { + OrtROCMProviderOptions options; + options.device_id = 0; + sess_opts.AppendExecutionProvider_ROCM(options); + } else { + SHERPA_ONNX_LOGE( + "Available providers: %s. " + "ROCM is not supported. 
Fallback to cpu!", + os.str().c_str()); } break; } case Provider::kTRT: { if (provider_config == nullptr) { - SHERPA_ONNX_LOGE("Tensorrt support for Online models ony," - "Must be extended for offline and others"); + SHERPA_ONNX_LOGE( + "TensorRT support is for Online models only, " + "Must be extended for offline and others"); exit(1); } auto trt_config = provider_config->trt_config; @@ -84,30 +101,28 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads, std::to_string(trt_config.trt_max_partition_iterations); auto trt_min_subgraph_size = std::to_string(trt_config.trt_min_subgraph_size); - auto trt_fp16_enable = - std::to_string(trt_config.trt_fp16_enable); + auto trt_fp16_enable = std::to_string(trt_config.trt_fp16_enable); auto trt_detailed_build_log = std::to_string(trt_config.trt_detailed_build_log); auto trt_engine_cache_enable = std::to_string(trt_config.trt_engine_cache_enable); auto trt_timing_cache_enable = std::to_string(trt_config.trt_timing_cache_enable); - auto trt_dump_subgraphs = - std::to_string(trt_config.trt_dump_subgraphs); + auto trt_dump_subgraphs = std::to_string(trt_config.trt_dump_subgraphs); std::vector trt_options = { - {"device_id", device_id.c_str()}, - {"trt_max_workspace_size", trt_max_workspace_size.c_str()}, - {"trt_max_partition_iterations", trt_max_partition_iterations.c_str()}, - {"trt_min_subgraph_size", trt_min_subgraph_size.c_str()}, - {"trt_fp16_enable", trt_fp16_enable.c_str()}, - {"trt_detailed_build_log", trt_detailed_build_log.c_str()}, - {"trt_engine_cache_enable", trt_engine_cache_enable.c_str()}, - {"trt_engine_cache_path", trt_config.trt_engine_cache_path.c_str()}, - {"trt_timing_cache_enable", trt_timing_cache_enable.c_str()}, - {"trt_timing_cache_path", trt_config.trt_timing_cache_path.c_str()}, - {"trt_dump_subgraphs", trt_dump_subgraphs.c_str()} - }; + {"device_id", device_id.c_str()}, + {"trt_max_workspace_size", trt_max_workspace_size.c_str()}, + {"trt_max_partition_iterations", + 
trt_max_partition_iterations.c_str()}, + {"trt_min_subgraph_size", trt_min_subgraph_size.c_str()}, + {"trt_fp16_enable", trt_fp16_enable.c_str()}, + {"trt_detailed_build_log", trt_detailed_build_log.c_str()}, + {"trt_engine_cache_enable", trt_engine_cache_enable.c_str()}, + {"trt_engine_cache_path", trt_config.trt_engine_cache_path.c_str()}, + {"trt_timing_cache_enable", trt_timing_cache_enable.c_str()}, + {"trt_timing_cache_path", trt_config.trt_timing_cache_path.c_str()}, + {"trt_dump_subgraphs", trt_dump_subgraphs.c_str()}}; // ToDo : Trt configs // "trt_int8_enable" // "trt_int8_use_native_calibration_table" @@ -152,9 +167,8 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads, if (provider_config != nullptr) { options.device_id = provider_config->device; - options.cudnn_conv_algo_search = - OrtCudnnConvAlgoSearch(provider_config->cuda_config - .cudnn_conv_algo_search); + options.cudnn_conv_algo_search = OrtCudnnConvAlgoSearch( + provider_config->cuda_config.cudnn_conv_algo_search); } else { options.device_id = 0; // Default OrtCudnnConvAlgoSearchExhaustive is extremely slow @@ -220,7 +234,8 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads, Ort::SessionOptions GetSessionOptions(const OnlineModelConfig &config) { return GetSessionOptionsImpl(config.num_threads, - config.provider_config.provider, &config.provider_config); + config.provider_config.provider, + &config.provider_config); } Ort::SessionOptions GetSessionOptions(const OfflineModelConfig &config) { From 1fbce1a68257a1acccccd73428f7fb2d0c6faac5 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 11 Jul 2024 20:38:02 +0800 Subject: [PATCH 3/5] Add CI for ROCM --- .github/workflows/linux-rocm.yaml | 205 ++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 .github/workflows/linux-rocm.yaml diff --git a/.github/workflows/linux-rocm.yaml b/.github/workflows/linux-rocm.yaml new file mode 100644 index 0000000000..0e90340589 --- 
/dev/null +++ b/.github/workflows/linux-rocm.yaml @@ -0,0 +1,205 @@ +name: linux-rocm + +on: + push: + branches: + - master + - rocm + tags: + - 'v[0-9]+.[0-9]+.[0-9]+*' + paths: + - '.github/workflows/linux-rocm.yaml' + - '.github/scripts/test-online-transducer.sh' + - '.github/scripts/test-online-paraformer.sh' + - '.github/scripts/test-offline-transducer.sh' + - '.github/scripts/test-offline-ctc.sh' + - '.github/scripts/test-online-ctc.sh' + - '.github/scripts/test-offline-tts.sh' + - 'CMakeLists.txt' + - 'cmake/**' + - 'sherpa-onnx/csrc/*' + - 'sherpa-onnx/c-api/*' + - 'c-api-examples/**' + pull_request: + branches: + - master + paths: + - '.github/workflows/linux-rocm.yaml' + - '.github/scripts/test-online-transducer.sh' + - '.github/scripts/test-online-paraformer.sh' + - '.github/scripts/test-offline-transducer.sh' + - '.github/scripts/test-offline-ctc.sh' + - '.github/scripts/test-online-ctc.sh' + - '.github/scripts/test-online-ctc.sh' + - '.github/scripts/test-offline-tts.sh' + - 'CMakeLists.txt' + - 'cmake/**' + - 'sherpa-onnx/csrc/*' + - 'sherpa-onnx/c-api/*' + + workflow_dispatch: + +concurrency: + group: linux-rocm-${{ github.ref }} + cancel-in-progress: true + +jobs: + linux_rocm: + runs-on: ${{ matrix.os }} + name: ${{ matrix.build_type }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + # build_type: [Release, Debug] + build_type: [Release] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-${{ matrix.build_type }}-rocm + + - name: Configure CMake + shell: bash + run: | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + cmake --version + + mkdir build + cd build + cmake \ + -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} \ + -D CMAKE_INSTALL_PREFIX=./install \ + -D BUILD_SHARED_LIBS=ON \ + -D SHERPA_ONNX_ENABLE_ROCM=ON \ + .. 
+ + - name: Build sherpa-onnx for ubuntu + shell: bash + run: | + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" + + cd build + make -j2 + make install + + ls -lh lib + ls -lh bin + + echo "----" + ls -lh install/lib + + echo "----" + ls -lh install/bin + + - name: Display dependencies of sherpa-onnx for linux + shell: bash + run: | + file build/bin/sherpa-onnx + readelf -d build/bin/sherpa-onnx + echo "----" + ldd build/bin/sherpa-onnx + + - name: Test spoken language identification + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline-language-identification + + .github/scripts/test-spoken-language-identification.sh + + - name: Test online CTC + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx + + .github/scripts/test-online-ctc.sh + + - name: Test offline TTS + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline-tts + + .github/scripts/test-offline-tts.sh + + - name: Test online paraformer + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx + + .github/scripts/test-online-paraformer.sh + + + - name: Test offline Whisper + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline + + .github/scripts/test-offline-whisper.sh + + - name: Test offline CTC + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline + + .github/scripts/test-offline-ctc.sh + + - name: Test offline transducer + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx-offline + + .github/scripts/test-offline-transducer.sh + + - name: Test online transducer + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=sherpa-onnx + + .github/scripts/test-online-transducer.sh + + - name: Test online transducer (C API) + shell: bash + run: | + export PATH=$PWD/build/bin:$PATH + export EXE=decode-file-c-api + + 
.github/scripts/test-online-transducer.sh + + - name: Copy files + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + shell: bash + run: | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) + + dst=sherpa-onnx-${SHERPA_ONNX_VERSION}-linux-x64-rocm + mkdir $dst + + cp -a build/install/bin $dst/ + cp -a build/install/lib $dst/ + cp -a build/install/include $dst/ + + tree $dst + + tar cjvf ${dst}.tar.bz2 $dst + + - name: Release pre-compiled binaries and libs for linux x64 + if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/') + uses: svenstaro/upload-release-action@v2 + with: + file_glob: true + overwrite: true + file: sherpa-onnx-*linux-x64-rocm.tar.bz2 From 1a58ae6421f08f32e5ede93a45243bc7b1b4ead5 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 11 Jul 2024 20:40:22 +0800 Subject: [PATCH 4/5] fix typos --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index aad04ef66b..2bec3ab233 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,7 +94,8 @@ Hint: You don't need sudo permission to install CUDA toolkit. 
Please refer to https://k2-fsa.github.io/k2/installation/cuda-cudnn.html to install CUDA toolkit if you have not installed it.") if(NOT BUILD_SHARED_LIBS) - message(STATUS "Set BUILD_SHARED_LIBS to ON since SHERPA_ONNX_ENABLE_GPU is ON") set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) + message(STATUS "Set BUILD_SHARED_LIBS to ON since SHERPA_ONNX_ENABLE_GPU is ON") + set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) endif() endif() From 5650413b7e50d3b1ce300bc45c439600b094e47c Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 11 Jul 2024 21:29:53 +0800 Subject: [PATCH 5/5] fix style issues --- sherpa-onnx/csrc/provider-config.cc | 59 ++++++++++++++--------------- 1 file changed, 28 insertions(+), 31 deletions(-) diff --git a/sherpa-onnx/csrc/provider-config.cc b/sherpa-onnx/csrc/provider-config.cc index 3c8f0ee473..1db62aa6bc 100644 --- a/sherpa-onnx/csrc/provider-config.cc +++ b/sherpa-onnx/csrc/provider-config.cc @@ -13,14 +13,15 @@ namespace sherpa_onnx { void CudaConfig::Register(ParseOptions *po) { po->Register("cuda-cudnn-conv-algo-search", &cudnn_conv_algo_search, - "CuDNN convolution algrorithm search"); + "CuDNN convolution algorithm search"); } bool CudaConfig::Validate() const { if (cudnn_conv_algo_search < 1 || cudnn_conv_algo_search > 3) { - SHERPA_ONNX_LOGE("cudnn_conv_algo_search: '%d' is not a valid option." - "Options : [1,3]. Check OnnxRT docs", - cudnn_conv_algo_search); + SHERPA_ONNX_LOGE( + "cudnn_conv_algo_search: '%d' is not a valid option. " + "Options : [1,3]. 
Check OnnxRT docs", + cudnn_conv_algo_search); return false; } return true; @@ -37,41 +38,41 @@ std::string CudaConfig::ToString() const { void TensorrtConfig::Register(ParseOptions *po) { po->Register("trt-max-workspace-size", &trt_max_workspace_size, - "Set TensorRT EP GPU memory usage limit."); + "Set TensorRT EP GPU memory usage limit."); po->Register("trt-max-partition-iterations", &trt_max_partition_iterations, - "Limit partitioning iterations for model conversion."); + "Limit partitioning iterations for model conversion."); po->Register("trt-min-subgraph-size", &trt_min_subgraph_size, - "Set minimum size for subgraphs in partitioning."); + "Set minimum size for subgraphs in partitioning."); po->Register("trt-fp16-enable", &trt_fp16_enable, - "Enable FP16 precision for faster performance."); + "Enable FP16 precision for faster performance."); po->Register("trt-detailed-build-log", &trt_detailed_build_log, - "Enable detailed logging of build steps."); + "Enable detailed logging of build steps."); po->Register("trt-engine-cache-enable", &trt_engine_cache_enable, - "Enable caching of TensorRT engines."); + "Enable caching of TensorRT engines."); po->Register("trt-timing-cache-enable", &trt_timing_cache_enable, - "Enable use of timing cache to speed up builds."); + "Enable use of timing cache to speed up builds."); po->Register("trt-engine-cache-path", &trt_engine_cache_path, - "Set path to store cached TensorRT engines."); + "Set path to store cached TensorRT engines."); po->Register("trt-timing-cache-path", &trt_timing_cache_path, - "Set path for storing timing cache."); + "Set path for storing timing cache."); po->Register("trt-dump-subgraphs", &trt_dump_subgraphs, - "Dump optimized subgraphs for debugging."); + "Dump optimized subgraphs for debugging."); } bool TensorrtConfig::Validate() const { if (trt_max_workspace_size < 0) { - SHERPA_ONNX_LOGE("trt_max_workspace_size: %lld is not valid.", - trt_max_workspace_size); + 
SHERPA_ONNX_LOGE("trt_max_workspace_size: %ld is not valid.", + trt_max_workspace_size); return false; } if (trt_max_partition_iterations < 0) { SHERPA_ONNX_LOGE("trt_max_partition_iterations: %d is not valid.", - trt_max_partition_iterations); + trt_max_partition_iterations); return false; } if (trt_min_subgraph_size < 0) { SHERPA_ONNX_LOGE("trt_min_subgraph_size: %d is not valid.", - trt_min_subgraph_size); + trt_min_subgraph_size); return false; } @@ -83,23 +84,19 @@ std::string TensorrtConfig::ToString() const { os << "TensorrtConfig("; os << "trt_max_workspace_size=" << trt_max_workspace_size << ", "; - os << "trt_max_partition_iterations=" - << trt_max_partition_iterations << ", "; + os << "trt_max_partition_iterations=" << trt_max_partition_iterations << ", "; os << "trt_min_subgraph_size=" << trt_min_subgraph_size << ", "; - os << "trt_fp16_enable=\"" - << (trt_fp16_enable? "True" : "False") << "\", "; + os << "trt_fp16_enable=\"" << (trt_fp16_enable ? "True" : "False") << "\", "; os << "trt_detailed_build_log=\"" - << (trt_detailed_build_log? "True" : "False") << "\", "; + << (trt_detailed_build_log ? "True" : "False") << "\", "; os << "trt_engine_cache_enable=\"" - << (trt_engine_cache_enable? "True" : "False") << "\", "; - os << "trt_engine_cache_path=\"" - << trt_engine_cache_path.c_str() << "\", "; + << (trt_engine_cache_enable ? "True" : "False") << "\", "; + os << "trt_engine_cache_path=\"" << trt_engine_cache_path.c_str() << "\", "; os << "trt_timing_cache_enable=\"" - << (trt_timing_cache_enable? "True" : "False") << "\", "; - os << "trt_timing_cache_path=\"" - << trt_timing_cache_path.c_str() << "\","; - os << "trt_dump_subgraphs=\"" - << (trt_dump_subgraphs? "True" : "False") << "\" )"; + << (trt_timing_cache_enable ? "True" : "False") << "\", "; + os << "trt_timing_cache_path=\"" << trt_timing_cache_path.c_str() << "\","; + os << "trt_dump_subgraphs=\"" << (trt_dump_subgraphs ? "True" : "False") + << "\" )"; return os.str(); }