Commit
Merge pull request #189 from asappresearch/torchscript_gpu_v2.5
GPU inference in C++ & Torchscript
taoleicn authored May 19, 2021
2 parents a698784 + 4eefcc0 commit 9ddc8da
Showing 3 changed files with 46 additions and 14 deletions.
36 changes: 26 additions & 10 deletions sru/csrc/CMakeLists.txt
@@ -1,21 +1,37 @@
 cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
-project(foo)
+project(sru_cpp_example)
 
 find_package(Torch REQUIRED)
 
-# Define our library target
+# Define sru_cpu and link against LibTorch
 add_library(sru_cpu SHARED sru_cpu_impl.cpp)
-# Enable C++14
 target_compile_features(sru_cpu PRIVATE cxx_std_14)
-# Link against LibTorch
 target_link_libraries(sru_cpu "${TORCH_LIBRARIES}")
 
-# Define our library target
-add_library(sru_cuda SHARED sru_cuda_impl_dummy.cpp)
-# Enable C++14
-target_compile_features(sru_cuda PRIVATE cxx_std_14)
-# Link against LibTorch
-target_link_libraries(sru_cuda "${TORCH_LIBRARIES}")
+message(STATUS " CMAKE_PREFIX_PATH: ${CMAKE_PREFIX_PATH}")
+message(STATUS " TORCH LIBS: ${TORCH_LIBRARIES}")
+
+# Define sru_gpu and link against LibTorch
+find_package(CUDA)
+if (CUDA_FOUND)
+    # When libtorch is built with the old GCC ABI, dependent libraries must too.
+    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
+        if(DEFINED GLIBCXX_USE_CXX11_ABI)
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")
+        endif()
+    endif()
+    message(STATUS " GLIBCXX_USE_CXX11_ABI: ${GLIBCXX_USE_CXX11_ABI}")
+    message(STATUS " CXXFLAGS: ${CMAKE_CXX_FLAGS}")
+
+    CUDA_ADD_LIBRARY(sru_cuda SHARED sru_cuda_kernel.cu sru_cuda_impl.cpp)
+    target_compile_features(sru_cuda PRIVATE cxx_std_14)
+    target_link_libraries(sru_cuda "${TORCH_LIBRARIES}")
+else()
+    message(STATUS "CUDA not found. Use dummy SRU_CUDA implementation.")
+    add_library(sru_cuda SHARED sru_cuda_impl_dummy.cpp)
+    target_compile_features(sru_cuda PRIVATE cxx_std_14)
+    target_link_libraries(sru_cuda "${TORCH_LIBRARIES}")
+endif()
 
 add_executable(example_app main_test_cpp.cpp)
 target_link_libraries(example_app "${TORCH_LIBRARIES}")
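
The `find_package(CUDA)` branch above only decides, at build time, which `sru_cuda` implementation gets compiled. A binary built with the CUDA library can still land on a machine with no visible GPU, so a runtime guard is a sensible complement. A minimal sketch of such a guard, assuming only the standard LibTorch API (the `pick_device` helper is illustrative and not part of this commit):

```
#include <torch/torch.h>

// Hypothetical helper: prefer CUDA when the caller asks for it and a
// device is actually visible at runtime; otherwise fall back to CPU.
torch::Device pick_device(bool want_cuda) {
    if (want_cuda && torch::cuda::is_available()) {
        return torch::Device(torch::kCUDA);
    }
    return torch::Device(torch::kCPU);
}
```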
6 changes: 4 additions & 2 deletions sru/csrc/README.md
@@ -13,7 +13,7 @@ From the `csrc` directory, compile the code using `cmake`:
 ```
 $ mkdir build
 $ cd build
-$ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" ..
+$ cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" -DGLIBCXX_USE_CXX11_ABI="$(python -c 'import torch; print(1 if torch._C._GLIBCXX_USE_CXX11_ABI else 0)')" ..
 $ make -j
 ```
 Note that `torch.utils.cmake_prefix_path` is not available in earlier versions of pytorch such as
@@ -24,6 +24,7 @@ After compilation, you should be able to see and run `example_app` binary, which
 torchscript SRU model as input:
 ```
 $ ./example_app <path to an exported torchscript SRU model>
+$ ./example_app <path to an exported torchscript SRU model> cuda
 ```
 
 ## Save and load a torchscript SRU model
@@ -40,4 +41,5 @@ torchscript_model.save("example_model.pt")
 Test loading the model:
 ```
 $ ./example_app <path to the model>/example_model.pt
-```
+$ ./example_app <path to the model>/example_model.pt cuda
+```
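
`torch::jit::load` throws `c10::Error` when the model file is missing, corrupted, or serialized incompatibly, so wrapping the load in a `try`/`catch` fails more gracefully than an unhandled exception. A small illustrative loader, not code from this PR (`load_check` is a hypothetical program name):

```
#include <torch/script.h>
#include <iostream>

int main(int argc, const char* argv[]) {
    if (argc < 2) {
        std::cerr << "usage: load_check <path-to-model>\n";
        return 1;
    }
    try {
        // Throws c10::Error if the archive cannot be read or deserialized.
        torch::jit::script::Module module = torch::jit::load(argv[1]);
        std::cout << "loaded " << argv[1] << " successfully\n";
    } catch (const c10::Error& e) {
        std::cerr << "failed to load model: " << e.what() << "\n";
        return 1;
    }
    return 0;
}
```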
18 changes: 16 additions & 2 deletions sru/csrc/main_test_cpp.cpp
@@ -6,20 +6,34 @@
 #include <typeinfo>
 
 int main(int argc, const char* argv[]) {
-    if (argc != 2) {
+    if ((argc != 2) && (argc != 3)) {
         std::cerr << "usage: example-app <path-to-exported-script-module>\n";
+        std::cerr << "usage: example-app <path-to-exported-script-module> cuda\n";
         return -1;
     }
 
+    bool use_cuda = (argc == 3) && (strcmp(argv[2], "cuda") == 0);
+
     // Deserialize the ScriptModule from a file using torch::jit::load().
     torch::jit::script::Module module = torch::jit::load(argv[1]);
+    if (use_cuda) {
+        module.to(torch::kCUDA);
+    }
+    else {
+        module.to(torch::kCPU);
+    }
 
     auto num_layers = module.attr("num_layers").toInt();
     auto input_size = module.attr("input_size").toInt();
     auto hidden_size = module.attr("hidden_size").toInt();
 
     std::vector<torch::jit::IValue> inputs;
-    inputs.push_back(torch::ones({3, 2, input_size}));
+    if (use_cuda) {
+        inputs.push_back(torch::ones({3, 2, input_size}).to(torch::kCUDA));
+    }
+    else {
+        inputs.push_back(torch::ones({3, 2, input_size}));
+    }
     auto outputs = module.forward(std::move(inputs));
     auto h = outputs.toTuple()->elements()[0].toTensor();
     auto c = outputs.toTuple()->elements()[1].toTensor();
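
After `forward`, `h` and `c` live on whatever device the inputs were placed on, so on the `cuda` path they are GPU tensors. A caller that wants to print or compare them host-side would copy them back first; an illustrative continuation, not part of the commit:

```
// Illustrative continuation: copy the outputs to the host and print shapes.
auto h_cpu = h.to(torch::kCPU);
auto c_cpu = c.to(torch::kCPU);
std::cout << "h sizes: " << h_cpu.sizes()
          << "  c sizes: " << c_cpu.sizes() << std::endl;
```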
