Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@
[submodule "third_party/libnop"]
path = third_party/libnop
url = https://github.com/google/libnop.git
[submodule "third_party/hipify"]
path = third_party/hipify
url = https://github.com/ROCmSoftwarePlatform/hipify-torch.git
10 changes: 10 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,16 @@ include(Sanitize)
# Misc checks to cope with various compiler modes.
include(MiscCheck)

# ROCm related
if (TP_USE_ROCM)
include(Hip)
if(TP_HAVE_HIP)
include(Hipify)
else()
message(FATAL_ERROR "Not able to find HIP installation, so cant compile with ROCm support.")
endif()
endif()

add_subdirectory(tensorpipe)

install(EXPORT TensorpipeTargets
Expand Down
162 changes: 162 additions & 0 deletions cmake/Hip.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
set(TP_HAVE_HIP FALSE)

IF(NOT DEFINED ENV{ROCM_PATH})
SET(ROCM_PATH /opt/rocm)
ELSE()
SET(ROCM_PATH $ENV{ROCM_PATH})
ENDIF()

# HIP_PATH
IF(NOT DEFINED ENV{HIP_PATH})
SET(HIP_PATH ${ROCM_PATH}/hip)
ELSE()
SET(HIP_PATH $ENV{HIP_PATH})
ENDIF()

IF(NOT EXISTS ${HIP_PATH})
return()
ENDIF()

# HCC_PATH
IF(NOT DEFINED ENV{HCC_PATH})
SET(HCC_PATH ${ROCM_PATH}/hcc)
ELSE()
SET(HCC_PATH $ENV{HCC_PATH})
ENDIF()

# HSA_PATH
IF(NOT DEFINED ENV{HSA_PATH})
SET(HSA_PATH ${ROCM_PATH}/hsa)
ELSE()
SET(HSA_PATH $ENV{HSA_PATH})
ENDIF()

# ROCBLAS_PATH
IF(NOT DEFINED ENV{ROCBLAS_PATH})
SET(ROCBLAS_PATH ${ROCM_PATH}/rocblas)
ELSE()
SET(ROCBLAS_PATH $ENV{ROCBLAS_PATH})
ENDIF()

# ROCSPARSE_PATH
IF(NOT DEFINED ENV{ROCSPARSE_PATH})
SET(ROCSPARSE_PATH ${ROCM_PATH}/rocsparse)
ELSE()
SET(ROCSPARSE_PATH $ENV{ROCSPARSE_PATH})
ENDIF()

# ROCFFT_PATH
IF(NOT DEFINED ENV{ROCFFT_PATH})
SET(ROCFFT_PATH ${ROCM_PATH}/rocfft)
ELSE()
SET(ROCFFT_PATH $ENV{ROCFFT_PATH})
ENDIF()

# HIPSPARSE_PATH
IF(NOT DEFINED ENV{HIPSPARSE_PATH})
SET(HIPSPARSE_PATH ${ROCM_PATH}/hipsparse)
ELSE()
SET(HIPSPARSE_PATH $ENV{HIPSPARSE_PATH})
ENDIF()

# THRUST_PATH
IF(DEFINED ENV{THRUST_PATH})
SET(THRUST_PATH $ENV{THRUST_PATH})
ELSE()
SET(THRUST_PATH ${ROCM_PATH}/include)
ENDIF()

# HIPRAND_PATH
IF(NOT DEFINED ENV{HIPRAND_PATH})
SET(HIPRAND_PATH ${ROCM_PATH}/hiprand)
ELSE()
SET(HIPRAND_PATH $ENV{HIPRAND_PATH})
ENDIF()

# ROCRAND_PATH
IF(NOT DEFINED ENV{ROCRAND_PATH})
SET(ROCRAND_PATH ${ROCM_PATH}/rocrand)
ELSE()
SET(ROCRAND_PATH $ENV{ROCRAND_PATH})
ENDIF()

# MIOPEN_PATH
IF(NOT DEFINED ENV{MIOPEN_PATH})
SET(MIOPEN_PATH ${ROCM_PATH}/miopen)
ELSE()
SET(MIOPEN_PATH $ENV{MIOPEN_PATH})
ENDIF()

IF(NOT DEFINED ENV{TP_ROCM_ARCH})
SET(TP_ROCM_ARCH gfx900;gfx906;gfx908)
ELSE()
SET(TP_ROCM_ARCH $ENV{TP_ROCM_ARCH})
ENDIF()

# Add HIP to the CMAKE Module Path
set(CMAKE_MODULE_PATH ${HIP_PATH}/cmake ${CMAKE_MODULE_PATH})

# Disable Asserts In Code (Can't use asserts on HIP stack.)
ADD_DEFINITIONS(-DNDEBUG)

# Find the HIP Package
find_package(HIP 1.0)

IF(HIP_FOUND)
set(TP_HAVE_HIP TRUE)

if(HIP_COMPILER STREQUAL clang)
set(hip_library_name amdhip64)
else()
set(hip_library_name hip_hcc)
endif()
message("HIP library name: ${hip_library_name}")

set(CMAKE_HCC_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG})
set(CMAKE_HCC_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE})
FIND_LIBRARY(TP_HIP_HCC_LIBRARIES ${hip_library_name} HINTS ${HIP_PATH}/lib)

list(APPEND HIP_CXX_FLAGS -fPIC)
list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1)
list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_OPERATORS__=1)
list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1)
list(APPEND HIP_CXX_FLAGS -DHIP_VERSION=${HIP_VERSION_MAJOR})
list(APPEND HIP_CXX_FLAGS -Wno-macro-redefined)
list(APPEND HIP_CXX_FLAGS -Wno-inconsistent-missing-override)
list(APPEND HIP_CXX_FLAGS -Wno-exceptions)
list(APPEND HIP_CXX_FLAGS -Wno-shift-count-negative)
list(APPEND HIP_CXX_FLAGS -Wno-shift-count-overflow)
list(APPEND HIP_CXX_FLAGS -Wno-unused-command-line-argument)
list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)

set(HIP_CLANG_FLAGS ${HIP_CXX_FLAGS})
# Ask hcc to generate device code during compilation so we can use
# host linker to link.
list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc)
list(APPEND HIP_CLANG_FLAGS -Wno-defaulted-function-deleted)
foreach(tp_rocm_arch ${TP_ROCM_ARCH})
list(APPEND HIP_CLANG_FLAGS --amdgpu-target=${tp_rocm_arch})
endforeach()

set(hip_DIR ${HIP_PATH}/lib/cmake/hip)
set(hsa-runtime64_DIR ${ROCM_PATH}/lib/cmake/hsa-runtime64)
set(AMDDeviceLibs_DIR ${ROCM_PATH}/lib/cmake/AMDDeviceLibs)
set(amd_comgr_DIR ${ROCM_PATH}/lib/cmake/amd_comgr)
set(rocrand_DIR ${ROCRAND_PATH}/lib/cmake/rocrand)
set(hiprand_DIR ${HIPRAND_PATH}/lib/cmake/hiprand)
set(rocblas_DIR ${ROCBLAS_PATH}/lib/cmake/rocblas)
set(miopen_DIR ${MIOPEN_PATH}/lib/cmake/miopen)
set(rocfft_DIR ${ROCFFT_PATH}/lib/cmake/rocfft)
set(hipfft_DIR ${HIPFFT_PATH}/lib/cmake/hipfft)
set(hipsparse_DIR ${HIPSPARSE_PATH}/lib/cmake/hipsparse)
set(rccl_DIR ${RCCL_PATH}/lib/cmake/rccl)
set(rocprim_DIR ${ROCPRIM_PATH}/lib/cmake/rocprim)
set(hipcub_DIR ${HIPCUB_PATH}/lib/cmake/hipcub)
set(rocthrust_DIR ${ROCTHRUST_PATH}/lib/cmake/rocthrust)
set(ROCclr_DIR ${ROCM_PATH}/rocclr/lib/cmake/rocclr)

find_package(hip REQUIRED)

set(TP_HIP_INCLUDE ${ROCM_PATH}/include ${TP_HIP_INCLUDE})
set(TP_HIP_INCLUDE ${hip_INCLUDE_DIRS} $<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}> $<INSTALL_INTERFACE:include> ${TP_HIP_INCLUDE})

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, the only reason for this entire file to exist is to be able to provide ${hip_INCLUDE_DIRS}? If so, why don't we just use ${HIP_PATH}/include? It'd reduce a lot of the seemingly-unrelated code here. @jeffdaily for comment

ENDIF()
62 changes: 62 additions & 0 deletions cmake/Hipify.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Copyright (c) Facebook, Inc. and its affiliates.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These functions seem generic enough to be valuable as a part of hipify-torch itself, since any CMake-based hipify flow would likely need these functions. Can we move them to a CMake file in hipify-torch and include that CMake file here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes it is moved to hipify-torch repo

# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# cmake file to trigger hipify

function(write_file_list FILE_SUFFIX INPUT_LIST)
message(STATUS "Writing ${FILE_SUFFIX} into file - file_${FILE_SUFFIX}.txt")
set(_FULL_FILE_NAME "${CMAKE_BINARY_DIR}/cuda_to_hip_list_${FILE_SUFFIX}.txt")
file(WRITE ${_FULL_FILE_NAME} "")
foreach(_SOURCE_FILE ${INPUT_LIST})
file(APPEND ${_FULL_FILE_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/${_SOURCE_FILE})
file(APPEND ${_FULL_FILE_NAME} "\n")
endforeach()
endfunction()

function(get_file_list FILE_SUFFIX OUTPUT_LIST)
set(_FULL_FILE_NAME "${CMAKE_BINARY_DIR}/cuda_to_hip_list_${FILE_SUFFIX}.txt")
file(STRINGS ${_FULL_FILE_NAME} _FILE_LIST)
set(${OUTPUT_LIST}_HIP ${_FILE_LIST} PARENT_SCOPE)
endfunction()

function(update_list_with_hip_files FILE_SUFFIX)
set(_SCRIPTS_DIR ${PROJECT_SOURCE_DIR}/tools/amd_build)
set(_FULL_FILE_NAME "${CMAKE_BINARY_DIR}/cuda_to_hip_list_${FILE_SUFFIX}.txt")
set(_EXE_COMMAND
${_SCRIPTS_DIR}/replace_cuda_with_hip_files.py
--io-file ${_FULL_FILE_NAME}
--dump-dict-directory ${CMAKE_BINARY_DIR})
execute_process(
COMMAND ${_EXE_COMMAND}
RESULT_VARIABLE _return_value)
if (NOT _return_value EQUAL 0)
message(FATAL_ERROR "Failed to get the list of hipified files!")
endif()
endfunction()

function(get_hipified_list FILE_SUFFIX INPUT_LIST OUTPUT_LIST)
write_file_list("${FILE_SUFFIX}" "${INPUT_LIST}")
update_list_with_hip_files("${FILE_SUFFIX}")
get_file_list("${FILE_SUFFIX}" __temp_srcs)
set(${OUTPUT_LIST} ${__temp_srcs_HIP} PARENT_SCOPE)
endfunction()


set(HIPIFY_SCRIPTS_DIR ${PROJECT_SOURCE_DIR}/tools/amd_build)
set(HIPIFY_COMMAND
${HIPIFY_SCRIPTS_DIR}/build_amd.py
--project-directory ${PROJECT_SOURCE_DIR}
--output-directory ${PROJECT_SOURCE_DIR}
--dump-dict-directory ${CMAKE_BINARY_DIR}
)

execute_process(
COMMAND ${HIPIFY_COMMAND}
RESULT_VARIABLE hipify_return_value
)
if (NOT hipify_return_value EQUAL 0)
message(FATAL_ERROR "Failed to hipify files!")
endif()
6 changes: 6 additions & 0 deletions cmake/Options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ endmacro()

# TODO: Default to ON if CUDA available.
option(TP_USE_CUDA "Enable support for CUDA tensors" OFF)
option(TP_USE_ROCM "Enable support for ROCM tensors" OFF)

# if both TP_USE_CUDA and TP_USE_ROCM is set then break
if(TP_USE_CUDA AND TP_USE_ROCM)
message(FATAL_ERROR "Tensorpipe can be built either for CUDA or ROCm, TP_USE_CUDA and TP_USE_ROCM both are set, erroring out!!!!")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested modification:

"TensorPipe does not support building for CUDA and ROCM at the same time. Please unset either TP_USE_CUDA or TP_USE_ROCM."

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

endif()

# Optional features
option(TP_BUILD_BENCHMARK "Build benchmarks" OFF)
Expand Down
60 changes: 41 additions & 19 deletions tensorpipe/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ list(APPEND TP_SRCS
list(APPEND TP_PUBLIC_HDRS
channel/mpt/factory.h)


## Transports

### uv
Expand Down Expand Up @@ -219,9 +220,9 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/config.h
DESTINATION ${TP_INSTALL_INCLUDEDIR}/tensorpipe)


## CUDA
## CUDA AND ROCM

if(TP_USE_CUDA)
if(TP_USE_CUDA OR TP_USE_ROCM)
# TP_SRCS is the list of source files that we need to build libtensorpipe.
set(TP_CUDA_SRCS)

Expand All @@ -234,9 +235,17 @@ if(TP_USE_CUDA)
# TP_INCLUDE_DIRS is list of include path to be used
set(TP_CUDA_INCLUDE_DIRS)

find_package(CUDA REQUIRED)
list(APPEND TP_CUDA_LINK_LIBRARIES ${CUDA_LIBRARIES})
list(APPEND TP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
if (TP_USE_CUDA)
set(TP_GPU_LIB_NAME "tensorpipe_cuda")
find_package(CUDA REQUIRED)
list(APPEND TP_CUDA_LINK_LIBRARIES ${CUDA_LIBRARIES})
list(APPEND TP_CUDA_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
elseif (TP_USE_ROCM)
set(TP_GPU_LIB_NAME "tensorpipe_hip")
# Finding of HIP package is already before hipifying the files
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious, any reason not looking for packages here as existing code did for CUDA?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

list(APPEND TP_CUDA_LINK_LIBRARIES ${TP_HIP_HCC_LIBRARIES})
list(APPEND TP_CUDA_INCLUDE_DIRS ${TP_HIP_INCLUDE})
Comment on lines +249 to +250
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regarding the naming, any reason they don't follow the CUDA ones, i.e., HIP_LIBRARIES and HIP_INCLUDE_DIRS?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me check with the HIP team, if there is any particular reason for keeping this name, i.e., hip_INCLUDE_DIRS or different with CUDA, and get back if any reason.

endif()

list(APPEND TP_CUDA_SRCS
common/cuda_buffer.cc)
Expand All @@ -246,12 +255,16 @@ if(TP_USE_CUDA)

### cuda_xth

list(APPEND TP_CUDA_SRCS
channel/cuda_xth/channel_impl.cc
channel/cuda_xth/context_impl.cc
channel/cuda_xth/factory.cc)
list(APPEND TP_CUDA_PUBLIC_HDRS
channel/cuda_xth/factory.h)
tp_conditional_backend(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Can we use uppercase for this macro everywhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated to uppercase.
@lw
Please let me know if it is breaking any convention followed in tensorpipe. I checked the pyTorch code for any hint, but there is no convention followed there.

TP_ENABLE_HIP_XTH "Enable HIP XTH communication channel" "TP_USE_ROCM")
if(TP_ENABLE_HIP_XTH OR TP_USE_CUDA)
list(APPEND TP_CUDA_SRCS
channel/cuda_xth/channel_impl.cc
channel/cuda_xth/context_impl.cc
channel/cuda_xth/factory.cc)
list(APPEND TP_CUDA_PUBLIC_HDRS
channel/cuda_xth/factory.h)
endif()

### cuda_basic

Expand All @@ -267,7 +280,9 @@ if(TP_USE_CUDA)

tp_conditional_backend(
TP_ENABLE_CUDA_IPC "Enable CUDA inter-process communication channel" "TP_USE_CUDA")
if(TP_ENABLE_CUDA_IPC)
tp_conditional_backend(
TP_ENABLE_HIP_IPC "Enable HIP inter-process communication channel" "TP_USE_ROCM")
if(TP_ENABLE_CUDA_IPC OR TP_ENABLE_HIP_IPC)
list(APPEND TP_CUDA_SRCS
channel/cuda_ipc/channel_impl.cc
channel/cuda_ipc/context_impl.cc
Expand All @@ -281,7 +296,9 @@ if(TP_USE_CUDA)

tp_conditional_backend(
TP_ENABLE_CUDA_GDR "Enable CUDA GpuDirect (InfiniBand) channel" "LINUX")
if(TP_ENABLE_CUDA_GDR)
tp_conditional_backend(
TP_ENABLE_HIP_GDR "Enable HIP GpuDirect (InfiniBand) channel" "LINUX")
if((TP_ENABLE_CUDA_GDR AND TP_USE_CUDA) OR (TP_ENABLE_HIP_GDR AND TP_USE_ROCM))
list(APPEND TP_CUDA_SRCS
common/ibv.cc
channel/cuda_gdr/channel_impl.cc
Expand All @@ -293,19 +310,24 @@ if(TP_USE_CUDA)
set(TENSORPIPE_HAS_CUDA_GDR_CHANNEL 1)
endif()

if(TP_USE_ROCM)
get_hipified_list("TP_CUDA_SRCS" "${TP_CUDA_SRCS}" TP_CUDA_SRCS)
get_hipified_list("TP_CUDA_PUBLIC_HDRS" "${TP_CUDA_PUBLIC_HDRS}" TP_CUDA_PUBLIC_HDRS)
endif()

configure_file(config_cuda.h.in config_cuda.h)

add_library(tensorpipe_cuda ${TP_STATIC_OR_SHARED} ${TP_CUDA_SRCS})
add_library(${TP_GPU_LIB_NAME} ${TP_STATIC_OR_SHARED} ${TP_CUDA_SRCS})

if(BUILD_SHARED_LIBS)
set_target_properties(tensorpipe_cuda PROPERTIES POSITION_INDEPENDENT_CODE 1)
set_target_properties(${TP_GPU_LIB_NAME} PROPERTIES POSITION_INDEPENDENT_CODE 1)
endif()

target_link_libraries(tensorpipe_cuda PUBLIC tensorpipe)
target_link_libraries(tensorpipe_cuda PRIVATE ${TP_CUDA_LINK_LIBRARIES})
target_include_directories(tensorpipe_cuda PUBLIC ${TP_CUDA_INCLUDE_DIRS})
target_link_libraries(${TP_GPU_LIB_NAME} PUBLIC tensorpipe)
target_link_libraries(${TP_GPU_LIB_NAME} PRIVATE ${TP_CUDA_LINK_LIBRARIES})
target_include_directories(${TP_GPU_LIB_NAME} PUBLIC ${TP_CUDA_INCLUDE_DIRS})

install(TARGETS tensorpipe_cuda
install(TARGETS ${TP_GPU_LIB_NAME}
EXPORT TensorpipeTargets
LIBRARY DESTINATION ${TP_INSTALL_LIBDIR}
ARCHIVE DESTINATION ${TP_INSTALL_LIBDIR})
Expand Down
Loading