Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 13 additions & 14 deletions .superci/armory.yml → .superci/galapagos.mi210.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,10 @@ steps:
sbatch_options:
- "--account=fluidnumerics"
- "--gres=gpu:mi210:2"
- "--ntasks=6"
- "--cpus-per-task=2"
- "--ntasks=2"
- "--cpus-per-task=16"
- "--time=40:00"
prerequisites:
- "source /etc/profile.d/z11_lmod.sh"
prerequisites: []
modules:
- cmake/3.31.2
- gcc/12.4.0
Expand All @@ -17,36 +16,36 @@ steps:
- feq-parse/2.2.2
env:
BUILD_DIR: ${WORKSPACE}/build
PREFIX: ${WORKSPACE}/opt/self
PREFIX: ${WORKSPACE}/install
OUTDIR: ${WORKSPACE}/local
GPU_ARCH: gfx90a
BUILD_TYPE: coverage
BUILD_TYPE: release
ENABLE_GPU: ON
ENABLE_DOUBLE_PRECISION: ON
ENABLE_MULTITHREADING: OFF
ENABLE_TESTING: ON
ENABLE_EXAMPLES: ON
NTHREADS: 4
GCOV: gcov-12
GCOV: gcov
commands:
- |
set -e
mkdir -p ${BUILD_DIR}
mkdir -p ${OUTDIR}
cd ${WORKSPACE}/build
FC=gfortran \
CXX=hipcc \
cd ${BUILD_DIR}
cmake -G Ninja \
-DCMAKE_PREFIX_PATH=${ROCM_PATH} \
-DCMAKE_INSTALL_PREFIX=${PREFIX} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DSELF_ENABLE_GPU=${ENABLE_GPU} \
-DSELF_ENABLE_MULTITHREADING=${ENABLE_MULTITHREADING} \
-DSELF_MULTITHREADING_NTHREADS=${NTHREADS} \
-DSELF_ENABLE_DOUBLE_PRECISION=${ENABLE_DOUBLE_PRECISION} \
-DAMDGPU_TARGETS=${GPU_ARCH} \
-DCMAKE_HIP_ARCHITECTURES=${GPU_ARCH} \
-DSELF_ENABLE_EXAMPLES=${ENABLE_EXAMPLES} \
-DSELF_ENABLE_TESTING=${ENABLE_TESTING} \
../
ninja


# Initialize coverage
if [ "$BUILD_TYPE" = "coverage" ]; then
lcov --capture \
Expand Down Expand Up @@ -79,7 +78,7 @@ steps:
--sha "${COMMIT_SHA}" \
--branch "${BRANCH_NAME}" \
--pr "${PR_NUMBER}" \
--flag "armory-noether-gfx90a-test" \
--flag "galapagos-noether-gfx90a-test" \
--file "${WORKSPACE}/coverage.info"
fi

85 changes: 85 additions & 0 deletions .superci/galapagos.v100.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# CI pipeline definition (stored under .superci/) that configures, builds, and
# tests SELF with the CUDA backend on a V100 GPU allocation.
steps:
# NOTE(review): the step name says "Noether" while the codecov flag at the end
# of the script says "oram" -- confirm which host name is intended.
- name : "Build on Noether"
# Batch-job options: two V100 GPUs, two tasks with six CPUs each, and a 40:00
# time limit (Slurm reads MM:SS, i.e. 40 minutes).
sbatch_options:
- "--account=fluidnumerics"
- "--gres=gpu:v100:2"
- "--ntasks=2"
- "--cpus-per-task=6"
- "--time=40:00"
# No extra setup commands are listed before the modules below.
prerequisites: []
# Toolchain and dependency modules (compiler, CMake, CUDA, MPI, HDF5, feq-parse).
modules:
- gcc/12.4.0
- cmake/3.31.2
- cuda/12.4.1
- openmpi/5.0.6
- hdf5/1.14.5
- feq-parse/2.2.2
# Variables consumed by the script under `commands`.
#   BUILD_DIR / PREFIX / OUTDIR : build tree, install prefix, and an output
#                                 directory, all rooted at ${WORKSPACE}.
#   GPU_ARCH   : forwarded to CMAKE_CUDA_ARCHITECTURES.
#   BUILD_TYPE : forwarded to CMAKE_BUILD_TYPE; because it is "release", the
#                sections guarded by `"$BUILD_TYPE" = "coverage"` are skipped.
#   ENABLE_*   : forwarded to the matching SELF_ENABLE_* CMake options.
#   NTHREADS   : forwarded as SELF_MULTITHREADING_NTHREADS.
#                NOTE(review): the top-level CMakeLists.txt hunk in this change
#                declares the cache variable as SELF_MULITHREADING_NTHREADS
#                (different spelling) -- confirm which name the build reads.
#   GCOV       : gcov executable handed to lcov via --gcov.
env:
BUILD_DIR: ${WORKSPACE}/build
PREFIX: ${WORKSPACE}/install
OUTDIR: ${WORKSPACE}/local
GPU_ARCH: 70
BUILD_TYPE: release
ENABLE_GPU: ON
ENABLE_DOUBLE_PRECISION: ON
ENABLE_MULTITHREADING: OFF
ENABLE_TESTING: ON
ENABLE_EXAMPLES: ON
NTHREADS: 4
GCOV: gcov
# A single shell script: configure with CMake (Ninja generator), build, run
# ctest, and -- for coverage builds only -- collect and upload coverage.
commands:
- |
# Stop at the first failing command.
set -e
mkdir -p ${BUILD_DIR}
mkdir -p ${OUTDIR}
cd ${BUILD_DIR}
# Configure from inside the build directory; `../` is ${WORKSPACE}.
cmake -G Ninja \
-DCMAKE_INSTALL_PREFIX=${PREFIX} \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DSELF_ENABLE_GPU=${ENABLE_GPU} \
-DSELF_ENABLE_MULTITHREADING=${ENABLE_MULTITHREADING} \
-DSELF_MULTITHREADING_NTHREADS=${NTHREADS} \
-DSELF_ENABLE_DOUBLE_PRECISION=${ENABLE_DOUBLE_PRECISION} \
-DCMAKE_CUDA_ARCHITECTURES=${GPU_ARCH} \
-DSELF_ENABLE_EXAMPLES=${ENABLE_EXAMPLES} \
-DSELF_ENABLE_TESTING=${ENABLE_TESTING} \
../
ninja


# Initialize coverage
# (baseline capture taken before any test runs; coverage builds only)
if [ "$BUILD_TYPE" = "coverage" ]; then
lcov --capture \
--initial \
--directory ${BUILD_DIR}/src \
--gcov=${GCOV} \
--output-file ${WORKSPACE}/initial.info
fi


# Run ctests
ctest --test-dir ${BUILD_DIR} --verbose

if [ "$BUILD_TYPE" = "coverage" ]; then
# Compile coverage information
lcov --capture \
--directory ${BUILD_DIR}/src \
--gcov=${GCOV} \
--output-file ${WORKSPACE}/ctest-capture.info

# Merge the baseline capture with the post-test capture.
lcov --add-tracefile ${WORKSPACE}/initial.info \
--add-tracefile ${WORKSPACE}/ctest-capture.info \
--gcov=${GCOV} \
--output-file ${WORKSPACE}/coverage.info

# Generate summary
lcov --summary ${WORKSPACE}/coverage.info

# Upload the merged tracefile with the codecov uploader found under ${HOME}/.local/bin.
${HOME}/.local/bin/codecov-linux -t "${CODECOV_TOKEN}" \
--sha "${COMMIT_SHA}" \
--branch "${BRANCH_NAME}" \
--pr "${PR_NUMBER}" \
--flag "galapagos-oram-v100-test" \
--file "${WORKSPACE}/coverage.info"
fi

83 changes: 32 additions & 51 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,9 @@ cmake_minimum_required(VERSION 3.21)
cmake_policy(VERSION 3.21...3.27)

# C Language is needed in order to verify Fortran compiler is C-interoperable
# CXX language is needed to properly find "hip" package
project(SELF VERSION 1.0.0
DESCRIPTION "Spectral Element Library in Fortran"
LANGUAGES Fortran C CXX)

LANGUAGES Fortran C)

option(SELF_ENABLE_MULTITHREADING "Option to enable CPU multithreading for `do concurrent` loop blocks." OFF)
option(SELF_ENABLE_TESTING "Option to enable build of tests. (Default On)" ON)
Expand All @@ -48,13 +46,6 @@ if(SELF_ENABLE_MULTITHREADING)
# NOTE(review): this cache variable is spelled SELF_MULITHREADING_NTHREADS (the
# first "T" of "MULTI" is missing), while the CI scripts in this change pass
# -DSELF_MULTITHREADING_NTHREADS. Confirm which spelling the rest of the build
# reads before renaming; as written, the CI-provided value may never reach it.
set(SELF_MULITHREADING_NTHREADS "4" CACHE STRING "Number of threads to use for `do concurrent` loop blocks. This option is only used with GNU compilers. Other compilers use OMP_NUM_THREADS environment variable at runtime.")
endif()

if(NOT DEFINED ROCM_PATH)
if(NOT DEFINED ENV{ROCM_PATH})
set(ROCM_PATH "/opt/rocm/" CACHE PATH "Path to which ROCm has been installed")
else()
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed")
endif()
endif()


# Fortran compiler requirements
Expand Down Expand Up @@ -157,11 +148,7 @@ if(SELF_ENABLE_DOUBLE_PRECISION)
set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DDOUBLE_PRECISION")
set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DDOUBLE_PRECISION" )

set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDOUBLE_PRECISION" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDOUBLE_PRECISION" )
set( CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_COVERAGE} -DDOUBLE_PRECISION")
set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_PROFILE} -DDOUBLE_PRECISION")
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DDOUBLE_PRECISION" )

endif()

if(SELF_ENABLE_GPU)
Expand All @@ -172,12 +159,6 @@ if(SELF_ENABLE_GPU)
set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DENABLE_GPU")
set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DENABLE_GPU" )

set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_GPU" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DENABLE_GPU" )
set( CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_COVERAGE} -DENABLE_GPU")
set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_PROFILE} -DENABLE_GPU")
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DENABLE_GPU" )

# Check MPI for GPU awareness
# Add SELF's cmake module directory to the search path
set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/cmake")
Expand All @@ -193,49 +174,49 @@ if(SELF_ENABLE_GPU)
if(hip_FOUND)
if(MPI_HAS_QUERY_HIP_SUPPORT)
find_package(hipblas REQUIRED)
#message("-- HIP found. Enabling HIP language.")
#enable_language(HIP)
message("-- HIP found. Enabling HIP language.")
enable_language(HIP)
set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DHAVE_HIP" )
set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -DHAVE_HIP" )
set( CMAKE_Fortran_FLAGS_COVERAGE "${CMAKE_Fortran_FLAGS_COVERAGE} -DHAVE_HIP")
set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DHAVE_HIP")
set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DHAVE_HIP" )

set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_HIP" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAVE_HIP" )
set( CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_COVERAGE} -DHAVE_HIP")
set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_PROFILE} -DHAVE_HIP")
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DHAVE_HIP" )

if(SELF_ENABLE_DOUBLE_PRECISION)
set( CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -DDOUBLE_PRECISION" )
set( CMAKE_HIP_FLAGS_DEBUG "${CMAKE_HIP_FLAGS_DEBUG} -DDOUBLE_PRECISION" )
set( CMAKE_HIP_FLAGS_COVERAGE "${CMAKE_HIP_FLAGS_COVERAGE} -DDOUBLE_PRECISION")
set( CMAKE_HIP_FLAGS_PROFILE "${CMAKE_HIP_FLAGS_PROFILE} -DDOUBLE_PRECISION")
set( CMAKE_HIP_FLAGS_RELEASE "${CMAKE_HIP_FLAGS_RELEASE} -DDOUBLE_PRECISION" )
endif()
set( BACKEND_LIBRARIES hip::device roc::hipblas)
else()
message( FATAL_ERROR "MPI installation is not GPU-aware" )
endif()
else()
# CUDA (Optional)
find_package(cuda)
if(cuda_FOUND)
if(MPI_HAS_QUERY_CUDA_SUPPORT)
#message("-- CUDA found. Enabling CUDA language.")
#enable_language(CUDA)
set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DHAVE_CUDA" )
set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -DHAVE_CUDA" )
set( CMAKE_Fortran_FLAGS_COVERAGE "${CMAKE_Fortran_FLAGS_COVERAGE} -DHAVE_CUDA")
set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DHAVE_CUDA")
set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DHAVE_CUDA" )

set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_CUDA" )
set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAVE_CUDA" )
set( CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_COVERAGE} -DHAVE_CUDA")
set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_PROFILE} -DHAVE_CUDA")
set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DHAVE_CUDA" )

# TO DO - need cuda libraries and hipblas libraries
else()
message( FATAL_ERROR "MPI installation is not GPU-aware" )
# CUDA
find_package(CUDAToolkit REQUIRED)
message("-- CUDA found. Enabling CUDA language.")
enable_language(CUDA)
if(MPI_HAS_QUERY_CUDA_SUPPORT)
set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DHAVE_CUDA" )
set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -DHAVE_CUDA" )
set( CMAKE_Fortran_FLAGS_COVERAGE "${CMAKE_Fortran_FLAGS_COVERAGE} -DHAVE_CUDA")
set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DHAVE_CUDA")
set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DHAVE_CUDA" )

if(SELF_ENABLE_DOUBLE_PRECISION)
set( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DDOUBLE_PRECISION" )
set( CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -DDOUBLE_PRECISION" )
set( CMAKE_CUDA_FLAGS_COVERAGE "${CMAKE_CUDA_FLAGS_COVERAGE} -DDOUBLE_PRECISION")
set( CMAKE_CUDA_FLAGS_PROFILE "${CMAKE_CUDA_FLAGS_PROFILE} -DDOUBLE_PRECISION")
set( CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -DDOUBLE_PRECISION" )
endif()

set( BACKEND_LIBRARIES CUDA::cuda_driver CUDA::cudart CUDA::cublas)

else()
message( FATAL_ERROR "Enabling GPU support requires either HIP or CUDA." )
message( FATAL_ERROR "MPI installation is not GPU-aware" )
endif()
endif()
endif()
Expand Down
18 changes: 6 additions & 12 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,12 @@ file(GLOB SELF_FSRC "${CMAKE_CURRENT_SOURCE_DIR}/*.f*")
if(SELF_ENABLE_GPU)
file(GLOB SELF_BACKEND_FSRC "${CMAKE_CURRENT_SOURCE_DIR}/gpu/*.f*")
file(GLOB SELF_BACKEND_CPPSRC "${CMAKE_CURRENT_SOURCE_DIR}/gpu/*.cpp*")
# Note : [email protected] (Oct. 1 2024)
# Ultimately, we want to be able to use the language support for HIP/CUDA
# rather than bringing in HIP/CUDA through `find_package`. At the moment
# we are using a hack that overrides the CXX compiler with either hipcc or nvcc.
# The reason we're doing it this way (hacky) at the moment is that we get
# segmentation faults on our AMD GPU tests when using the HIP language support
# via CMake, for some as-yet-unknown reason.
# if(hip_FOUND)
# set_source_files_properties(${SELF_BACKEND_CPPSRC} PROPERTIES LANGUAGE HIP)
# elseif(cuda_FOUND)
# set_source_files_properties(${SELF_BACKEND_CPPSRC} PROPERTIES LANGUAGE CUDA)
# endif()

if(hip_FOUND)
set_source_files_properties(${SELF_BACKEND_CPPSRC} PROPERTIES LANGUAGE HIP)
else()
set_source_files_properties(${SELF_BACKEND_CPPSRC} PROPERTIES LANGUAGE CUDA)
endif()
else()
file(GLOB SELF_BACKEND_FSRC "${CMAKE_CURRENT_SOURCE_DIR}/cpu/*.f*")
endif()
Expand Down
3 changes: 2 additions & 1 deletion src/gpu/SELF_GPU_Macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <climits>
#include <cstdio>

#ifdef HAVE_HIP
#ifdef __HIP_PLATFORM_AMD__

#include <hip/hip_runtime.h>

Expand All @@ -31,6 +31,7 @@ static void check(const hipError_t err, const char *const file, const int line)
#else

#include <cuda_runtime.h>
#include <stdint.h> // required to provide uint32_t

#define hipLaunchKernelGGL(F,G,B,M,S,...) F<<<G,B,M,S>>>(__VA_ARGS__)

Expand Down