diff --git a/.superci/armory.yml b/.superci/galapagos.mi210.yml similarity index 82% rename from .superci/armory.yml rename to .superci/galapagos.mi210.yml index 247fa91f7..42bee128f 100644 --- a/.superci/armory.yml +++ b/.superci/galapagos.mi210.yml @@ -3,11 +3,10 @@ steps: sbatch_options: - "--account=fluidnumerics" - "--gres=gpu:mi210:2" - - "--ntasks=6" - - "--cpus-per-task=2" + - "--ntasks=2" + - "--cpus-per-task=16" - "--time=40:00" - prerequisites: - - "source /etc/profile.d/z11_lmod.sh" + prerequisites: [] modules: - cmake/3.31.2 - gcc/12.4.0 @@ -17,36 +16,36 @@ steps: - feq-parse/2.2.2 env: BUILD_DIR: ${WORKSPACE}/build - PREFIX: ${WORKSPACE}/opt/self + PREFIX: ${WORKSPACE}/install OUTDIR: ${WORKSPACE}/local GPU_ARCH: gfx90a - BUILD_TYPE: coverage + BUILD_TYPE: release ENABLE_GPU: ON ENABLE_DOUBLE_PRECISION: ON ENABLE_MULTITHREADING: OFF + ENABLE_TESTING: ON + ENABLE_EXAMPLES: ON NTHREADS: 4 - GCOV: gcov-12 + GCOV: gcov commands: - | set -e mkdir -p ${BUILD_DIR} mkdir -p ${OUTDIR} - cd ${WORKSPACE}/build - FC=gfortran \ - CXX=hipcc \ + cd ${BUILD_DIR} cmake -G Ninja \ - -DCMAKE_PREFIX_PATH=${ROCM_PATH} \ -DCMAKE_INSTALL_PREFIX=${PREFIX} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DSELF_ENABLE_GPU=${ENABLE_GPU} \ -DSELF_ENABLE_MULTITHREADING=${ENABLE_MULTITHREADING} \ -DSELF_MULTITHREADING_NTHREADS=${NTHREADS} \ -DSELF_ENABLE_DOUBLE_PRECISION=${ENABLE_DOUBLE_PRECISION} \ - -DAMDGPU_TARGETS=${GPU_ARCH} \ + -DCMAKE_HIP_ARCHITECTURES=${GPU_ARCH} \ + -DSELF_ENABLE_EXAMPLES=${ENABLE_EXAMPLES} \ + -DSELF_ENABLE_TESTING=${ENABLE_TESTING} \ ../ ninja - # Initialize coverage if [ "$BUILD_TYPE" = "coverage" ]; then lcov --capture \ @@ -79,7 +78,7 @@ steps: --sha "${COMMIT_SHA}" \ --branch "${BRANCH_NAME}" \ --pr "${PR_NUMBER}" \ - --flag "armory-noether-gfx90a-test" \ + --flag "galapagos-noether-gfx90a-test" \ --file "${WORKSPACE}/coverage.info" fi diff --git a/.superci/galapagos.v100.yml b/.superci/galapagos.v100.yml new file mode 100644 index 000000000..4fa5fc77c --- /dev/null +++ b/.superci/galapagos.v100.yml @@ -0,0 +1,85 @@ +steps: + - name : "Build on Noether" + sbatch_options: + - "--account=fluidnumerics" + - "--gres=gpu:v100:2" + - "--ntasks=2" + - "--cpus-per-task=6" + - "--time=40:00" + prerequisites: [] + modules: + - gcc/12.4.0 + - cmake/3.31.2 + - cuda/12.4.1 + - openmpi/5.0.6 + - hdf5/1.14.5 + - feq-parse/2.2.2 + env: + BUILD_DIR: ${WORKSPACE}/build + PREFIX: ${WORKSPACE}/install + OUTDIR: ${WORKSPACE}/local + GPU_ARCH: 70 + BUILD_TYPE: release + ENABLE_GPU: ON + ENABLE_DOUBLE_PRECISION: ON + ENABLE_MULTITHREADING: OFF + ENABLE_TESTING: ON + ENABLE_EXAMPLES: ON + NTHREADS: 4 + GCOV: gcov + commands: + - | + set -e + mkdir -p ${BUILD_DIR} + mkdir -p ${OUTDIR} + cd ${BUILD_DIR} + cmake -G Ninja \ + -DCMAKE_INSTALL_PREFIX=${PREFIX} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DSELF_ENABLE_GPU=${ENABLE_GPU} \ + -DSELF_ENABLE_MULTITHREADING=${ENABLE_MULTITHREADING} \ + -DSELF_MULTITHREADING_NTHREADS=${NTHREADS} \ + -DSELF_ENABLE_DOUBLE_PRECISION=${ENABLE_DOUBLE_PRECISION} \ + -DCMAKE_CUDA_ARCHITECTURES=${GPU_ARCH} \ + -DSELF_ENABLE_EXAMPLES=${ENABLE_EXAMPLES} \ + -DSELF_ENABLE_TESTING=${ENABLE_TESTING} \ + ../ + ninja + + + # Initialize coverage + if [ "$BUILD_TYPE" = "coverage" ]; then + lcov --capture \ + --initial \ + --directory ${BUILD_DIR}/src \ + --gcov=${GCOV} \ + --output-file ${WORKSPACE}/initial.info + fi + + + # Run ctests + ctest --test-dir ${BUILD_DIR} --verbose + + if [ "$BUILD_TYPE" = "coverage" ]; then + # Compile coverage information + lcov --capture \ + --directory 
${BUILD_DIR}/src \ + --gcov=${GCOV} \ + --output-file ${WORKSPACE}/ctest-capture.info + + lcov --add-tracefile ${WORKSPACE}/initial.info \ + --add-tracefile ${WORKSPACE}/ctest-capture.info \ + --gcov=${GCOV} \ + --output-file ${WORKSPACE}/coverage.info + + # Generate summary + lcov --summary ${WORKSPACE}/coverage.info + + ${HOME}/.local/bin/codecov-linux -t "${CODECOV_TOKEN}" \ + --sha "${COMMIT_SHA}" \ + --branch "${BRANCH_NAME}" \ + --pr "${PR_NUMBER}" \ + --flag "galapagos-oram-v100-test" \ + --file "${WORKSPACE}/coverage.info" + fi + diff --git a/CMakeLists.txt b/CMakeLists.txt index 06064514c..e63bdea3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,11 +28,9 @@ cmake_minimum_required(VERSION 3.21) cmake_policy(VERSION 3.21...3.27) # C Language is needed in order to verify Fortran compiler is C-interoperable -# CXX language is needed to properly find "hip" package project(SELF VERSION 1.0.0 DESCRIPTION "Spectral Element Library in Fortran" - LANGUAGES Fortran C CXX) - + LANGUAGES Fortran C) option(SELF_ENABLE_MULTITHREADING "Option to enable CPU multithreading for `do concurrent` loop blocks." OFF) option(SELF_ENABLE_TESTING "Option to enable build of tests. (Default On)" ON) @@ -48,13 +46,6 @@ if(SELF_ENABLE_MULTITHREADING) set(SELF_MULITHREADING_NTHREADS "4" CACHE STRING "Number of threads to use for `do concurrent` loop blocks. This option is only used with GNU compilers. Other compilers use OMP_NUM_THREADS environment variable at runtime.") endif() -if(NOT DEFINED ROCM_PATH) - if(NOT DEFINED ENV{ROCM_PATH}) - set(ROCM_PATH "/opt/rocm/" CACHE PATH "Path to which ROCm has been installed") - else() - set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed") - endif() -endif() # Fortran compiler requirements @@ -157,11 +148,7 @@ if(SELF_ENABLE_DOUBLE_PRECISION) set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DDOUBLE_PRECISION") set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DDOUBLE_PRECISION" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DDOUBLE_PRECISION" ) - set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDOUBLE_PRECISION" ) - set( CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_COVERAGE} -DDOUBLE_PRECISION") - set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_PROFILE} -DDOUBLE_PRECISION") - set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DDOUBLE_PRECISION" ) + endif() if(SELF_ENABLE_GPU) @@ -172,12 +159,6 @@ if(SELF_ENABLE_GPU) set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DENABLE_GPU") set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DENABLE_GPU" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DENABLE_GPU" ) - set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DENABLE_GPU" ) - set( CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_COVERAGE} -DENABLE_GPU") - set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_PROFILE} -DENABLE_GPU") - set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DENABLE_GPU" ) - # Check MPI for GPU awareness # Add SELF's cmake module directory to the search path set(CMAKE_MODULE_PATH "${CMAKE_MODULE_PATH};${CMAKE_CURRENT_SOURCE_DIR}/cmake") @@ -193,49 +174,49 @@ if(SELF_ENABLE_GPU) if(hip_FOUND) if(MPI_HAS_QUERY_HIP_SUPPORT) find_package(hipblas REQUIRED) - #message("-- HIP found. Enabling HIP language.") - #enable_language(HIP) + message("-- HIP found. 
Enabling HIP language.") + enable_language(HIP) set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DHAVE_HIP" ) set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -DHAVE_HIP" ) set( CMAKE_Fortran_FLAGS_COVERAGE "${CMAKE_Fortran_FLAGS_COVERAGE} -DHAVE_HIP") set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DHAVE_HIP") set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DHAVE_HIP" ) - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_HIP" ) - set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAVE_HIP" ) - set( CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_COVERAGE} -DHAVE_HIP") - set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_PROFILE} -DHAVE_HIP") - set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DHAVE_HIP" ) - + if(SELF_ENABLE_DOUBLE_PRECISION) + set( CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -DDOUBLE_PRECISION" ) + set( CMAKE_HIP_FLAGS_DEBUG "${CMAKE_HIP_FLAGS_DEBUG} -DDOUBLE_PRECISION" ) + set( CMAKE_HIP_FLAGS_COVERAGE "${CMAKE_HIP_FLAGS_COVERAGE} -DDOUBLE_PRECISION") + set( CMAKE_HIP_FLAGS_PROFILE "${CMAKE_HIP_FLAGS_PROFILE} -DDOUBLE_PRECISION") + set( CMAKE_HIP_FLAGS_RELEASE "${CMAKE_HIP_FLAGS_RELEASE} -DDOUBLE_PRECISION" ) + endif() set( BACKEND_LIBRARIES hip::device roc::hipblas) else() message( FATAL_ERROR "MPI installation is not GPU-aware" ) endif() else() - # CUDA (Optional) - find_package(cuda) - if(cuda_FOUND) - if(MPI_HAS_QUERY_CUDA_SUPPORT) - #message("-- CUDA found. Enabling CUDA language.") - #enable_language(CUDA) - set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DHAVE_CUDA" ) - set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -DHAVE_CUDA" ) - set( CMAKE_Fortran_FLAGS_COVERAGE "${CMAKE_Fortran_FLAGS_COVERAGE} -DHAVE_CUDA") - set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DHAVE_CUDA") - set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DHAVE_CUDA" ) - - set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAVE_CUDA" ) - set( CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DHAVE_CUDA" ) - set( CMAKE_CXX_FLAGS_COVERAGE "${CMAKE_CXX_FLAGS_COVERAGE} -DHAVE_CUDA") - set( CMAKE_CXX_FLAGS_PROFILE "${CMAKE_CXX_FLAGS_PROFILE} -DHAVE_CUDA") - set( CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -DHAVE_CUDA" ) - - # TO DO - need cuda libraries and hipblas libraries - else() - message( FATAL_ERROR "MPI installation is not GPU-aware" ) + # CUDA + find_package(CUDAToolkit REQUIRED) + message("-- CUDA found. Enabling CUDA language.") + enable_language(CUDA) + if(MPI_HAS_QUERY_CUDA_SUPPORT) + set( CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -DHAVE_CUDA" ) + set( CMAKE_Fortran_FLAGS_DEBUG "${CMAKE_Fortran_FLAGS_DEBUG} -DHAVE_CUDA" ) + set( CMAKE_Fortran_FLAGS_COVERAGE "${CMAKE_Fortran_FLAGS_COVERAGE} -DHAVE_CUDA") + set( CMAKE_Fortran_FLAGS_PROFILE "${CMAKE_Fortran_FLAGS_PROFILE} -DHAVE_CUDA") + set( CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -DHAVE_CUDA" ) + + if(SELF_ENABLE_DOUBLE_PRECISION) + set( CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DDOUBLE_PRECISION" ) + set( CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -DDOUBLE_PRECISION" ) + set( CMAKE_CUDA_FLAGS_COVERAGE "${CMAKE_CUDA_FLAGS_COVERAGE} -DDOUBLE_PRECISION") + set( CMAKE_CUDA_FLAGS_PROFILE "${CMAKE_CUDA_FLAGS_PROFILE} -DDOUBLE_PRECISION") + set( CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE} -DDOUBLE_PRECISION" ) endif() + + set( BACKEND_LIBRARIES CUDA::cuda_driver CUDA::cudart CUDA::cublas) + else() - message( FATAL_ERROR "Enabling GPU support requires either HIP or CUDA." 
)
+    message( FATAL_ERROR "MPI installation is not GPU-aware" )
     endif()
   endif()
 endif()
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 797ddb125..6dc5ca790 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -29,18 +29,12 @@ file(GLOB SELF_FSRC "${CMAKE_CURRENT_SOURCE_DIR}/*.f*")
 if(SELF_ENABLE_GPU)
   file(GLOB SELF_BACKEND_FSRC "${CMAKE_CURRENT_SOURCE_DIR}/gpu/*.f*")
   file(GLOB SELF_BACKEND_CPPSRC "${CMAKE_CURRENT_SOURCE_DIR}/gpu/*.cpp*")
-  # Note : joe@fluidnumerics.com (Oct. 1 2024)
-  # Ultimately, we want to be able to use the language support for HIP/CUDA
-  # rather than bringing in HIP/CUDA through `find_package`. At the moment
-  # we are doing a hack overrided the CXX compiler with either hipcc or nvcc
-  # The reason we're doing it this way (hacky) at the moment is that we get
-  # segmentation faults on our AMD GPU tests when using the HIP language support
-  # via Cmake, for some yet unknown reason.
-  # if(hip_FOUND)
-  #   set_source_files_properties(${SELF_BACKEND_CPPSRC} PROPERTIES LANGUAGE HIP)
-  # elseif(cuda_FOUND)
-  #   set_source_files_properties(${SELF_BACKEND_CPPSRC} PROPERTIES LANGUAGE CUDA)
-  # endif()
+
+  if(hip_FOUND)
+    set_source_files_properties(${SELF_BACKEND_CPPSRC} PROPERTIES LANGUAGE HIP)
+  else()
+    set_source_files_properties(${SELF_BACKEND_CPPSRC} PROPERTIES LANGUAGE CUDA)
+  endif()
 else()
   file(GLOB SELF_BACKEND_FSRC "${CMAKE_CURRENT_SOURCE_DIR}/cpu/*.f*")
 endif()
diff --git a/src/gpu/SELF_GPU_Macros.h b/src/gpu/SELF_GPU_Macros.h
index 193255982..b8ce7f484 100644
--- a/src/gpu/SELF_GPU_Macros.h
+++ b/src/gpu/SELF_GPU_Macros.h
@@ -15,7 +15,7 @@
 #include
 #include

-#ifdef HAVE_HIP
+#ifdef __HIP_PLATFORM_AMD__

 #include

@@ -31,6 +31,7 @@ static void check(const hipError_t err, const char *const file, const int line)
 #else

 #include
+#include  // required to provide uint32_t

 #define hipLaunchKernelGGL(F,G,B,M,S,...) F<<<G,B,M,S>>>(__VA_ARGS__)
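
Usage note: the hipLaunchKernelGGL compatibility macro above is what lets the shared kernel sources under src/gpu be compiled as either HIP or CUDA, matching the set_source_files_properties(... LANGUAGE HIP/CUDA) switch in src/CMakeLists.txt. A minimal sketch of a call site, assuming SELF_GPU_Macros.h and the backend runtime headers are already included; the kernel name, helper function, and launch sizes below are illustrative only, not taken from SELF:

    // Hypothetical example kernel -- only the launch mechanism mirrors SELF.
    __global__ void scale_kernel(float *x, float a, uint32_t n) {
      uint32_t i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n) x[i] *= a;
    }

    void scale_on_device(float *d_x, float a, uint32_t n) {
      dim3 block(256);
      dim3 grid((n + block.x - 1) / block.x);
      // HIP build: hipLaunchKernelGGL is the native launch API.
      // CUDA build: the macro above expands this call to
      //   scale_kernel<<<grid, block, 0, 0>>>(d_x, a, n)
      hipLaunchKernelGGL(scale_kernel, grid, block, 0, 0, d_x, a, n);
    }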