Skip to content

Commit

Permalink
Merge pull request #248 from LLNL/pr-from-fork/206
Browse files Browse the repository at this point in the history
Pr for Basic Kokkos
  • Loading branch information
MrBurmark committed Jun 22, 2022
2 parents a6ff66a + 6add081 commit 7bf4e8c
Show file tree
Hide file tree
Showing 44 changed files with 1,050 additions and 9 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
[submodule "tpl/RAJA"]
path = tpl/RAJA
url = https://github.com/LLNL/RAJA.git
[submodule "tpl/kokkos"]
path = tpl/kokkos
url = https://github.com/kokkos/kokkos
63 changes: 56 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ cmake_minimum_required(VERSION 3.14.5)

# Build option (default On) for the sequential RAJA kernel variants.
option(ENABLE_RAJA_SEQUENTIAL "Run sequential variants of RAJA kernels. Disable
this, and all other variants, to run _only_ raw C loops." On)
# Opt-in build option (default Off) for the Kokkos kernel implementations.
option(ENABLE_KOKKOS "Include Kokkos implementations of the kernels in the RAJA Perfsuite" Off)

#
# Note: the BLT build system is inheritted by RAJA and is initialized by RAJA
Expand All @@ -22,8 +23,13 @@ if (PERFSUITE_ENABLE_WARNINGS)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror")
endif()

set(CMAKE_CXX_STANDARD 14)
set(BLT_CXX_STD c++14)
# Pick the C++ standard for CMake and BLT together: C++14 for the default
# build, C++17 when the Kokkos kernels are enabled.
if(NOT ENABLE_KOKKOS)
  set(BLT_CXX_STD c++14)
  set(CMAKE_CXX_STANDARD 14)
else()
  set(BLT_CXX_STD c++17)
  set(CMAKE_CXX_STANDARD 17)
endif()

include(blt/SetupBLT.cmake)

Expand Down Expand Up @@ -100,7 +106,12 @@ endif()
if (ENABLE_CUDA)
list(APPEND RAJA_PERFSUITE_DEPENDS cuda)
endif()
if (ENABLE_HIP)

# Kokkos requires hipcc as the CMAKE_CXX_COMPILER on HIP (AMD/Vega GPU)
# platforms, whereas the RAJAPerf Suite uses BLT/CMake FindHIP to set the HIP
# compiler. The two are therefore handled separately: the RAJAPerf Suite HIP
# check below runs only when Kokkos is disabled.

if ((ENABLE_HIP) AND (NOT ENABLE_KOKKOS))
message(STATUS "HIP version: ${hip_VERSION}")
if("${hip_VERSION}" VERSION_LESS "3.5")
message(FATAL_ERROR "Trying to use HIP/ROCm version ${hip_VERSION}. RAJA Perf Suite requires HIP/ROCm version 3.5 or newer. ")
Expand All @@ -113,8 +124,13 @@ set(RAJAPERF_BUILD_SYSTYPE $ENV{SYS_TYPE})
set(RAJAPERF_BUILD_HOST $ENV{HOSTNAME})

if (ENABLE_CUDA)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr")
# The CUDA language standard and the extra nvcc flags depend on whether the
# Kokkos kernels are part of the build.
if (ENABLE_KOKKOS)
  # Kokkos build: C++17, and no -arch flag is appended here.
  # NOTE(review): presumably Kokkos selects the GPU architecture itself -- confirm.
  set(CMAKE_CUDA_STANDARD 17)
  string(APPEND CMAKE_CUDA_FLAGS " -restrict --extended-lambda --expt-relaxed-constexpr")
else()
  # Default build: C++14, with the target architecture taken from CUDA_ARCH.
  set(CMAKE_CUDA_STANDARD 14)
  string(APPEND CMAKE_CUDA_FLAGS " -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr")
endif()

set(RAJAPERF_COMPILER "${CUDA_NVCC_EXECUTABLE}")
list(APPEND RAJAPERF_COMPILER ${CMAKE_CXX_COMPILER})
Expand All @@ -135,13 +151,46 @@ configure_file(${CMAKE_SOURCE_DIR}/src/rajaperf_config.hpp.in

include_directories($<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>)

# Make sure RAJA flag propagate (we need to do some house cleaning to
# Make sure RAJA flags propagate (we need to do some tidying to
# remove project-specific CMake variables that are no longer needed)
set (CUDA_NVCC_FLAGS ${RAJA_NVCC_FLAGS})

#
# Each directory in the perf suite has its own CMakeLists.txt file.
#

# ENABLE_KOKKOS is a RAJAPerf Suite option. When it is on:
#   1. define RUN_KOKKOS for the sources compiled from this directory down,
#   2. forward the suite's ENABLE_* back-end options to the matching
#      Kokkos_ENABLE_* cache variables,
#   3. build the Kokkos submodule in tpl/kokkos and make it a dependency of
#      the suite libraries.
if(ENABLE_KOKKOS)
  # RUN_KOKKOS guards the Kokkos-specific code in the suite's C++ sources.
  add_definitions(-DRUN_KOKKOS)

  # ENABLE_HIP is a RAJAPerf Suite option.
  if(ENABLE_HIP)
    set(Kokkos_ENABLE_HIP ON CACHE BOOL
        "Enable the Kokkos HIP back-end (forwarded from ENABLE_HIP)")
  endif()

  # ENABLE_TARGET_OPENMP is a RAJAPerf Suite option.
  if(ENABLE_TARGET_OPENMP)
    set(Kokkos_ENABLE_OPENMPTARGET ON CACHE BOOL
        "Enable the Kokkos OpenMPTarget back-end (forwarded from ENABLE_TARGET_OPENMP)")
    # Only Debug builds are accepted for this back-end unless the user opts in
    # explicitly with -DEXPERIMENTAL_BUILD=ON. The build type is quoted so an
    # empty or unset CMAKE_BUILD_TYPE is compared as an empty string.
    if(NOT "${CMAKE_BUILD_TYPE}" MATCHES "Debug" AND NOT EXPERIMENTAL_BUILD)
      message(FATAL_ERROR "Kokkos builds with OpenMPTarget require a Debug build to succeed at the moment. Rebuild with CMAKE_BUILD_TYPE=Debug. If you're a compiler developer, rebuild with -DEXPERIMENTAL_BUILD=ON")
    endif()
  endif()

  # ENABLE_CUDA is a RAJAPerf Suite option.
  if(ENABLE_CUDA)
    set(Kokkos_ENABLE_CUDA ON CACHE BOOL
        "Enable the Kokkos CUDA back-end (forwarded from ENABLE_CUDA)")
    set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL
        "Enable lambda support in the Kokkos CUDA back-end")
    enable_language(CUDA)
  endif()

  # ENABLE_OPENMP is a RAJAPerf Suite option.
  if(ENABLE_OPENMP)
    set(Kokkos_ENABLE_OPENMP ON CACHE BOOL
        "Enable the Kokkos OpenMP back-end (forwarded from ENABLE_OPENMP)")
  endif()

  add_subdirectory(tpl/kokkos)

  # Re-export the include directories recorded on the tpl/kokkos directory so
  # that targets defined below this point can find the Kokkos headers.
  get_property(KOKKOS_INCLUDE_DIRS DIRECTORY tpl/kokkos PROPERTY INCLUDE_DIRECTORIES)
  include_directories(${KOKKOS_INCLUDE_DIRS})

  list(APPEND RAJA_PERFSUITE_DEPENDS kokkos)
endif()

add_subdirectory(src)

if (RAJA_PERFSUITE_ENABLE_TESTS)
Expand Down
4 changes: 3 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@
include_directories(.)

add_subdirectory(common)
add_subdirectory(apps)
add_subdirectory(basic)
add_subdirectory(basic-kokkos)
add_subdirectory(apps)
add_subdirectory(lcals)
add_subdirectory(polybench)
add_subdirectory(stream)
Expand All @@ -20,6 +21,7 @@ set(RAJA_PERFSUITE_EXECUTABLE_DEPENDS
common
apps
basic
basic-kokkos
lcals
polybench
stream
Expand Down
10 changes: 10 additions & 0 deletions src/RAJAPerfSuiteDriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#ifdef RUN_KOKKOS
#include <Kokkos_Core.hpp>
#endif

#include "common/Executor.hpp"

#include <iostream>
Expand All @@ -24,6 +28,9 @@ int main( int argc, char** argv )
MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);
rajaperf::getCout() << "\n\nRunning with " << num_ranks << " MPI ranks..." << std::endl;
#endif
#ifdef RUN_KOKKOS
Kokkos::initialize(argc, argv);
#endif

// STEP 1: Create suite executor object
rajaperf::Executor executor(argc, argv);
Expand All @@ -43,6 +50,9 @@ int main( int argc, char** argv )

rajaperf::getCout() << "\n\nDONE!!!...." << std::endl;

#ifdef RUN_KOKKOS
Kokkos::finalize();
#endif
#ifdef RAJA_PERFSUITE_ENABLE_MPI
MPI_Finalize();
#endif
Expand Down
27 changes: 27 additions & 0 deletions src/basic-kokkos/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
###############################################################################
# Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
# and RAJA Performance Suite project contributors.
# See the RAJAPerf/COPYRIGHT file for details.
#
# SPDX-License-Identifier: (BSD-3-Clause)
###############################################################################

# Library holding the Kokkos variants of the "basic" kernel group.
# ../basic is put on the include path -- presumably so that the kernel headers
# included by these sources (e.g. DAXPY.hpp) are found there.
blt_add_library(
  NAME basic-kokkos
  SOURCES
    DAXPY-Kokkos.cpp
    DAXPY_ATOMIC-Kokkos.cpp
    IF_QUAD-Kokkos.cpp
    INIT3-Kokkos.cpp
    INIT_VIEW1D-Kokkos.cpp
    INIT_VIEW1D_OFFSET-Kokkos.cpp
    MULADDSUB-Kokkos.cpp
    NESTED_INIT-Kokkos.cpp
    PI_ATOMIC-Kokkos.cpp
    REDUCE3_INT-Kokkos.cpp
    TRAP_INT-Kokkos.cpp
  DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS}
  INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../basic
  )
69 changes: 69 additions & 0 deletions src/basic-kokkos/DAXPY-Kokkos.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC
// and RAJA Performance Suite project contributors.
// See the RAJAPerf/COPYRIGHT file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include "DAXPY.hpp"
#if defined(RUN_KOKKOS)
#include "common/KokkosViewUtils.hpp"
#include <iostream>

namespace rajaperf {
namespace basic {

struct DaxpyFunctor {
Real_ptr x;
Real_ptr y;
Real_type a;
DaxpyFunctor(Real_ptr m_x, Real_ptr m_y, Real_type m_a)
: x(m_x), y(m_y), a(m_a) {}
void operator()(Index_type i) const { DAXPY_BODY; }
};

//
// Run the Kokkos variant of the DAXPY kernel (y[i] += a * x[i]).
//
//   vid       variant to run. Only Kokkos_Lambda is handled; any other value
//             prints a diagnostic and executes no kernel.
//   tune_idx  unused.
//
// The kernel is launched getRunReps() times over [0, getActualProblemSize())
// inside the timed region. The data is moved back from the Kokkos Views
// afterwards, whichever variant id was passed.
//
void DAXPY::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) {
  const Index_type run_reps = getRunReps();
  const Index_type ibegin = 0;
  const Index_type iend = getActualProblemSize();

  // x, y and a are not declared in this function; presumably they are
  // introduced by DAXPY_DATA_SETUP -- verify against DAXPY.hpp.
  DAXPY_DATA_SETUP;

  // Wrap the kernel's pointers in Kokkos Views of length iend.
  auto x_view = getViewFromPointer(x, iend);
  auto y_view = getViewFromPointer(y, iend);

  switch (vid) {

    case Kokkos_Lambda: {

      // Fence on both sides of the timed region so that outstanding
      // asynchronous work is completed before the timer starts and stops.
      Kokkos::fence();
      startTimer();

      for (RepIndex_type irep = 0; irep < run_reps; ++irep) {
        Kokkos::parallel_for(
            "DAXPY-Kokkos Kokkos_Lambda",
            Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(ibegin, iend),
            KOKKOS_LAMBDA(Index_type i) { y_view[i] += a * x_view[i]; });
      }

      Kokkos::fence();
      stopTimer();

      break;
    }

    default: {
      // Use the suite's output stream (as the driver and the other Kokkos
      // kernels do) rather than writing to std::cout directly.
      getCout() << "\n DAXPY : Unknown variant id = " << vid << std::endl;
      break;
    }
  }

  // Move the data (i.e., the pointer and its Kokkos View wrapper) back to the
  // host from the device.
  moveDataToHostFromKokkosView(x, x_view, iend);
  moveDataToHostFromKokkosView(y, y_view, iend);
}

} // end namespace basic
} // end namespace rajaperf
#endif
70 changes: 70 additions & 0 deletions src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC
// and RAJA Performance Suite project contributors.
// See the RAJAPerf/LICENSE file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include "DAXPY_ATOMIC.hpp"
#if defined(RUN_KOKKOS)
#include "common/KokkosViewUtils.hpp"
#include <iostream>

// TODO: delete the following include; it was added for debugging only.
#include "RAJA/RAJA.hpp"

namespace rajaperf {
namespace basic {

//
// Run the Kokkos variant of the DAXPY_ATOMIC kernel: every index i performs
// an atomic add of a * x[i] into y[i].
//
//   vid       variant to run. Only Kokkos_Lambda is handled; any other value
//             prints a diagnostic and executes no kernel.
//   tune_idx  unused.
//
void DAXPY_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
  using range_policy = Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>;

  const Index_type run_reps = getRunReps();
  const Index_type ibegin = 0;
  const Index_type iend = getActualProblemSize();

  // x, y and a are not declared in this function; presumably they are
  // introduced by DAXPY_ATOMIC_DATA_SETUP -- verify against DAXPY_ATOMIC.hpp.
  DAXPY_ATOMIC_DATA_SETUP;

  // Kokkos Views of length iend wrapping the kernel's pointers.
  auto xv = getViewFromPointer(x, iend);
  auto yv = getViewFromPointer(y, iend);

  switch (vid) {

    case Kokkos_Lambda: {

      // Fence on both sides of the timed region so that outstanding
      // asynchronous work is completed before the timer starts and stops.
      Kokkos::fence();
      startTimer();

      for (RepIndex_type rep = 0; rep < run_reps; ++rep) {
        Kokkos::parallel_for(
            "DAXPY_ATOMIC_Kokkos Kokkos_Lambda",
            range_policy(ibegin, iend),
            KOKKOS_LAMBDA(Index_type i) {
              Kokkos::atomic_add(&yv[i], a * xv[i]);
            });
      }

      Kokkos::fence();
      stopTimer();

      break;
    }

    default: {
      getCout() << "\n DAXPY_ATOMIC : Unknown variant id = " << vid << std::endl;
      break;
    }
  }

  // Copy the View contents back into the host-side pointers; this runs for
  // every variant id, including unknown ones.
  moveDataToHostFromKokkosView(x, xv, iend);
  moveDataToHostFromKokkosView(y, yv, iend);
}

} // end namespace basic
} // end namespace rajaperf
#endif
Loading

0 comments on commit 7bf4e8c

Please sign in to comment.