From 060bfc49846bdc4b0bad53f1200559783ffe92f3 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Wed, 12 Jan 2022 16:06:50 -0700 Subject: [PATCH 01/28] Kokkos: Add git submodule and CMake changes co-authored-by: Christian Trott co-authored-by: David Zoeller Poliakoff Adds optional Kokkos dependency and Kokkos as a submodule --- .gitmodules | 3 ++ CMakeLists.txt | 80 ++++++++++++++++++++++++++++++++++++++-------- src/CMakeLists.txt | 18 ++++++++++- tpl/kokkos | 1 + 4 files changed, 88 insertions(+), 14 deletions(-) create mode 160000 tpl/kokkos diff --git a/.gitmodules b/.gitmodules index 13f05ecd3..0993afb86 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "tpl/RAJA"] path = tpl/RAJA url = https://github.com/LLNL/RAJA.git +[submodule "tpl/kokkos"] + path = tpl/kokkos + url = https://github.com/kokkos/kokkos diff --git a/CMakeLists.txt b/CMakeLists.txt index 813d1e9b0..b39d4653b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,7 @@ cmake_minimum_required(VERSION 3.14.5) option(ENABLE_RAJA_SEQUENTIAL "Run sequential variants of RAJA kernels. Disable this, and all other variants, to run _only_ raw C loops." On) +option(ENABLE_KOKKOS "Include Kokkos implementations of the kernels in the RAJA Perfsuite" Off) # # Note: the BLT build system is inheritted by RAJA and is initialized by RAJA @@ -22,8 +23,13 @@ if (PERFSUITE_ENABLE_WARNINGS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Werror") endif() -set(CMAKE_CXX_STANDARD 14) -set(BLT_CXX_STD c++14) +if(ENABLE_KOKKOS) + set(CMAKE_CXX_STANDARD 17) + set(BLT_CXX_STD c++17) +else() + set(CMAKE_CXX_STANDARD 14) + set(BLT_CXX_STD c++14) +endif() include(blt/SetupBLT.cmake) @@ -46,7 +52,6 @@ cmake_dependent_option(RAJA_PERFSUITE_ENABLE_OPENMP5_SCAN "Build OpenMP scan var # # Define RAJA settings... -# set(RAJA_ENABLE_TESTS Off CACHE BOOL "") set(RAJA_ENABLE_EXAMPLES Off CACHE BOOL "") @@ -91,16 +96,18 @@ set(RAJA_PERFSUITE_VERSION_PATCHLEVEL 0) set(RAJA_PERFSUITE_DEPENDS RAJA) -if (RAJA_PERFSUITE_ENABLE_MPI) - list(APPEND RAJA_PERFSUITE_DEPENDS mpi) -endif() if (ENABLE_OPENMP) list(APPEND RAJA_PERFSUITE_DEPENDS openmp) endif() if (ENABLE_CUDA) list(APPEND RAJA_PERFSUITE_DEPENDS cuda) endif() -if (ENABLE_HIP) + +# Kokkos requires hipcc as the CMAKE_CXX_COMPILER for HIP AMD/VEGA GPU +# platforms, whereas RAJAPerf Suite uses blt/CMake FindHIP to set HIP compiler +# Separate RAJAPerf Suite and Kokkos handling of HIP compilers + +if ((ENABLE_HIP) AND (NOT ENABLE_KOKKOS)) message(STATUS "HIP version: ${hip_VERSION}") if("${hip_VERSION}" VERSION_LESS "3.5") message(FATAL_ERROR "Trying to use HIP/ROCm version ${hip_VERSION}. RAJA Perf Suite requires HIP/ROCm version 3.5 or newer. ") @@ -113,9 +120,12 @@ set(RAJAPERF_BUILD_SYSTYPE $ENV{SYS_TYPE}) set(RAJAPERF_BUILD_HOST $ENV{HOSTNAME}) if (ENABLE_CUDA) - set(CMAKE_CUDA_STANDARD 14) + if (ENABLE_KOKKOS) + set(CMAKE_CUDA_STANDARD 17) + else() + set(CMAKE_CUDA_STANDARD 14) + endif() set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr") - set(RAJAPERF_COMPILER "${CUDA_NVCC_EXECUTABLE}") list(APPEND RAJAPERF_COMPILER ${CMAKE_CXX_COMPILER}) set(RAJAPERF_COMPILER_OPTIONS "${CUDA_NVCC_FLAGS}") @@ -135,13 +145,57 @@ configure_file(${CMAKE_SOURCE_DIR}/src/rajaperf_config.hpp.in include_directories($) -# Make sure RAJA flag propagate (we need to do some house cleaning to +# Make sure RAJA flags propagate (we need to do some tidying to # remove project-specific CMake variables that are no longer needed) set (CUDA_NVCC_FLAGS ${RAJA_NVCC_FLAGS}) -# -# Each directory in the perf suite has its own CMakeLists.txt file. -# +# The statement below is required for Kokkos compilation. +if(ENABLE_KOKKOS) + include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/tpl/RAJA/include/) +endif() + + +# ENABLE_KOKKOS is A RAJAPerf Suite Option +if(ENABLE_KOKKOS) + add_definitions(-DRUN_KOKKOS) + if(ENABLE_HIP) + set(Kokkos_ENABLE_HIP ON CACHE BOOL "Kokkos builds for AMD HIP set the +Kokkos_ENABLE_HIP variable to ON") + #set(Kokkos_ARCH_VEGA900 ON CACHE BOOL "Docstring") #TODO: better + endif() + if(ENABLE_TARGET_OPENMP) + set(Kokkos_ENABLE_OPENMPTARGET ON CACHE BOOL "Docstring") + set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Docstring") #TODO: better + set(CMAKE_CXX_STANDARD 17) + set(BLT_CXX_STANDARD 17) + set(RAJA_ENABLE_TARGET_OPENMP ON CACHE BOOL "Docstring") + if(NOT CMAKE_BUILD_TYPE MATCHES Debug) + if(NOT EXPERIMENTAL_BUILD) + message(FATAL_ERROR "Kokkos builds with OpenMPTarget require a Debug build to succeed at the moment. Rebuild with CMAKE_BUILD_TYPE=Debug. If you're a compiler developer, rebuild with -DEXPERIMENTAL_BUILD=ON") + endif() + endif() + + #add_definitions(-DRAJA_ENABLE_TARGET_OPENMP) + endif() + +# ENABLE_CUDA IS A RAJA PERFSUITE OPTION + if(ENABLE_CUDA) + set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Docstring") + set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Docstring") + set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Docstring") #TODO: better + enable_language(CUDA) + endif() + if(ENABLE_OPENMP) + set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "Docstring") + endif() + + add_subdirectory(tpl/kokkos) + get_property(KOKKOS_INCLUDE_DIRS DIRECTORY tpl/kokkos PROPERTY INCLUDE_DIRECTORIES) + include_directories(${KOKKOS_INCLUDE_DIRS}) + + list(APPEND RAJA_PERFSUITE_DEPENDS kokkos) +endif() + add_subdirectory(src) if (RAJA_PERFSUITE_ENABLE_TESTS) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 598131623..0f3e624f3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,8 +9,12 @@ include_directories(.) add_subdirectory(common) -add_subdirectory(apps) add_subdirectory(basic) + +# TODO: We will add Kokkos variants of kernel groups one by one +# when all are done this difference is not needed anymore +if(NOT ENABLE_KOKKOS) +add_subdirectory(apps) add_subdirectory(lcals) add_subdirectory(polybench) add_subdirectory(stream) @@ -24,6 +28,18 @@ set(RAJA_PERFSUITE_EXECUTABLE_DEPENDS polybench stream algorithm) +endif() + +# TODO: Eventually Kokkos depends should be done via append_list +if(ENABLE_KOKKOS) + add_subdirectory(basic-kokkos) + set(RAJA_PERFSUITE_EXECUTABLE_DEPENDS + common + basic + basic-kokkos + ) +endif() + list(APPEND RAJA_PERFSUITE_EXECUTABLE_DEPENDS ${RAJA_PERFSUITE_DEPENDS}) if(ENABLE_TARGET_OPENMP) diff --git a/tpl/kokkos b/tpl/kokkos new file mode 160000 index 000000000..2834f94af --- /dev/null +++ b/tpl/kokkos @@ -0,0 +1 @@ +Subproject commit 2834f94af9b01debf67c1aaa3f0eb0c903d72c8d From 8da0e88cc371b2d8dddc91f1a5127c65fb9fcbd0 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Wed, 12 Jan 2022 16:09:16 -0700 Subject: [PATCH 02/28] Kokkos: Initialize/Finalize Kokkos in Driver co-authored-by: Christian Trott co-authored-by: David Zoeller Poliakoff --- src/RAJAPerfSuiteDriver.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/RAJAPerfSuiteDriver.cpp b/src/RAJAPerfSuiteDriver.cpp index d423dcff9..17e64f0f0 100644 --- a/src/RAJAPerfSuiteDriver.cpp +++ b/src/RAJAPerfSuiteDriver.cpp @@ -6,6 +6,10 @@ // SPDX-License-Identifier: (BSD-3-Clause) //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +#ifdef RUN_KOKKOS +#include +#endif + #include "common/Executor.hpp" #include @@ -24,6 +28,9 @@ int main( int argc, char** argv ) MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); rajaperf::getCout() << "\n\nRunning with " << num_ranks << " MPI ranks..." << std::endl; #endif +#ifdef RUN_KOKKOS + Kokkos::initialize(argc, argv); +#endif // STEP 1: Create suite executor object rajaperf::Executor executor(argc, argv); @@ -43,6 +50,9 @@ int main( int argc, char** argv ) rajaperf::getCout() << "\n\nDONE!!!...." << std::endl; +#ifdef RUN_KOKKOS + Kokkos::finalize(); +#endif #ifdef RAJA_PERFSUITE_ENABLE_MPI MPI_Finalize(); #endif From 2da3277a42de8934c89785fcbcb2ae57ce993fce Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Wed, 12 Jan 2022 16:09:58 -0700 Subject: [PATCH 03/28] Kokkos: Add basic kernel group variants co-authored-by: Christian Trott co-authored-by: David Zoeller Poliakoff Note: This makes it so that one only compiles the basic kernel group when Kokkos is enabled. This required a few judicous ifdefs in some places in common. Eventually when the Kokkos variants for all the kernel groups are merged, those ifdefs are not needed. --- src/basic-kokkos/CMakeLists.txt | 33 ++++ src/basic-kokkos/DAXPY-Kokkos.cpp | 97 +++++++++++ src/basic-kokkos/IF_QUAD-Kokkos.cpp | 99 ++++++++++++ src/basic-kokkos/INIT3-Kokkos.cpp | 90 +++++++++++ src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp | 74 +++++++++ .../INIT_VIEW1D_OFFSET-Kokkos.cpp | 74 +++++++++ src/basic-kokkos/MULADDSUB-Kokkos.cpp | 90 +++++++++++ src/basic-kokkos/NESTED_INIT-Kokkos.cpp | 79 +++++++++ src/basic-kokkos/PI_ATOMIC-Kokkos.cpp | 77 +++++++++ src/basic-kokkos/REDUCE3_INT-Kokkos.cpp | 151 ++++++++++++++++++ src/basic-kokkos/TRAP_INT-Kokkos.cpp | 143 +++++++++++++++++ src/basic/DAXPY.cpp | 2 + src/basic/DAXPY.hpp | 1 + src/basic/DAXPY_ATOMIC.hpp | 1 + src/basic/IF_QUAD.cpp | 4 + src/basic/IF_QUAD.hpp | 1 + src/basic/INDEXLIST.hpp | 1 + src/basic/INDEXLIST_3LOOP.hpp | 1 + src/basic/INIT3.cpp | 3 + src/basic/INIT3.hpp | 5 + src/basic/INIT_VIEW1D.cpp | 5 + src/basic/INIT_VIEW1D.hpp | 1 + src/basic/INIT_VIEW1D_OFFSET.cpp | 5 + src/basic/INIT_VIEW1D_OFFSET.hpp | 1 + src/basic/MAT_MAT_SHARED.cpp | 2 + src/basic/MAT_MAT_SHARED.hpp | 7 + src/basic/MULADDSUB.cpp | 6 + src/basic/MULADDSUB.hpp | 1 + src/basic/NESTED_INIT.cpp | 4 + src/basic/NESTED_INIT.hpp | 1 + src/basic/PI_ATOMIC.cpp | 4 + src/basic/PI_ATOMIC.hpp | 4 +- src/basic/PI_REDUCE.cpp | 2 + src/basic/PI_REDUCE.hpp | 5 + src/basic/REDUCE3_INT.cpp | 5 + src/basic/REDUCE3_INT.hpp | 1 + src/basic/TRAP_INT.cpp | 5 + src/basic/TRAP_INT.hpp | 1 + src/common/Executor.cpp | 13 +- src/common/KernelBase.cpp | 6 + src/common/KernelBase.hpp | 3 + src/common/RAJAPerfSuite.cpp | 15 ++ src/common/RAJAPerfSuite.hpp | 103 +++++++++++- 43 files changed, 1223 insertions(+), 3 deletions(-) create mode 100644 src/basic-kokkos/CMakeLists.txt create mode 100644 src/basic-kokkos/DAXPY-Kokkos.cpp create mode 100644 src/basic-kokkos/IF_QUAD-Kokkos.cpp create mode 100644 src/basic-kokkos/INIT3-Kokkos.cpp create mode 100644 src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp create mode 100644 src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp create mode 100644 src/basic-kokkos/MULADDSUB-Kokkos.cpp create mode 100644 src/basic-kokkos/NESTED_INIT-Kokkos.cpp create mode 100644 src/basic-kokkos/PI_ATOMIC-Kokkos.cpp create mode 100644 src/basic-kokkos/REDUCE3_INT-Kokkos.cpp create mode 100644 src/basic-kokkos/TRAP_INT-Kokkos.cpp diff --git a/src/basic-kokkos/CMakeLists.txt b/src/basic-kokkos/CMakeLists.txt new file mode 100644 index 000000000..c859747c2 --- /dev/null +++ b/src/basic-kokkos/CMakeLists.txt @@ -0,0 +1,33 @@ +############################################################################### +# Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +# and RAJA Performance Suite project contributors. +# See the RAJAPerf/COPYRIGHT file for details. +# +# SPDX-License-Identifier: (BSD-3-Clause) +############################################################################### + +include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../basic) + +blt_add_library( + NAME basic-kokkos + SOURCES + PI_ATOMIC-Kokkos.cpp + DAXPY-Kokkos.cpp + IF_QUAD-Kokkos.cpp + INIT3-Kokkos.cpp + INIT_VIEW1D-Kokkos.cpp + INIT_VIEW1D_OFFSET-Kokkos.cpp + MULADDSUB-Kokkos.cpp + NESTED_INIT-Kokkos.cpp + REDUCE3_INT-Kokkos.cpp + TRAP_INT-Kokkos.cpp + DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} + ) + +# Diagnostics +message (STATUS "${RAJA_PERFSUITE_DEPENDS}") + +blt_print_target_properties(TARGET RAJA) + +get_source_file_property(blah ATOMIC_PI-Kokkos.cpp HIP_SOURCE_PROPERTY_FORMAT) +message (STATUS "DOGS1 - ${blah}") diff --git a/src/basic-kokkos/DAXPY-Kokkos.cpp b/src/basic-kokkos/DAXPY-Kokkos.cpp new file mode 100644 index 000000000..dd8294cb7 --- /dev/null +++ b/src/basic-kokkos/DAXPY-Kokkos.cpp @@ -0,0 +1,97 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DAXPY.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + +struct DaxpyFunctor { + Real_ptr x; + Real_ptr y; + Real_type a; + DaxpyFunctor(Real_ptr m_x, Real_ptr m_y, Real_type m_a) : x(m_x), y(m_y), a(m_a) { } + void operator()(Index_type i) const { DAXPY_BODY; } +}; + +void DAXPY::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + DAXPY_DATA_SETUP; + + // Declare KokkosViews for the pointers that will be wrapped. + // Get pointer names in the KERNEL_NAME.hpp file + // Wrap pointers x and y in separate KokkosViews + // This is a one dimension array + // One dimensional arrays are indexed to iend (RAJAPerfSuite convention) + // New template-based machinery in /rajaperf/src/common/RAJAPerfSuite.hpp + + auto x_view = getViewFromPointer(x, iend); + + auto y_view = getViewFromPointer(y, iend); + + + + auto daxpy_lam = [=](Index_type i) { + DAXPY_BODY; + }; + + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + case Kokkos_Lambda: { + Kokkos::fence(); + + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + Kokkos::parallel_for("DAXPY-Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + // Increment y_view (pointer wrapped in KokksView) + // by product of a and ith entry of x_view + // DAXPY_BODY substituted with the + // calculation defined in DAXPY.hpp + KOKKOS_LAMBDA(Index_type i) { y_view[i] += a * x_view[i];} + ); + } + // Kokkos fence + Kokkos::fence(); + + stopTimer(); + + break; + } + default : { + std::cout << "\n DAXPY : Unknown variant id = " << vid << std::endl; + } + + } + + // Move data (i.e., pointer, KokkosView-wrapped ponter) back to the host from the device + + moveDataToHostFromKokkosView(x, x_view, iend); + + moveDataToHostFromKokkosView(y, y_view, iend); + +#endif // RUN_KOKKOS +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/IF_QUAD-Kokkos.cpp b/src/basic-kokkos/IF_QUAD-Kokkos.cpp new file mode 100644 index 000000000..39f2f6dc2 --- /dev/null +++ b/src/basic-kokkos/IF_QUAD-Kokkos.cpp @@ -0,0 +1,99 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "IF_QUAD.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + +void IF_QUAD::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + IF_QUAD_DATA_SETUP; + + // Instantiating views using getViewFromPointer for the IF_QUAD definition + + auto a_view = getViewFromPointer(a, iend); + auto b_view = getViewFromPointer(b, iend); + auto c_view = getViewFromPointer(c, iend); + auto x1_view = getViewFromPointer(x1, iend); + auto x2_view = getViewFromPointer(x2, iend); + + + auto ifquad_lam = [=](Index_type i) { + IF_QUAD_BODY; + }; + + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for("IF_QUAD_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA (Index_type i) { + + Real_type s = b_view[i]*b_view[i] - 4.0*a_view[i]*c_view[i]; + if ( s >= 0 ) { + s = sqrt(s); + x2_view[i] = (-b_view[i]+s)/(2.0*a_view[i]); + x1_view[i] = (-b_view[i]-s)/(2.0*a_view[i]); + } + else { + x2_view[i] = 0.0; + x1_view[i] = 0.0; + + } +}); + + } + + Kokkos::fence(); + stopTimer(); + + break; + + } + + default : { + std::cout << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + moveDataToHostFromKokkosView(a, a_view, iend); + moveDataToHostFromKokkosView(b, b_view, iend); + moveDataToHostFromKokkosView(c, c_view, iend); + moveDataToHostFromKokkosView(x1, x1_view, iend); + moveDataToHostFromKokkosView(x2, x2_view, iend); + + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/INIT3-Kokkos.cpp b/src/basic-kokkos/INIT3-Kokkos.cpp new file mode 100644 index 000000000..1ebaf83cc --- /dev/null +++ b/src/basic-kokkos/INIT3-Kokkos.cpp @@ -0,0 +1,90 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INIT3.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INIT3::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + INIT3_DATA_SETUP; + + // Instantiating Views using getViewFromPointer for the INIT3 definition + // (i.e., INIT3.hpp) + + // The pointer is the first argument, and the last index, denoted by iend, is + // your second argument + // + auto out1_view = getViewFromPointer(out1, iend); + auto out2_view = getViewFromPointer(out2, iend); + auto out3_view = getViewFromPointer(out3, iend); + auto in1_view = getViewFromPointer(in1, iend); + auto in2_view = getViewFromPointer(in2, iend); + + // Next step, integrate the INIT3_BODY into the Kokkos parallel expression + + auto init3_lam = [=](Index_type i) { + INIT3_BODY; + }; + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + +// Nota bene -- Conversion of Raja code begins here + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // Kokkos translation of INIT3_BODY + Kokkos::parallel_for("INIT3-Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + //INIT3_BODY definition: + // out1[i] = out2[i] = out3[i] = - in1[i] - in2[i] ; + out1_view[i] = out2_view[i] = out3_view[i] = - in1_view[i] - in2_view[i]; + }); + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default : { + std::cout << "\n INIT3 : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + moveDataToHostFromKokkosView(out1, out1_view, iend); + moveDataToHostFromKokkosView(out2, out2_view, iend); + moveDataToHostFromKokkosView(out3, out3_view, iend); + moveDataToHostFromKokkosView(in1, in1_view, iend); + moveDataToHostFromKokkosView(in2, in2_view, iend); + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp new file mode 100644 index 000000000..95702570e --- /dev/null +++ b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp @@ -0,0 +1,74 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INIT_VIEW1D.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INIT_VIEW1D::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + INIT_VIEW1D_DATA_SETUP; + + // Declare a Kokkos View that will be used to wrap a pointer + auto a_view = getViewFromPointer(a, iend); + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for("INIT_VIEW1D_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin,iend), + KOKKOS_LAMBDA (Index_type i) { + //INIT_VIEW1D_BODY_RAJA + //Instead, use the INIT_VIEW1D_BODY definition + //with Kokkos View + //a[i] = (i+1) * v; + a_view[i] = (i + 1) * v; + + }); + + } + + Kokkos::fence(); + stopTimer(); + + break; + } + + default : { + std::cout << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + moveDataToHostFromKokkosView(a, a_view, iend); + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp new file mode 100644 index 000000000..bc2d9d955 --- /dev/null +++ b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp @@ -0,0 +1,74 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "INIT_VIEW1D_OFFSET.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 1; + const Index_type iend = getActualProblemSize()+1; + + INIT_VIEW1D_OFFSET_DATA_SETUP; + + auto a_view = getViewFromPointer(a, iend); + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for("INIT_VIEW1D_OFFSET_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA (Index_type i) { + //INIT_VIEW1D_OFFSET_BODY_RAJA + //Instead, use the INIT_VIEW1D_OFFSET_BODY + //definition: + //a[i-ibegin] = i * v; + a_view[i-ibegin] = i * v; + }); + + + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default : { + std::cout << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl; + } + + } + +#endif // RUN_KOKKOS + + // Move data from Kokkos View (on Device) back to Host + moveDataToHostFromKokkosView(a, a_view, iend); + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/MULADDSUB-Kokkos.cpp b/src/basic-kokkos/MULADDSUB-Kokkos.cpp new file mode 100644 index 000000000..0caad2748 --- /dev/null +++ b/src/basic-kokkos/MULADDSUB-Kokkos.cpp @@ -0,0 +1,90 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "MULADDSUB.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + + +void MULADDSUB::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + MULADDSUB_DATA_SETUP; + + + // Define Kokkos Views that will wrap pointers defined in MULADDSUB.hpp + auto out1_view = getViewFromPointer(out1, iend); + auto out2_view = getViewFromPointer(out2, iend); + auto out3_view = getViewFromPointer(out3, iend); + auto in1_view = getViewFromPointer(in1, iend); + auto in2_view = getViewFromPointer(in2, iend); + + auto mas_lam = [=](Index_type i) { + MULADDSUB_BODY; + }; + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // If SIMD really matters , consider using Kokkos SIMD + Kokkos::parallel_for("MULTISUB-KokkosSeq Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + //MULADDSUB_BODY definition: + //out1[i] = in1[i] * in2[i] ; + //out2[i] = in1[i] + in2[i] ; + //out3[i] = in1[i] - in2[i] ; + // WITH KOKKOS VIEWS + out1_view[i] = in1_view[i] * in2_view[i] ; + out2_view[i] = in1_view[i] + in2_view[i] ; + out3_view[i] = in1_view[i] - in2_view[i] ; + }); + + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default : { + std::cout << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; + } + + } +#endif // RUN_KOKKOS + moveDataToHostFromKokkosView(out1, out1_view, iend); + moveDataToHostFromKokkosView(out2, out2_view, iend); + moveDataToHostFromKokkosView(out3, out3_view, iend); + moveDataToHostFromKokkosView(out3, out3_view, iend); + moveDataToHostFromKokkosView(in1, in1_view, iend); + moveDataToHostFromKokkosView(in2, in2_view, iend); + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp new file mode 100644 index 000000000..6f58e34c4 --- /dev/null +++ b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp @@ -0,0 +1,79 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "NESTED_INIT.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf { +namespace basic { + + +void NESTED_INIT::runKokkosVariant(VariantID vid) { + const Index_type run_reps = getRunReps(); + + NESTED_INIT_DATA_SETUP; + + // Wrap the nested init array pointer in a Kokkos View + // In a Kokkos View, array arguments for array boundaries go from outmost + // to innermost dimension sizes + // See the basic NESTED_INIT.hpp file for defnition of NESTED_INIT + + auto array_kokkos_view = getViewFromPointer(array, nk, nj, ni); + + auto nestedinit_lam = [=](Index_type i, Index_type j, Index_type k) { + NESTED_INIT_BODY; + }; + +#if defined RUN_KOKKOS + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // MDRange can be optimized + Kokkos::parallel_for("NESTED_INIT KokkosSeq", + // Range policy to define amount of work to be done + Kokkos::MDRangePolicy, + // Execution space + Kokkos::DefaultExecutionSpace>({0, 0, 0}, {nk, nj, ni}), + // Loop body + KOKKOS_LAMBDA(Index_type k, Index_type j, Index_type i) { + // #define NESTED_INIT_BODY + // array[i+ni*(j+nj*k)] = 0.00000001 * i * j * k ; + array_kokkos_view(k, j, i) = 0.00000001 * i * j * k; + }); + } + + Kokkos::fence(); + + stopTimer(); + // Moves mirror data from GPU to CPU (void, i.e., no return type). In + // this moving of data back to Host, the layout is changed back to Layout + // Right, vs. the LayoutLeft of the GPU + moveDataToHostFromKokkosView(array, array_kokkos_view, nk, nj, ni); + + break; + } + + default: { + std::cout << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; + } + } +#endif // RUN_KOKKOS +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp new file mode 100644 index 000000000..51a819951 --- /dev/null +++ b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp @@ -0,0 +1,77 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "PI_ATOMIC.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf { +namespace basic { + +void PI_ATOMIC::runKokkosVariant(VariantID vid) { + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + PI_ATOMIC_DATA_SETUP; + + // Declare Kokkos View that will wrap the pointer defined in PI_ATOMIC.hpp + auto pi_view = getViewFromPointer(pi, 1); + +#if defined(RUN_KOKKOS) + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + // Initializing a value, pi, on the host + *pi = m_pi_init; + + pi_view = getViewFromPointer(pi, 1); + + Kokkos::parallel_for( + "PI_ATOMIC-Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + double x = (double(i) + 0.5) * dx; + // Make a reference to the 0th element of a 1D view with one + // element + // Atomic operation is an uninterruptable, single operation; e.g., + // addition, multiplication, division, etc. All of these atomic + // operations are architecture dependent. Atomics are advantageous + // from a correctness point of view + Kokkos::atomic_add(&pi_view(0), dx / (1.0 + x * x)); + }); + // Moving the data on the device (held in the KokkosView) BACK to the + // pointer, pi. + moveDataToHostFromKokkosView(pi, pi_view, 1); + *pi *= 4.0; + } + + Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + std::cout << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; + } + } +#endif // RUN_KOKKOS + +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp new file mode 100644 index 000000000..4f340a919 --- /dev/null +++ b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp @@ -0,0 +1,151 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "REDUCE3_INT.hpp" + +#include "RAJA/RAJA.hpp" + +#include +#include + +namespace rajaperf +{ +namespace basic +{ + + +void REDUCE3_INT::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + REDUCE3_INT_DATA_SETUP; + + //Declare KokkosView that will wrap the pointer to a vector + + auto vec_view = getViewFromPointer(vec, iend); + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Int_type vsum = m_vsum_init; + Int_type vmin = m_vmin_init; + Int_type vmax = m_vmax_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + REDUCE3_INT_BODY; + } + + m_vsum += vsum; + m_vmin = RAJA_MIN(m_vmin, vmin); + m_vmax = RAJA_MAX(m_vmax, vmax); + + } + stopTimer(); + + break; + } + + case Lambda_Seq : { + + auto init3_base_lam = [=](Index_type i) -> Int_type { + return vec[i]; + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Int_type vsum = m_vsum_init; + Int_type vmin = m_vmin_init; + Int_type vmax = m_vmax_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + vsum += init3_base_lam(i); + vmin = RAJA_MIN(vmin, init3_base_lam(i)); + vmax = RAJA_MAX(vmax, init3_base_lam(i)); + } + + m_vsum += vsum; + m_vmin = RAJA_MIN(m_vmin, vmin); + m_vmax = RAJA_MAX(m_vmax, vmax); + + } + stopTimer(); + + break; + } + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { +/* + RAJA::ReduceSum vsum(m_vsum_init); + RAJA::ReduceMin vmin(m_vmin_init); + RAJA::ReduceMax vmax(m_vmax_init); + + RAJA::forall( + RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { + REDUCE3_INT_BODY_RAJA; + }); + + m_vsum += static_cast(vsum.get()); + m_vmin = RAJA_MIN(m_vmin, static_cast(vmin.get())); + m_vmax = RAJA_MAX(m_vmax, static_cast(vmax.get())); +*/ + // These values are initilized elsewhere by RPS + // These variables were declared to Kokkos-ify the parallel_reduce + // construct: +#ifndef RAJA_ENABLE_TARGET_OPENMP + Int_type max_value = m_vmax_init; + Int_type min_value = m_vmin_init; + Int_type sum = m_vsum_init; + + + parallel_reduce("REDUCE3-Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(const int64_t i, Int_type& tl_max, Int_type& tl_min, Int_type& tl_sum){ + Int_type vec_i = vec_view[i]; + if (vec_i > tl_max) tl_max = vec_i; + if (vec_i < tl_min) tl_min = vec_i; + tl_sum += vec_i; + }, + Kokkos::Max(max_value), + Kokkos::Min(min_value), + sum); + m_vsum += static_cast(sum); + m_vmin = RAJA_MIN(m_vmin, static_cast(min_value)); + m_vmax = RAJA_MAX(m_vmax, static_cast(max_value)); +#endif + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default : { + std::cout << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; + } + + } +#endif // RUN_KOKKOS + + moveDataToHostFromKokkosView(vec, vec_view, iend); +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/TRAP_INT-Kokkos.cpp b/src/basic-kokkos/TRAP_INT-Kokkos.cpp new file mode 100644 index 000000000..45e822015 --- /dev/null +++ b/src/basic-kokkos/TRAP_INT-Kokkos.cpp @@ -0,0 +1,143 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-20, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/COPYRIGHT file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "TRAP_INT.hpp" + +#include "RAJA/RAJA.hpp" + +#include + +namespace rajaperf +{ +namespace basic +{ + +// +// Function used in TRAP_INT loop. +// +RAJA_INLINE +// +KOKKOS_FUNCTION +Real_type trap_int_func(Real_type x, + Real_type y, + Real_type xp, + Real_type yp) +{ + Real_type denom = (x - xp)*(x - xp) + (y - yp)*(y - yp); + denom = 1.0/sqrt(denom); + return denom; +} + + +void TRAP_INT::runKokkosVariant(VariantID vid) +{ + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + TRAP_INT_DATA_SETUP; + +// Declare KokkosViews that will wrap a pointer - not relevant in this case +// ...? + + + +#if defined(RUN_KOKKOS) + + switch ( vid ) { + + case Base_Seq : { + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sumx = m_sumx_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + TRAP_INT_BODY; + } + + m_sumx += sumx * h; + + } + stopTimer(); + + break; + } + + case Lambda_Seq : { + + auto trapint_base_lam = [=](Index_type i) -> Real_type { + Real_type x = x0 + i*h; + return trap_int_func(x, y, xp, yp); + }; + + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Real_type sumx = m_sumx_init; + + for (Index_type i = ibegin; i < iend; ++i ) { + sumx += trapint_base_lam(i); + } + + m_sumx += sumx * h; + + } + stopTimer(); + + break; + } + + case Kokkos_Lambda : { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + +// RAJA::ReduceSum sumx(m_sumx_init); + +// RAJA::forall( +// RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { +// TRAP_INT_BODY; +// +// Begin Kokkos translation +// A RAJA reduce translates into a Kokkoss::parallel_reduce +// To perform the translation: + // Declare and initialize variables + // To perform a reduction, you need: 1) an initial value; 2) iterate + // over an iterable; 3) to be able to extract the result at the end of + // the reduction (in this case, trap_integral_val) + + Real_type trap_integral_val = m_sumx_init; + + Kokkos::parallel_reduce("TRAP_INT_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(const int64_t i, Real_type& sumx) {TRAP_INT_BODY}, + trap_integral_val + ); + + m_sumx += static_cast(trap_integral_val) * h; + + } + Kokkos::fence(); + stopTimer(); + + break; + } + + default : { + std::cout << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; + } + + } +#endif //RUN_KOKKOS +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic/DAXPY.cpp b/src/basic/DAXPY.cpp index 6d6133eb6..69a5a152e 100644 --- a/src/basic/DAXPY.cpp +++ b/src/basic/DAXPY.cpp @@ -51,6 +51,8 @@ DAXPY::DAXPY(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } DAXPY::~DAXPY() diff --git a/src/basic/DAXPY.hpp b/src/basic/DAXPY.hpp index db8501e9f..82a6fd9ff 100644 --- a/src/basic/DAXPY.hpp +++ b/src/basic/DAXPY.hpp @@ -52,6 +52,7 @@ class DAXPY : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/DAXPY_ATOMIC.hpp b/src/basic/DAXPY_ATOMIC.hpp index 909939a45..dd52d777c 100644 --- a/src/basic/DAXPY_ATOMIC.hpp +++ b/src/basic/DAXPY_ATOMIC.hpp @@ -55,6 +55,7 @@ class DAXPY_ATOMIC : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp index 69396d330..412f248b4 100644 --- a/src/basic/IF_QUAD.cpp +++ b/src/basic/IF_QUAD.cpp @@ -37,6 +37,10 @@ IF_QUAD::IF_QUAD(const RunParams& params) setUsesFeature(Forall); + + setVariantDefined( Kokkos_Lambda ); + + setVariantDefined( Base_Seq ); setVariantDefined( Lambda_Seq ); setVariantDefined( RAJA_Seq ); diff --git a/src/basic/IF_QUAD.hpp b/src/basic/IF_QUAD.hpp index 4d2a22c22..a03727a6c 100644 --- a/src/basic/IF_QUAD.hpp +++ b/src/basic/IF_QUAD.hpp @@ -69,6 +69,7 @@ class IF_QUAD : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INDEXLIST.hpp b/src/basic/INDEXLIST.hpp index 0836d8197..f180b8072 100644 --- a/src/basic/INDEXLIST.hpp +++ b/src/basic/INDEXLIST.hpp @@ -60,6 +60,7 @@ class INDEXLIST : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INDEXLIST_3LOOP.hpp b/src/basic/INDEXLIST_3LOOP.hpp index e19ee5508..d4ec35f76 100644 --- a/src/basic/INDEXLIST_3LOOP.hpp +++ b/src/basic/INDEXLIST_3LOOP.hpp @@ -71,6 +71,7 @@ class INDEXLIST_3LOOP : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT3.cpp b/src/basic/INIT3.cpp index fc3fd024d..1de99cceb 100644 --- a/src/basic/INIT3.cpp +++ b/src/basic/INIT3.cpp @@ -36,11 +36,13 @@ INIT3::INIT3(const RunParams& params) setVariantDefined( Base_Seq ); setVariantDefined( Lambda_Seq ); setVariantDefined( RAJA_Seq ); + setVariantDefined( Kokkos_Lambda ); setVariantDefined( Base_OpenMP ); setVariantDefined( Lambda_OpenMP ); setVariantDefined( RAJA_OpenMP ); + setVariantDefined( Base_OpenMPTarget ); setVariantDefined( RAJA_OpenMPTarget ); @@ -48,6 +50,7 @@ INIT3::INIT3(const RunParams& params) setVariantDefined( Lambda_CUDA ); setVariantDefined( RAJA_CUDA ); + setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); diff --git a/src/basic/INIT3.hpp b/src/basic/INIT3.hpp index 44f3622de..9bd152c04 100644 --- a/src/basic/INIT3.hpp +++ b/src/basic/INIT3.hpp @@ -55,6 +55,7 @@ class INIT3 : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); @@ -63,6 +64,10 @@ class INIT3 : public KernelBase template < size_t block_size > void runHipVariantImpl(VariantID vid); + + + + private: static const size_t default_gpu_block_size = 256; using gpu_block_sizes_type = gpu_block_size::make_list_type; diff --git a/src/basic/INIT_VIEW1D.cpp b/src/basic/INIT_VIEW1D.cpp index bd752aa06..ff65af383 100644 --- a/src/basic/INIT_VIEW1D.cpp +++ b/src/basic/INIT_VIEW1D.cpp @@ -52,6 +52,11 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + + } INIT_VIEW1D::~INIT_VIEW1D() diff --git a/src/basic/INIT_VIEW1D.hpp b/src/basic/INIT_VIEW1D.hpp index b51d38b79..b5dfbf097 100644 --- a/src/basic/INIT_VIEW1D.hpp +++ b/src/basic/INIT_VIEW1D.hpp @@ -66,6 +66,7 @@ class INIT_VIEW1D : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INIT_VIEW1D_OFFSET.cpp b/src/basic/INIT_VIEW1D_OFFSET.cpp index 165cd5544..0c6691338 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET.cpp @@ -52,6 +52,11 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + + } INIT_VIEW1D_OFFSET::~INIT_VIEW1D_OFFSET() diff --git a/src/basic/INIT_VIEW1D_OFFSET.hpp b/src/basic/INIT_VIEW1D_OFFSET.hpp index be597496d..4cc3548c7 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.hpp +++ b/src/basic/INIT_VIEW1D_OFFSET.hpp @@ -65,6 +65,7 @@ class INIT_VIEW1D_OFFSET : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/MAT_MAT_SHARED.cpp b/src/basic/MAT_MAT_SHARED.cpp index 98cd878ce..87a5fde1e 100644 --- a/src/basic/MAT_MAT_SHARED.cpp +++ b/src/basic/MAT_MAT_SHARED.cpp @@ -60,6 +60,8 @@ MAT_MAT_SHARED::MAT_MAT_SHARED(const RunParams ¶ms) setVariantDefined(Base_HIP); setVariantDefined(Lambda_HIP); setVariantDefined(RAJA_HIP); + + setVariantDefined(Kokkos_Lambda); } MAT_MAT_SHARED::~MAT_MAT_SHARED() {} diff --git a/src/basic/MAT_MAT_SHARED.hpp b/src/basic/MAT_MAT_SHARED.hpp index 095721c27..7013ca69c 100644 --- a/src/basic/MAT_MAT_SHARED.hpp +++ b/src/basic/MAT_MAT_SHARED.hpp @@ -139,6 +139,7 @@ class MAT_MAT_SHARED : public KernelBase { void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); @@ -147,6 +148,12 @@ class MAT_MAT_SHARED : public KernelBase { template < size_t block_size > void runHipVariantImpl(VariantID vid); + // { +// getCout() << "\n MAT_MAT_SHARED : Unknown Kokkos variant id = " << vid << std::endl; + //getCout() << "\n MAT_MAT_SHARED : Unknown Kokkos variant id and/or tune_idx = " << tune_idx << std::endl; + //getCout() << "\n MAT_MAT_SHARED : No Kokkos version yet " << std::endl; +// } + private: static const size_t default_gpu_block_size = TL_SZ * TL_SZ; using gpu_block_sizes_type = gpu_block_size::make_list_type; diff --git a/src/basic/MULADDSUB.cpp b/src/basic/MULADDSUB.cpp index d1c180b8e..ec06b061d 100644 --- a/src/basic/MULADDSUB.cpp +++ b/src/basic/MULADDSUB.cpp @@ -51,6 +51,12 @@ MULADDSUB::MULADDSUB(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + + + } MULADDSUB::~MULADDSUB() diff --git a/src/basic/MULADDSUB.hpp b/src/basic/MULADDSUB.hpp index 30ad11a54..63d637073 100644 --- a/src/basic/MULADDSUB.hpp +++ b/src/basic/MULADDSUB.hpp @@ -58,6 +58,7 @@ class MULADDSUB : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/NESTED_INIT.cpp b/src/basic/NESTED_INIT.cpp index ef9550d97..fb15bef18 100644 --- a/src/basic/NESTED_INIT.cpp +++ b/src/basic/NESTED_INIT.cpp @@ -62,6 +62,10 @@ NESTED_INIT::NESTED_INIT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + } NESTED_INIT::~NESTED_INIT() diff --git a/src/basic/NESTED_INIT.hpp b/src/basic/NESTED_INIT.hpp index 13da52cf2..6849c9a73 100644 --- a/src/basic/NESTED_INIT.hpp +++ b/src/basic/NESTED_INIT.hpp @@ -58,6 +58,7 @@ class NESTED_INIT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 776883232..26230bb49 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -35,6 +35,10 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setUsesFeature(Forall); setUsesFeature(Atomic); + setVariantDefined( Kokkos_Lambda ); + + + setVariantDefined( Base_Seq ); setVariantDefined( Lambda_Seq ); setVariantDefined( RAJA_Seq ); diff --git a/src/basic/PI_ATOMIC.hpp b/src/basic/PI_ATOMIC.hpp index 10c674dda..36e5d97d1 100644 --- a/src/basic/PI_ATOMIC.hpp +++ b/src/basic/PI_ATOMIC.hpp @@ -27,7 +27,6 @@ Real_type dx = m_dx; \ Real_ptr pi = m_pi; - #include "common/KernelBase.hpp" namespace rajaperf @@ -54,6 +53,7 @@ class PI_ATOMIC : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); @@ -62,6 +62,8 @@ class PI_ATOMIC : public KernelBase template < size_t block_size > void runHipVariantImpl(VariantID vid); + + private: static const size_t default_gpu_block_size = 256; using gpu_block_sizes_type = gpu_block_size::make_list_type; diff --git a/src/basic/PI_REDUCE.cpp b/src/basic/PI_REDUCE.cpp index 16d0770ba..e8966729b 100644 --- a/src/basic/PI_REDUCE.cpp +++ b/src/basic/PI_REDUCE.cpp @@ -51,6 +51,8 @@ PI_REDUCE::PI_REDUCE(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } PI_REDUCE::~PI_REDUCE() diff --git a/src/basic/PI_REDUCE.hpp b/src/basic/PI_REDUCE.hpp index c7cc3258a..c5c2107fe 100644 --- a/src/basic/PI_REDUCE.hpp +++ b/src/basic/PI_REDUCE.hpp @@ -56,6 +56,7 @@ class PI_REDUCE : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); @@ -64,6 +65,10 @@ class PI_REDUCE : public KernelBase template < size_t block_size > void runHipVariantImpl(VariantID vid); +// { +// getCout() << "\n PI_REDUCE : Unknown Kokkos variant id = " << vid << std::endl; +// } + private: static const size_t default_gpu_block_size = 256; using gpu_block_sizes_type = gpu_block_size::make_list_type; diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp index dee6d3a5e..afdba37b5 100644 --- a/src/basic/REDUCE3_INT.cpp +++ b/src/basic/REDUCE3_INT.cpp @@ -56,6 +56,11 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + + } REDUCE3_INT::~REDUCE3_INT() diff --git a/src/basic/REDUCE3_INT.hpp b/src/basic/REDUCE3_INT.hpp index 93ad766c2..c84fa84b2 100644 --- a/src/basic/REDUCE3_INT.hpp +++ b/src/basic/REDUCE3_INT.hpp @@ -70,6 +70,7 @@ class REDUCE3_INT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/TRAP_INT.cpp b/src/basic/TRAP_INT.cpp index 3bf939f38..585a657f7 100644 --- a/src/basic/TRAP_INT.cpp +++ b/src/basic/TRAP_INT.cpp @@ -51,6 +51,11 @@ TRAP_INT::TRAP_INT(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); + + + } TRAP_INT::~TRAP_INT() diff --git a/src/basic/TRAP_INT.hpp b/src/basic/TRAP_INT.hpp index 50acfeb79..eff85b90e 100644 --- a/src/basic/TRAP_INT.hpp +++ b/src/basic/TRAP_INT.hpp @@ -67,6 +67,7 @@ class TRAP_INT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t tune_idx); void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp index f3ee040ff..ba8a88ed8 100644 --- a/src/common/Executor.cpp +++ b/src/common/Executor.cpp @@ -852,12 +852,23 @@ void Executor::runSuite() getCout() << "\n\nRun warmup kernels...\n"; vector warmup_kernels; - + // TODO: Amy, check this warmup_kernels.push_back(makeKernel()); warmup_kernels.push_back(makeKernel()); + #ifndef RUN_KOKKOS warmup_kernels.push_back(makeKernel()); + #endif + #ifndef RUN_KOKKOS warmup_kernels.push_back(makeKernel()); + #endif + #ifndef RUN_KOKKOS warmup_kernels.push_back(makeKernel()); + #endif +// warmup_kernels.push_back(new basic::DAXPY(run_params)); +// warmup_kernels.push_back(new basic::REDUCE3_INT(run_params)); +//#ifndef RUN_KOKKOS +// warmup_kernels.push_back(new algorithm::SORT(run_params)); +//#endif for (size_t ik = 0; ik < warmup_kernels.size(); ++ik) { KernelBase* warmup_kernel = warmup_kernels[ik]; diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index a07a6bbbb..a949a45d8 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -238,6 +238,12 @@ void KernelBase::runKernel(VariantID vid, size_t tune_idx) #endif break; } + case Kokkos_Lambda : + { +#if defined(RUN_KOKKOS) + runKokkosVariant(vid); +#endif + } default : { #if 0 diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index 8d74d6e05..0ace216d1 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -230,6 +230,9 @@ class KernelBase #if defined(RAJA_ENABLE_TARGET_OPENMP) virtual void runOpenMPTargetVariant(VariantID vid, size_t tune_idx) = 0; #endif +#if defined(RUN_KOKKOS) + virtual void runKokkosVariant(VariantID vid) = 0; +#endif protected: const RunParams& run_params; diff --git a/src/common/RAJAPerfSuite.cpp b/src/common/RAJAPerfSuite.cpp index 9f66f4bf3..2f81890b2 100644 --- a/src/common/RAJAPerfSuite.cpp +++ b/src/common/RAJAPerfSuite.cpp @@ -25,7 +25,9 @@ #include "basic/INIT3.hpp" #include "basic/INIT_VIEW1D.hpp" #include "basic/INIT_VIEW1D_OFFSET.hpp" +#ifndef RUN_KOKKOS #include "basic/MAT_MAT_SHARED.hpp" +#endif #include "basic/MULADDSUB.hpp" #include "basic/NESTED_INIT.hpp" #include "basic/PI_ATOMIC.hpp" @@ -159,7 +161,9 @@ static const std::string KernelNames [] = std::string("Basic_INIT3"), std::string("Basic_INIT_VIEW1D"), std::string("Basic_INIT_VIEW1D_OFFSET"), + #ifndef RUN_KOKKOS std::string("Basic_MAT_MAT_SHARED"), + #endif std::string("Basic_MULADDSUB"), std::string("Basic_NESTED_INIT"), std::string("Basic_PI_ATOMIC"), @@ -274,6 +278,8 @@ static const std::string VariantNames [] = std::string("Lambda_HIP"), std::string("RAJA_HIP"), + std::string("Kokkos_Lambda"), + std::string("Unknown Variant") // Keep this at the end and DO NOT remove.... }; // END VariantNames @@ -418,6 +424,12 @@ bool isVariantAvailable(VariantID vid) } #endif +#if defined(RUN_KOKKOS) + if (vid == Kokkos_Lambda) { + ret_val = true; + } +#endif + return ret_val; } @@ -570,6 +582,7 @@ KernelBase* getKernelObject(KernelID kid, break; } +#ifndef RUN_KOKKOS // // Lcals kernels... // @@ -778,6 +791,8 @@ KernelBase* getKernelObject(KernelID kid, break; } +#endif + default: { getCout() << "\n Unknown Kernel ID = " << kid << std::endl; } diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index 61a6f3bef..d0466a559 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -16,6 +16,10 @@ #include "RAJA/config.hpp" #include "rajaperf_config.hpp" +#if defined(RUN_KOKKOS) +#include "Kokkos_Core.hpp" +#endif // RUN_KOKKOS + #include #include @@ -86,7 +90,9 @@ enum KernelID { Basic_REDUCE3_INT, Basic_REDUCE_STRUCT, Basic_TRAP_INT, - +#ifdef RUN_KOKKOS // move this up to the point implemented with Kokkos + NumKernels, +#endif // // Lcals kernels... // @@ -154,7 +160,11 @@ enum KernelID { Algorithm_SORTPAIRS, Algorithm_REDUCE_SUM, +#ifndef RUN_KOKKOS NumKernels // Keep this one last and NEVER comment out (!!) +#else + KokkosDummy +#endif }; @@ -192,6 +202,8 @@ enum VariantID { Lambda_HIP, RAJA_HIP, + Kokkos_Lambda, + NumVariants // Keep this one last and NEVER comment out (!!) }; @@ -348,6 +360,95 @@ std::ostream& getNullStream(); template < typename... Ts > inline void ignore_unused(Ts&&...) { } +#if defined(RUN_KOKKOS) +template +struct PointerOfNdimensions; + +template +struct PointerOfNdimensions { + using type = PointedAt; +}; + +template +struct PointerOfNdimensions { + using type = + typename PointerOfNdimensions::type *; +}; + +// This templated function is used to wrap pointers +// (declared and defined in RAJAPerf Suite kernels) in Kokkos Views +// +template +auto getViewFromPointer(PointedAt *kokkos_ptr, Boundaries... boundaries) + -> typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultExecutionSpace::memory_space> + +{ + + using host_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultHostExecutionSpace::memory_space>; + + using device_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultExecutionSpace::memory_space>; + + + using mirror_view_type = typename device_view_type::HostMirror; + + + host_view_type pointer_holder(kokkos_ptr, boundaries...); + + // The boundaries parameter pack contains the array dimenions; + // An allocation is implicitly made here + device_view_type device_data_copy("StringName", boundaries...); + + mirror_view_type cpu_to_gpu_mirror = + Kokkos::create_mirror_view(device_data_copy); + + + Kokkos::deep_copy(cpu_to_gpu_mirror, pointer_holder); + + Kokkos::deep_copy(device_data_copy, cpu_to_gpu_mirror); + + // Kokkos::View return type + + return device_data_copy; +} + +// This function will move data in a Kokkos::View back to host from device, +// and will be stored in the existing pointer(s) +template +void moveDataToHostFromKokkosView(PointedAt *kokkos_ptr, ExistingView my_view, + Boundaries... boundaries) +{ + + using host_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultHostExecutionSpace::memory_space>; + + using device_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultExecutionSpace::memory_space>; + + using mirror_view_type = typename device_view_type::HostMirror; + + + host_view_type pointer_holder(kokkos_ptr, boundaries...); + + // Layout is optimal for gpu, but data are actually located on CPU + mirror_view_type cpu_to_gpu_mirror = Kokkos::create_mirror_view(my_view); + + // Actual copying of the data from the gpu (my_view) back to the cpu + Kokkos::deep_copy(cpu_to_gpu_mirror, my_view); + + // This copies from the mirror on the host cpu back to the existing + // pointer(s) + Kokkos::deep_copy(pointer_holder, cpu_to_gpu_mirror); +} + +#endif // RUN_KOKKOS } // closing brace for rajaperf namespace #endif // closing endif for header file include guard From a8b4fc5a526799fc96f19d7ad65cd244091e81c4 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Thu, 13 Jan 2022 10:14:32 -0700 Subject: [PATCH 04/28] Clean up basic kernels and CMakeLists.txt --- CMakeLists.txt | 36 +++++++++++++++++--------------- src/basic/IF_QUAD.cpp | 6 ++---- src/basic/INIT3.cpp | 5 ++--- src/basic/INIT3.hpp | 1 - src/basic/INIT_VIEW1D.cpp | 3 --- src/basic/INIT_VIEW1D_OFFSET.cpp | 3 --- src/basic/MAT_MAT_SHARED.hpp | 6 +++--- src/basic/MULADDSUB.cpp | 4 ---- src/basic/NESTED_INIT.cpp | 2 -- src/basic/PI_ATOMIC.cpp | 2 ++ src/basic/PI_REDUCE.hpp | 6 +++--- src/basic/REDUCE3_INT.cpp | 3 --- src/basic/TRAP_INT.cpp | 3 --- 13 files changed, 31 insertions(+), 49 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b39d4653b..c5fd9f818 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,7 @@ cmake_dependent_option(RAJA_PERFSUITE_ENABLE_OPENMP5_SCAN "Build OpenMP scan var # # Define RAJA settings... +# set(RAJA_ENABLE_TESTS Off CACHE BOOL "") set(RAJA_ENABLE_EXAMPLES Off CACHE BOOL "") @@ -96,6 +97,9 @@ set(RAJA_PERFSUITE_VERSION_PATCHLEVEL 0) set(RAJA_PERFSUITE_DEPENDS RAJA) +if (RAJA_PERFSUITE_ENABLE_MPI) + list(APPEND RAJA_PERFSUITE_DEPENDS mpi) +endif() if (ENABLE_OPENMP) list(APPEND RAJA_PERFSUITE_DEPENDS openmp) endif() @@ -104,7 +108,7 @@ if (ENABLE_CUDA) endif() # Kokkos requires hipcc as the CMAKE_CXX_COMPILER for HIP AMD/VEGA GPU -# platforms, whereas RAJAPerf Suite uses blt/CMake FindHIP to set HIP compiler +# platforms, whereas RAJAPerf Suite uses blt/CMake FindHIP to set HIP compiler. # Separate RAJAPerf Suite and Kokkos handling of HIP compilers if ((ENABLE_HIP) AND (NOT ENABLE_KOKKOS)) @@ -122,10 +126,12 @@ set(RAJAPERF_BUILD_HOST $ENV{HOSTNAME}) if (ENABLE_CUDA) if (ENABLE_KOKKOS) set(CMAKE_CUDA_STANDARD 17) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict --extended-lambda --expt-relaxed-constexpr") else() set(CMAKE_CUDA_STANDARD 14) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr") endif() - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -restrict -arch ${CUDA_ARCH} --expt-extended-lambda --expt-relaxed-constexpr") + set(RAJAPERF_COMPILER "${CUDA_NVCC_EXECUTABLE}") list(APPEND RAJAPERF_COMPILER ${CMAKE_CXX_COMPILER}) set(RAJAPERF_COMPILER_OPTIONS "${CUDA_NVCC_FLAGS}") @@ -145,44 +151,41 @@ configure_file(${CMAKE_SOURCE_DIR}/src/rajaperf_config.hpp.in include_directories($) -# Make sure RAJA flags propagate (we need to do some tidying to +# Make sure RAJA flags propagate (we need to do some tidying to # remove project-specific CMake variables that are no longer needed) set (CUDA_NVCC_FLAGS ${RAJA_NVCC_FLAGS}) +# +# Each directory in the perf suite has its own CMakeLists.txt file. +# + # The statement below is required for Kokkos compilation. if(ENABLE_KOKKOS) include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/tpl/RAJA/include/) endif() - # ENABLE_KOKKOS is A RAJAPerf Suite Option if(ENABLE_KOKKOS) add_definitions(-DRUN_KOKKOS) if(ENABLE_HIP) set(Kokkos_ENABLE_HIP ON CACHE BOOL "Kokkos builds for AMD HIP set the -Kokkos_ENABLE_HIP variable to ON") - #set(Kokkos_ARCH_VEGA900 ON CACHE BOOL "Docstring") #TODO: better +Kokkos_ENABLE_HIP variable to ON") endif() + if(ENABLE_TARGET_OPENMP) - set(Kokkos_ENABLE_OPENMPTARGET ON CACHE BOOL "Docstring") - set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Docstring") #TODO: better - set(CMAKE_CXX_STANDARD 17) - set(BLT_CXX_STANDARD 17) + set(Kokkos_ENABLE_OPENMPTARGET ON CACHE BOOL "Docstring") set(RAJA_ENABLE_TARGET_OPENMP ON CACHE BOOL "Docstring") if(NOT CMAKE_BUILD_TYPE MATCHES Debug) if(NOT EXPERIMENTAL_BUILD) - message(FATAL_ERROR "Kokkos builds with OpenMPTarget require a Debug build to succeed at the moment. Rebuild with CMAKE_BUILD_TYPE=Debug. If you're a compiler developer, rebuild with -DEXPERIMENTAL_BUILD=ON") + message(FATAL_ERROR "Kokkos builds with OpenMPTarget require a Debug build to succeed at the moment. Rebuild with CMAKE_BUILD_TYPE=Debug. If you're a compiler developer, rebuild with -DEXPERIMENTAL_BUILD=ON") endif() endif() - - #add_definitions(-DRAJA_ENABLE_TARGET_OPENMP) endif() # ENABLE_CUDA IS A RAJA PERFSUITE OPTION if(ENABLE_CUDA) - set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Docstring") - set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Docstring") - set(Kokkos_ARCH_VOLTA70 ON CACHE BOOL "Docstring") #TODO: better + set(Kokkos_ENABLE_CUDA ON CACHE BOOL "Docstring") + set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Docstring") enable_language(CUDA) endif() if(ENABLE_OPENMP) @@ -192,7 +195,6 @@ Kokkos_ENABLE_HIP variable to ON") add_subdirectory(tpl/kokkos) get_property(KOKKOS_INCLUDE_DIRS DIRECTORY tpl/kokkos PROPERTY INCLUDE_DIRECTORIES) include_directories(${KOKKOS_INCLUDE_DIRS}) - list(APPEND RAJA_PERFSUITE_DEPENDS kokkos) endif() diff --git a/src/basic/IF_QUAD.cpp b/src/basic/IF_QUAD.cpp index 412f248b4..4a8d60035 100644 --- a/src/basic/IF_QUAD.cpp +++ b/src/basic/IF_QUAD.cpp @@ -37,10 +37,6 @@ IF_QUAD::IF_QUAD(const RunParams& params) setUsesFeature(Forall); - - setVariantDefined( Kokkos_Lambda ); - - setVariantDefined( Base_Seq ); setVariantDefined( Lambda_Seq ); setVariantDefined( RAJA_Seq ); @@ -59,6 +55,8 @@ IF_QUAD::IF_QUAD(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } IF_QUAD::~IF_QUAD() diff --git a/src/basic/INIT3.cpp b/src/basic/INIT3.cpp index 1de99cceb..a504fa914 100644 --- a/src/basic/INIT3.cpp +++ b/src/basic/INIT3.cpp @@ -36,13 +36,11 @@ INIT3::INIT3(const RunParams& params) setVariantDefined( Base_Seq ); setVariantDefined( Lambda_Seq ); setVariantDefined( RAJA_Seq ); - setVariantDefined( Kokkos_Lambda ); setVariantDefined( Base_OpenMP ); setVariantDefined( Lambda_OpenMP ); setVariantDefined( RAJA_OpenMP ); - setVariantDefined( Base_OpenMPTarget ); setVariantDefined( RAJA_OpenMPTarget ); @@ -50,10 +48,11 @@ INIT3::INIT3(const RunParams& params) setVariantDefined( Lambda_CUDA ); setVariantDefined( RAJA_CUDA ); - setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } INIT3::~INIT3() diff --git a/src/basic/INIT3.hpp b/src/basic/INIT3.hpp index 9bd152c04..4358daf00 100644 --- a/src/basic/INIT3.hpp +++ b/src/basic/INIT3.hpp @@ -66,7 +66,6 @@ class INIT3 : public KernelBase - private: static const size_t default_gpu_block_size = 256; diff --git a/src/basic/INIT_VIEW1D.cpp b/src/basic/INIT_VIEW1D.cpp index ff65af383..2cb2b2376 100644 --- a/src/basic/INIT_VIEW1D.cpp +++ b/src/basic/INIT_VIEW1D.cpp @@ -54,9 +54,6 @@ INIT_VIEW1D::INIT_VIEW1D(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Kokkos_Lambda ); - - - } INIT_VIEW1D::~INIT_VIEW1D() diff --git a/src/basic/INIT_VIEW1D_OFFSET.cpp b/src/basic/INIT_VIEW1D_OFFSET.cpp index 0c6691338..f31395b07 100644 --- a/src/basic/INIT_VIEW1D_OFFSET.cpp +++ b/src/basic/INIT_VIEW1D_OFFSET.cpp @@ -54,9 +54,6 @@ INIT_VIEW1D_OFFSET::INIT_VIEW1D_OFFSET(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Kokkos_Lambda ); - - - } INIT_VIEW1D_OFFSET::~INIT_VIEW1D_OFFSET() diff --git a/src/basic/MAT_MAT_SHARED.hpp b/src/basic/MAT_MAT_SHARED.hpp index 7013ca69c..8c046e47d 100644 --- a/src/basic/MAT_MAT_SHARED.hpp +++ b/src/basic/MAT_MAT_SHARED.hpp @@ -148,11 +148,11 @@ class MAT_MAT_SHARED : public KernelBase { template < size_t block_size > void runHipVariantImpl(VariantID vid); - // { -// getCout() << "\n MAT_MAT_SHARED : Unknown Kokkos variant id = " << vid << std::endl; + { + getCout() << "\n MAT_MAT_SHARED : Unknown Kokkos variant id = " << vid << std::endl; //getCout() << "\n MAT_MAT_SHARED : Unknown Kokkos variant id and/or tune_idx = " << tune_idx << std::endl; //getCout() << "\n MAT_MAT_SHARED : No Kokkos version yet " << std::endl; -// } + } private: static const size_t default_gpu_block_size = TL_SZ * TL_SZ; diff --git a/src/basic/MULADDSUB.cpp b/src/basic/MULADDSUB.cpp index ec06b061d..1d4981ca2 100644 --- a/src/basic/MULADDSUB.cpp +++ b/src/basic/MULADDSUB.cpp @@ -53,10 +53,6 @@ MULADDSUB::MULADDSUB(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Kokkos_Lambda ); - - - - } MULADDSUB::~MULADDSUB() diff --git a/src/basic/NESTED_INIT.cpp b/src/basic/NESTED_INIT.cpp index fb15bef18..30cbd0254 100644 --- a/src/basic/NESTED_INIT.cpp +++ b/src/basic/NESTED_INIT.cpp @@ -64,8 +64,6 @@ NESTED_INIT::NESTED_INIT(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Kokkos_Lambda ); - - } NESTED_INIT::~NESTED_INIT() diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 26230bb49..7e5b075c6 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -57,6 +57,8 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( Lambda_HIP ); setVariantDefined( RAJA_HIP ); + + setVariantDefined( Kokkos_Lambda ); } PI_ATOMIC::~PI_ATOMIC() diff --git a/src/basic/PI_REDUCE.hpp b/src/basic/PI_REDUCE.hpp index c5c2107fe..89daac956 100644 --- a/src/basic/PI_REDUCE.hpp +++ b/src/basic/PI_REDUCE.hpp @@ -65,9 +65,9 @@ class PI_REDUCE : public KernelBase template < size_t block_size > void runHipVariantImpl(VariantID vid); -// { -// getCout() << "\n PI_REDUCE : Unknown Kokkos variant id = " << vid << std::endl; -// } + { + getCout() << "\n PI_REDUCE : Unknown Kokkos variant id = " << vid << std::endl; + } private: static const size_t default_gpu_block_size = 256; diff --git a/src/basic/REDUCE3_INT.cpp b/src/basic/REDUCE3_INT.cpp index afdba37b5..941d85ac1 100644 --- a/src/basic/REDUCE3_INT.cpp +++ b/src/basic/REDUCE3_INT.cpp @@ -58,9 +58,6 @@ REDUCE3_INT::REDUCE3_INT(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Kokkos_Lambda ); - - - } REDUCE3_INT::~REDUCE3_INT() diff --git a/src/basic/TRAP_INT.cpp b/src/basic/TRAP_INT.cpp index 585a657f7..63da29799 100644 --- a/src/basic/TRAP_INT.cpp +++ b/src/basic/TRAP_INT.cpp @@ -53,9 +53,6 @@ TRAP_INT::TRAP_INT(const RunParams& params) setVariantDefined( RAJA_HIP ); setVariantDefined( Kokkos_Lambda ); - - - } TRAP_INT::~TRAP_INT() From 42f1181da8087af1d33fb8d170225c0dd86ec05a Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Tue, 1 Feb 2022 17:21:02 -0700 Subject: [PATCH 05/28] Addressing PR comments before merge with RPS develop --- CMakeLists.txt | 6 - src/RAJAPerfSuiteDriver.cpp | 6 +- src/basic-kokkos/CMakeLists.txt | 8 - src/basic-kokkos/DAXPY-Kokkos.cpp | 80 +++----- src/basic-kokkos/IF_QUAD-Kokkos.cpp | 104 ++++------- src/basic-kokkos/INIT3-Kokkos.cpp | 85 ++++----- src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp | 62 +++---- .../INIT_VIEW1D_OFFSET-Kokkos.cpp | 60 +++--- src/basic-kokkos/MULADDSUB-Kokkos.cpp | 76 +++----- src/basic-kokkos/NESTED_INIT-Kokkos.cpp | 43 ++--- src/basic-kokkos/PI_ATOMIC-Kokkos.cpp | 12 +- src/basic-kokkos/REDUCE3_INT-Kokkos.cpp | 174 ++++++------------ src/basic-kokkos/TRAP_INT-Kokkos.cpp | 132 +++---------- src/common/KokkosViewUtils.hpp | 103 +++++++++++ src/common/RAJAPerfSuite.hpp | 56 +++--- 15 files changed, 403 insertions(+), 604 deletions(-) create mode 100644 src/common/KokkosViewUtils.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c5fd9f818..c64fee113 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -157,12 +157,6 @@ set (CUDA_NVCC_FLAGS ${RAJA_NVCC_FLAGS}) # # Each directory in the perf suite has its own CMakeLists.txt file. -# - -# The statement below is required for Kokkos compilation. -if(ENABLE_KOKKOS) - include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/tpl/RAJA/include/) -endif() # ENABLE_KOKKOS is A RAJAPerf Suite Option if(ENABLE_KOKKOS) diff --git a/src/RAJAPerfSuiteDriver.cpp b/src/RAJAPerfSuiteDriver.cpp index 17e64f0f0..5d0680482 100644 --- a/src/RAJAPerfSuiteDriver.cpp +++ b/src/RAJAPerfSuiteDriver.cpp @@ -19,14 +19,14 @@ #endif //------------------------------------------------------------------------------ -int main( int argc, char** argv ) -{ +int main(int argc, char **argv) { #ifdef RAJA_PERFSUITE_ENABLE_MPI MPI_Init(&argc, &argv); int num_ranks; MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); - rajaperf::getCout() << "\n\nRunning with " << num_ranks << " MPI ranks..." << std::endl; + rajaperf::getCout() << "\n\nRunning with " << num_ranks << " MPI ranks..." + << std::endl; #endif #ifdef RUN_KOKKOS Kokkos::initialize(argc, argv); diff --git a/src/basic-kokkos/CMakeLists.txt b/src/basic-kokkos/CMakeLists.txt index c859747c2..02e14cfc4 100644 --- a/src/basic-kokkos/CMakeLists.txt +++ b/src/basic-kokkos/CMakeLists.txt @@ -23,11 +23,3 @@ blt_add_library( TRAP_INT-Kokkos.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) - -# Diagnostics -message (STATUS "${RAJA_PERFSUITE_DEPENDS}") - -blt_print_target_properties(TARGET RAJA) - -get_source_file_property(blah ATOMIC_PI-Kokkos.cpp HIP_SOURCE_PROPERTY_FORMAT) -message (STATUS "DOGS1 - ${blah}") diff --git a/src/basic-kokkos/DAXPY-Kokkos.cpp b/src/basic-kokkos/DAXPY-Kokkos.cpp index dd8294cb7..eb2dac484 100644 --- a/src/basic-kokkos/DAXPY-Kokkos.cpp +++ b/src/basic-kokkos/DAXPY-Kokkos.cpp @@ -7,90 +7,60 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "DAXPY.hpp" - -#include "RAJA/RAJA.hpp" - +#include "common/KokkosViewUtils.hpp" #include -namespace rajaperf -{ -namespace basic -{ +namespace rajaperf { +namespace basic { struct DaxpyFunctor { Real_ptr x; Real_ptr y; Real_type a; - DaxpyFunctor(Real_ptr m_x, Real_ptr m_y, Real_type m_a) : x(m_x), y(m_y), a(m_a) { } + DaxpyFunctor(Real_ptr m_x, Real_ptr m_y, Real_type m_a) + : x(m_x), y(m_y), a(m_a) {} void operator()(Index_type i) const { DAXPY_BODY; } }; -void DAXPY::runKokkosVariant(VariantID vid) -{ +void DAXPY::runKokkosVariant(VariantID vid) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); DAXPY_DATA_SETUP; - // Declare KokkosViews for the pointers that will be wrapped. - // Get pointer names in the KERNEL_NAME.hpp file - // Wrap pointers x and y in separate KokkosViews - // This is a one dimension array - // One dimensional arrays are indexed to iend (RAJAPerfSuite convention) - // New template-based machinery in /rajaperf/src/common/RAJAPerfSuite.hpp - auto x_view = getViewFromPointer(x, iend); - auto y_view = getViewFromPointer(y, iend); + switch (vid) { + case Kokkos_Lambda: { - auto daxpy_lam = [=](Index_type i) { - DAXPY_BODY; - }; - - - -#if defined(RUN_KOKKOS) - - switch ( vid ) { + Kokkos::fence(); + startTimer(); - case Kokkos_Lambda: { - Kokkos::fence(); - - startTimer(); - - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - Kokkos::parallel_for("DAXPY-Kokkos Kokkos_Lambda", - Kokkos::RangePolicy(ibegin, iend), - // Increment y_view (pointer wrapped in KokksView) - // by product of a and ith entry of x_view - // DAXPY_BODY substituted with the - // calculation defined in DAXPY.hpp - KOKKOS_LAMBDA(Index_type i) { y_view[i] += a * x_view[i];} - ); - } - // Kokkos fence - Kokkos::fence(); - - stopTimer(); - - break; - } - default : { - std::cout << "\n DAXPY : Unknown variant id = " << vid << std::endl; + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + Kokkos::parallel_for( + "DAXPY-Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { y_view[i] += a * x_view[i]; }); } + Kokkos::fence(); + stopTimer(); + + break; + } + default: { + std::cout << "\n DAXPY : Unknown variant id = " << vid << std::endl; + } } - // Move data (i.e., pointer, KokkosView-wrapped ponter) back to the host from the device + // Move data (i.e., pointer, KokkosView-wrapped ponter) back to the host from + // the device moveDataToHostFromKokkosView(x, x_view, iend); - moveDataToHostFromKokkosView(y, y_view, iend); - -#endif // RUN_KOKKOS } } // end namespace basic diff --git a/src/basic-kokkos/IF_QUAD-Kokkos.cpp b/src/basic-kokkos/IF_QUAD-Kokkos.cpp index 39f2f6dc2..9eb994a45 100644 --- a/src/basic-kokkos/IF_QUAD-Kokkos.cpp +++ b/src/basic-kokkos/IF_QUAD-Kokkos.cpp @@ -7,18 +7,13 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "IF_QUAD.hpp" - -#include "RAJA/RAJA.hpp" - +#include "common/KokkosViewUtils.hpp" #include -namespace rajaperf -{ -namespace basic -{ +namespace rajaperf { +namespace basic { -void IF_QUAD::runKokkosVariant(VariantID vid) -{ +void IF_QUAD::runKokkosVariant(VariantID vid) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); @@ -27,72 +22,53 @@ void IF_QUAD::runKokkosVariant(VariantID vid) // Instantiating views using getViewFromPointer for the IF_QUAD definition - auto a_view = getViewFromPointer(a, iend); - auto b_view = getViewFromPointer(b, iend); - auto c_view = getViewFromPointer(c, iend); - auto x1_view = getViewFromPointer(x1, iend); - auto x2_view = getViewFromPointer(x2, iend); - - - auto ifquad_lam = [=](Index_type i) { - IF_QUAD_BODY; - }; - - - -#if defined(RUN_KOKKOS) - - switch ( vid ) { - - - case Kokkos_Lambda : { - - Kokkos::fence(); - startTimer(); - - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - Kokkos::parallel_for("IF_QUAD_Kokkos Kokkos_Lambda", - Kokkos::RangePolicy(ibegin, iend), - KOKKOS_LAMBDA (Index_type i) { - - Real_type s = b_view[i]*b_view[i] - 4.0*a_view[i]*c_view[i]; - if ( s >= 0 ) { - s = sqrt(s); - x2_view[i] = (-b_view[i]+s)/(2.0*a_view[i]); - x1_view[i] = (-b_view[i]-s)/(2.0*a_view[i]); - } - else { - x2_view[i] = 0.0; - x1_view[i] = 0.0; - - } -}); - - } - - Kokkos::fence(); - stopTimer(); - - break; - + auto a_view = getViewFromPointer(a, iend); + auto b_view = getViewFromPointer(b, iend); + auto c_view = getViewFromPointer(c, iend); + auto x1_view = getViewFromPointer(x1, iend); + auto x2_view = getViewFromPointer(x2, iend); + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for( + "IF_QUAD_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + Real_type s = b_view[i] * b_view[i] - 4.0 * a_view[i] * c_view[i]; + if (s >= 0) { + s = sqrt(s); + x2_view[i] = (-b_view[i] + s) / (2.0 * a_view[i]); + x1_view[i] = (-b_view[i] - s) / (2.0 * a_view[i]); + } else { + x2_view[i] = 0.0; + x1_view[i] = 0.0; + } + }); } - default : { - std::cout << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; - } + Kokkos::fence(); + stopTimer(); + break; } -#endif // RUN_KOKKOS + default: { + std::cout << "\n IF_QUAD : Unknown variant id = " << vid << std::endl; + } + } moveDataToHostFromKokkosView(a, a_view, iend); moveDataToHostFromKokkosView(b, b_view, iend); moveDataToHostFromKokkosView(c, c_view, iend); moveDataToHostFromKokkosView(x1, x1_view, iend); moveDataToHostFromKokkosView(x2, x2_view, iend); - - } } // end namespace basic diff --git a/src/basic-kokkos/INIT3-Kokkos.cpp b/src/basic-kokkos/INIT3-Kokkos.cpp index 1ebaf83cc..78d338617 100644 --- a/src/basic-kokkos/INIT3-Kokkos.cpp +++ b/src/basic-kokkos/INIT3-Kokkos.cpp @@ -7,83 +7,64 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "INIT3.hpp" - -#include "RAJA/RAJA.hpp" - +#include "common/KokkosViewUtils.hpp" #include -namespace rajaperf -{ -namespace basic -{ +namespace rajaperf { +namespace basic { - -void INIT3::runKokkosVariant(VariantID vid) -{ +void INIT3::runKokkosVariant(VariantID vid) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); - - INIT3_DATA_SETUP; // Instantiating Views using getViewFromPointer for the INIT3 definition // (i.e., INIT3.hpp) - + + INIT3_DATA_SETUP; + // The pointer is the first argument, and the last index, denoted by iend, is // your second argument // auto out1_view = getViewFromPointer(out1, iend); auto out2_view = getViewFromPointer(out2, iend); auto out3_view = getViewFromPointer(out3, iend); - auto in1_view = getViewFromPointer(in1, iend); - auto in2_view = getViewFromPointer(in2, iend); + auto in1_view = getViewFromPointer(in1, iend); + auto in2_view = getViewFromPointer(in2, iend); - // Next step, integrate the INIT3_BODY into the Kokkos parallel expression + switch (vid) { - auto init3_lam = [=](Index_type i) { - INIT3_BODY; - }; + case Kokkos_Lambda: { -#if defined(RUN_KOKKOS) + Kokkos::fence(); + startTimer(); - switch ( vid ) { + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { -// Nota bene -- Conversion of Raja code begins here - case Kokkos_Lambda : { - - Kokkos::fence(); - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - // Kokkos translation of INIT3_BODY - Kokkos::parallel_for("INIT3-Kokkos Kokkos_Lambda", - Kokkos::RangePolicy(ibegin, iend), - KOKKOS_LAMBDA(Index_type i) { - //INIT3_BODY definition: - // out1[i] = out2[i] = out3[i] = - in1[i] - in2[i] ; - out1_view[i] = out2_view[i] = out3_view[i] = - in1_view[i] - in2_view[i]; - }); - } - Kokkos::fence(); - stopTimer(); - - break; - } - - default : { - std::cout << "\n INIT3 : Unknown variant id = " << vid << std::endl; + Kokkos::parallel_for( + "INIT3-Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + out1_view[i] = out2_view[i] = out3_view[i] = + -in1_view[i] - in2_view[i]; + }); } + Kokkos::fence(); + stopTimer(); + break; } -#endif // RUN_KOKKOS - - moveDataToHostFromKokkosView(out1, out1_view, iend); - moveDataToHostFromKokkosView(out2, out2_view, iend); - moveDataToHostFromKokkosView(out3, out3_view, iend); - moveDataToHostFromKokkosView(in1, in1_view, iend); - moveDataToHostFromKokkosView(in2, in2_view, iend); + default: { + std::cout << "\n INIT3 : Unknown variant id = " << vid << std::endl; + } + } + moveDataToHostFromKokkosView(out1, out1_view, iend); + moveDataToHostFromKokkosView(out2, out2_view, iend); + moveDataToHostFromKokkosView(out3, out3_view, iend); + moveDataToHostFromKokkosView(in1, in1_view, iend); + moveDataToHostFromKokkosView(in2, in2_view, iend); } } // end namespace basic diff --git a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp index 95702570e..59dc4d814 100644 --- a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp +++ b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp @@ -7,67 +7,49 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "INIT_VIEW1D.hpp" - -#include "RAJA/RAJA.hpp" - +#include "common/KokkosViewUtils.hpp" #include -namespace rajaperf -{ -namespace basic -{ - +namespace rajaperf { +namespace basic { -void INIT_VIEW1D::runKokkosVariant(VariantID vid) -{ +void INIT_VIEW1D::runKokkosVariant(VariantID vid) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); INIT_VIEW1D_DATA_SETUP; - // Declare a Kokkos View that will be used to wrap a pointer auto a_view = getViewFromPointer(a, iend); -#if defined(RUN_KOKKOS) - - switch ( vid ) { - - case Kokkos_Lambda : { - - Kokkos::fence(); - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + switch (vid) { - Kokkos::parallel_for("INIT_VIEW1D_Kokkos Kokkos_Lambda", - Kokkos::RangePolicy(ibegin,iend), - KOKKOS_LAMBDA (Index_type i) { - //INIT_VIEW1D_BODY_RAJA - //Instead, use the INIT_VIEW1D_BODY definition - //with Kokkos View - //a[i] = (i+1) * v; - a_view[i] = (i + 1) * v; + case Kokkos_Lambda: { - }); + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - } - - Kokkos::fence(); - stopTimer(); - - break; + Kokkos::parallel_for( + "INIT_VIEW1D_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + a_view[i] = (i + 1) * v; + }); } - default : { - std::cout << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; - } + Kokkos::fence(); + stopTimer(); + break; } -#endif // RUN_KOKKOS + default: { + std::cout << "\n INIT_VIEW1D : Unknown variant id = " << vid << std::endl; + } + } moveDataToHostFromKokkosView(a, a_view, iend); - } } // end namespace basic diff --git a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp index bc2d9d955..c8e0c13ae 100644 --- a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp +++ b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp @@ -7,67 +7,51 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "INIT_VIEW1D_OFFSET.hpp" - -#include "RAJA/RAJA.hpp" +#include "common/KokkosViewUtils.hpp" #include -namespace rajaperf -{ -namespace basic -{ - +namespace rajaperf { +namespace basic { -void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid) -{ +void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 1; - const Index_type iend = getActualProblemSize()+1; + const Index_type iend = getActualProblemSize() + 1; INIT_VIEW1D_OFFSET_DATA_SETUP; auto a_view = getViewFromPointer(a, iend); + switch (vid) { -#if defined(RUN_KOKKOS) - - switch ( vid ) { - - case Kokkos_Lambda : { - - Kokkos::fence(); - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + case Kokkos_Lambda: { - Kokkos::parallel_for("INIT_VIEW1D_OFFSET_Kokkos Kokkos_Lambda", - Kokkos::RangePolicy(ibegin, iend), - KOKKOS_LAMBDA (Index_type i) { - //INIT_VIEW1D_OFFSET_BODY_RAJA - //Instead, use the INIT_VIEW1D_OFFSET_BODY - //definition: - //a[i-ibegin] = i * v; - a_view[i-ibegin] = i * v; - }); + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - } - Kokkos::fence(); - stopTimer(); - - break; + Kokkos::parallel_for( + "INIT_VIEW1D_OFFSET_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { a_view[i - ibegin] = i * v; }); } - default : { - std::cout << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid << std::endl; - } + Kokkos::fence(); + stopTimer(); + break; } -#endif // RUN_KOKKOS + default: { + std::cout << "\n INIT_VIEW1D_OFFSET : Unknown variant id = " << vid + << std::endl; + } + } // Move data from Kokkos View (on Device) back to Host moveDataToHostFromKokkosView(a, a_view, iend); - } } // end namespace basic diff --git a/src/basic-kokkos/MULADDSUB-Kokkos.cpp b/src/basic-kokkos/MULADDSUB-Kokkos.cpp index 0caad2748..2d5872fd0 100644 --- a/src/basic-kokkos/MULADDSUB-Kokkos.cpp +++ b/src/basic-kokkos/MULADDSUB-Kokkos.cpp @@ -7,83 +7,63 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "MULADDSUB.hpp" - -#include "RAJA/RAJA.hpp" +#include "common/KokkosViewUtils.hpp" #include -namespace rajaperf -{ -namespace basic -{ - +namespace rajaperf { +namespace basic { -void MULADDSUB::runKokkosVariant(VariantID vid) -{ +void MULADDSUB::runKokkosVariant(VariantID vid) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); MULADDSUB_DATA_SETUP; - // Define Kokkos Views that will wrap pointers defined in MULADDSUB.hpp auto out1_view = getViewFromPointer(out1, iend); auto out2_view = getViewFromPointer(out2, iend); auto out3_view = getViewFromPointer(out3, iend); - auto in1_view = getViewFromPointer(in1, iend); - auto in2_view = getViewFromPointer(in2, iend); - - auto mas_lam = [=](Index_type i) { - MULADDSUB_BODY; - }; - - -#if defined(RUN_KOKKOS) - - switch ( vid ) { + auto in1_view = getViewFromPointer(in1, iend); + auto in2_view = getViewFromPointer(in2, iend); + switch (vid) { - case Kokkos_Lambda : { + case Kokkos_Lambda: { - Kokkos::fence(); - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + Kokkos::fence(); + startTimer(); - // If SIMD really matters , consider using Kokkos SIMD - Kokkos::parallel_for("MULTISUB-KokkosSeq Kokkos_Lambda", - Kokkos::RangePolicy(ibegin, iend), - KOKKOS_LAMBDA(Index_type i) { - //MULADDSUB_BODY definition: - //out1[i] = in1[i] * in2[i] ; - //out2[i] = in1[i] + in2[i] ; - //out3[i] = in1[i] - in2[i] ; - // WITH KOKKOS VIEWS - out1_view[i] = in1_view[i] * in2_view[i] ; - out2_view[i] = in1_view[i] + in2_view[i] ; - out3_view[i] = in1_view[i] - in2_view[i] ; - }); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - } - Kokkos::fence(); - stopTimer(); - - break; + // If SIMD really matters , consider using Kokkos SIMD + Kokkos::parallel_for( + "MULTISUB-KokkosSeq Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + out1_view[i] = in1_view[i] * in2_view[i]; + out2_view[i] = in1_view[i] + in2_view[i]; + out3_view[i] = in1_view[i] - in2_view[i]; + }); } - default : { - std::cout << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; - } + Kokkos::fence(); + stopTimer(); + + break; + } + default: { + std::cout << "\n MULADDSUB : Unknown variant id = " << vid << std::endl; + } } -#endif // RUN_KOKKOS moveDataToHostFromKokkosView(out1, out1_view, iend); moveDataToHostFromKokkosView(out2, out2_view, iend); moveDataToHostFromKokkosView(out3, out3_view, iend); moveDataToHostFromKokkosView(out3, out3_view, iend); moveDataToHostFromKokkosView(in1, in1_view, iend); moveDataToHostFromKokkosView(in2, in2_view, iend); - } } // end namespace basic diff --git a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp index 6f58e34c4..133962990 100644 --- a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp +++ b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp @@ -7,33 +7,31 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "NESTED_INIT.hpp" - -#include "RAJA/RAJA.hpp" +#include "common/KokkosViewUtils.hpp" #include namespace rajaperf { namespace basic { - void NESTED_INIT::runKokkosVariant(VariantID vid) { const Index_type run_reps = getRunReps(); NESTED_INIT_DATA_SETUP; - // Wrap the nested init array pointer in a Kokkos View - // In a Kokkos View, array arguments for array boundaries go from outmost - // to innermost dimension sizes - // See the basic NESTED_INIT.hpp file for defnition of NESTED_INIT - - auto array_kokkos_view = getViewFromPointer(array, nk, nj, ni); + // Wrap the nested init array pointer in a Kokkos View + // In a Kokkos View, array arguments for array boundaries go from outmost + // to innermost dimension sizes + // See the basic NESTED_INIT.hpp file for defnition of NESTED_INIT + auto array_kokkos_view = getViewFromPointer(array, nk, nj, ni); + // + // Used in Kokkos variant (below). Do not remove. + // auto nestedinit_lam = [=](Index_type i, Index_type j, Index_type k) { NESTED_INIT_BODY; }; -#if defined RUN_KOKKOS - switch (vid) { case Kokkos_Lambda: { @@ -44,17 +42,17 @@ void NESTED_INIT::runKokkosVariant(VariantID vid) { for (RepIndex_type irep = 0; irep < run_reps; ++irep) { // MDRange can be optimized - Kokkos::parallel_for("NESTED_INIT KokkosSeq", - // Range policy to define amount of work to be done - Kokkos::MDRangePolicy, - // Execution space - Kokkos::DefaultExecutionSpace>({0, 0, 0}, {nk, nj, ni}), - // Loop body - KOKKOS_LAMBDA(Index_type k, Index_type j, Index_type i) { - // #define NESTED_INIT_BODY - // array[i+ni*(j+nj*k)] = 0.00000001 * i * j * k ; - array_kokkos_view(k, j, i) = 0.00000001 * i * j * k; - }); + Kokkos::parallel_for( + "NESTED_INIT KokkosSeq", + // Range policy to define amount of work to be done + Kokkos::MDRangePolicy, + // Execution space + Kokkos::DefaultExecutionSpace>({0, 0, 0}, + {nk, nj, ni}), + // Loop body + KOKKOS_LAMBDA(Index_type k, Index_type j, Index_type i) { + array_kokkos_view(k, j, i) = 0.00000001 * i * j * k; + }); } Kokkos::fence(); @@ -72,7 +70,6 @@ void NESTED_INIT::runKokkosVariant(VariantID vid) { std::cout << "\n NESTED_INIT : Unknown variant id = " << vid << std::endl; } } -#endif // RUN_KOKKOS } } // end namespace basic diff --git a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp index 51a819951..982c01dad 100644 --- a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp +++ b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp @@ -7,9 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "PI_ATOMIC.hpp" - -#include "RAJA/RAJA.hpp" - +#include "common/KokkosViewUtils.hpp" #include namespace rajaperf { @@ -25,8 +23,6 @@ void PI_ATOMIC::runKokkosVariant(VariantID vid) { // Declare Kokkos View that will wrap the pointer defined in PI_ATOMIC.hpp auto pi_view = getViewFromPointer(pi, 1); -#if defined(RUN_KOKKOS) - switch (vid) { case Kokkos_Lambda: { @@ -47,10 +43,6 @@ void PI_ATOMIC::runKokkosVariant(VariantID vid) { double x = (double(i) + 0.5) * dx; // Make a reference to the 0th element of a 1D view with one // element - // Atomic operation is an uninterruptable, single operation; e.g., - // addition, multiplication, division, etc. All of these atomic - // operations are architecture dependent. Atomics are advantageous - // from a correctness point of view Kokkos::atomic_add(&pi_view(0), dx / (1.0 + x * x)); }); // Moving the data on the device (held in the KokkosView) BACK to the @@ -69,8 +61,6 @@ void PI_ATOMIC::runKokkosVariant(VariantID vid) { std::cout << "\n PI_ATOMIC : Unknown variant id = " << vid << std::endl; } } -#endif // RUN_KOKKOS - } } // end namespace basic diff --git a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp index 4f340a919..3000b3c12 100644 --- a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp +++ b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp @@ -7,142 +7,72 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "REDUCE3_INT.hpp" +#include "common/KokkosViewUtils.hpp" -#include "RAJA/RAJA.hpp" - -#include #include +#include -namespace rajaperf -{ -namespace basic -{ - +namespace rajaperf { +namespace basic { -void REDUCE3_INT::runKokkosVariant(VariantID vid) -{ +void REDUCE3_INT::runKokkosVariant(VariantID vid) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); REDUCE3_INT_DATA_SETUP; - //Declare KokkosView that will wrap the pointer to a vector - - auto vec_view = getViewFromPointer(vec, iend); - -#if defined(RUN_KOKKOS) - - switch ( vid ) { - - case Base_Seq : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - Int_type vsum = m_vsum_init; - Int_type vmin = m_vmin_init; - Int_type vmax = m_vmax_init; - - for (Index_type i = ibegin; i < iend; ++i ) { - REDUCE3_INT_BODY; - } - - m_vsum += vsum; - m_vmin = RAJA_MIN(m_vmin, vmin); - m_vmax = RAJA_MAX(m_vmax, vmax); - - } - stopTimer(); - - break; + // Declare KokkosView that will wrap the pointer to a vector + + auto vec_view = getViewFromPointer(vec, iend); + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + // The values below are initilized elsewhere by RPS + // These variables were declared to Kokkos-ify the parallel_reduce + // construct: + +// If the RAJA OPENMP TARGET OPTION IS NOT DEFINED +//#ifndef RAJA_ENABLE_TARGET_OPENMP + Int_type max_value = m_vmax_init; + Int_type min_value = m_vmin_init; + Int_type sum = m_vsum_init; + + parallel_reduce( + "REDUCE3-Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(const int64_t i, Int_type &tl_max, Int_type &tl_min, + Int_type &tl_sum) { + Int_type vec_i = vec_view[i]; + if (vec_i > tl_max) + tl_max = vec_i; + if (vec_i < tl_min) + tl_min = vec_i; + tl_sum += vec_i; + }, + Kokkos::Max(max_value), Kokkos::Min(min_value), + sum); + m_vsum += static_cast(sum); + m_vmin = Kokkos::Experimental::min(m_vmin, static_cast(min_value)); + m_vmax = Kokkos::Experimental::max(m_vmax, static_cast(max_value)); + +//#endif } + Kokkos::fence(); + stopTimer(); - case Lambda_Seq : { - - auto init3_base_lam = [=](Index_type i) -> Int_type { - return vec[i]; - }; - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - Int_type vsum = m_vsum_init; - Int_type vmin = m_vmin_init; - Int_type vmax = m_vmax_init; - - for (Index_type i = ibegin; i < iend; ++i ) { - vsum += init3_base_lam(i); - vmin = RAJA_MIN(vmin, init3_base_lam(i)); - vmax = RAJA_MAX(vmax, init3_base_lam(i)); - } - - m_vsum += vsum; - m_vmin = RAJA_MIN(m_vmin, vmin); - m_vmax = RAJA_MAX(m_vmax, vmax); - - } - stopTimer(); - - break; - } - - case Kokkos_Lambda : { - - Kokkos::fence(); - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { -/* - RAJA::ReduceSum vsum(m_vsum_init); - RAJA::ReduceMin vmin(m_vmin_init); - RAJA::ReduceMax vmax(m_vmax_init); - - RAJA::forall( - RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { - REDUCE3_INT_BODY_RAJA; - }); - - m_vsum += static_cast(vsum.get()); - m_vmin = RAJA_MIN(m_vmin, static_cast(vmin.get())); - m_vmax = RAJA_MAX(m_vmax, static_cast(vmax.get())); -*/ - // These values are initilized elsewhere by RPS - // These variables were declared to Kokkos-ify the parallel_reduce - // construct: -#ifndef RAJA_ENABLE_TARGET_OPENMP - Int_type max_value = m_vmax_init; - Int_type min_value = m_vmin_init; - Int_type sum = m_vsum_init; - - - parallel_reduce("REDUCE3-Kokkos Kokkos_Lambda", - Kokkos::RangePolicy(ibegin, iend), - KOKKOS_LAMBDA(const int64_t i, Int_type& tl_max, Int_type& tl_min, Int_type& tl_sum){ - Int_type vec_i = vec_view[i]; - if (vec_i > tl_max) tl_max = vec_i; - if (vec_i < tl_min) tl_min = vec_i; - tl_sum += vec_i; - }, - Kokkos::Max(max_value), - Kokkos::Min(min_value), - sum); - m_vsum += static_cast(sum); - m_vmin = RAJA_MIN(m_vmin, static_cast(min_value)); - m_vmax = RAJA_MAX(m_vmax, static_cast(max_value)); -#endif - } - Kokkos::fence(); - stopTimer(); - - break; - } - - default : { - std::cout << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; - } + break; + } + default: { + std::cout << "\n REDUCE3_INT : Unknown variant id = " << vid << std::endl; + } } -#endif // RUN_KOKKOS moveDataToHostFromKokkosView(vec, vec_view, iend); } diff --git a/src/basic-kokkos/TRAP_INT-Kokkos.cpp b/src/basic-kokkos/TRAP_INT-Kokkos.cpp index 45e822015..12d24021c 100644 --- a/src/basic-kokkos/TRAP_INT-Kokkos.cpp +++ b/src/basic-kokkos/TRAP_INT-Kokkos.cpp @@ -7,136 +7,60 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "TRAP_INT.hpp" - -#include "RAJA/RAJA.hpp" +#include "common/KokkosViewUtils.hpp" #include -namespace rajaperf -{ -namespace basic -{ +namespace rajaperf { +namespace basic { // // Function used in TRAP_INT loop. // RAJA_INLINE -// +// KOKKOS_FUNCTION -Real_type trap_int_func(Real_type x, - Real_type y, - Real_type xp, - Real_type yp) -{ - Real_type denom = (x - xp)*(x - xp) + (y - yp)*(y - yp); - denom = 1.0/sqrt(denom); - return denom; +Real_type trap_int_func(Real_type x, Real_type y, Real_type xp, Real_type yp) { + Real_type denom = (x - xp) * (x - xp) + (y - yp) * (y - yp); + denom = 1.0 / sqrt(denom); + return denom; } - -void TRAP_INT::runKokkosVariant(VariantID vid) -{ +void TRAP_INT::runKokkosVariant(VariantID vid) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); TRAP_INT_DATA_SETUP; -// Declare KokkosViews that will wrap a pointer - not relevant in this case -// ...? - - - -#if defined(RUN_KOKKOS) - - switch ( vid ) { - - case Base_Seq : { - - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - - Real_type sumx = m_sumx_init; - - for (Index_type i = ibegin; i < iend; ++i ) { - TRAP_INT_BODY; - } - - m_sumx += sumx * h; - - } - stopTimer(); - - break; - } - - case Lambda_Seq : { + switch (vid) { - auto trapint_base_lam = [=](Index_type i) -> Real_type { - Real_type x = x0 + i*h; - return trap_int_func(x, y, xp, yp); - }; + case Kokkos_Lambda: { - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + Kokkos::fence(); + startTimer(); + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - Real_type sumx = m_sumx_init; + Real_type trap_integral_val = m_sumx_init; - for (Index_type i = ibegin; i < iend; ++i ) { - sumx += trapint_base_lam(i); - } + Kokkos::parallel_reduce( + "TRAP_INT_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(const int64_t i, Real_type &sumx){TRAP_INT_BODY}, + trap_integral_val); - m_sumx += sumx * h; - - } - stopTimer(); - - break; - } - - case Kokkos_Lambda : { - - Kokkos::fence(); - startTimer(); - for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - -// RAJA::ReduceSum sumx(m_sumx_init); - -// RAJA::forall( -// RAJA::RangeSegment(ibegin, iend), [=](Index_type i) { -// TRAP_INT_BODY; -// -// Begin Kokkos translation -// A RAJA reduce translates into a Kokkoss::parallel_reduce -// To perform the translation: - // Declare and initialize variables - // To perform a reduction, you need: 1) an initial value; 2) iterate - // over an iterable; 3) to be able to extract the result at the end of - // the reduction (in this case, trap_integral_val) - - Real_type trap_integral_val = m_sumx_init; - - Kokkos::parallel_reduce("TRAP_INT_Kokkos Kokkos_Lambda", - Kokkos::RangePolicy(ibegin, iend), - KOKKOS_LAMBDA(const int64_t i, Real_type& sumx) {TRAP_INT_BODY}, - trap_integral_val - ); - - m_sumx += static_cast(trap_integral_val) * h; - - } - Kokkos::fence(); - stopTimer(); - - break; + m_sumx += static_cast(trap_integral_val) * h; } + Kokkos::fence(); + stopTimer(); - default : { - std::cout << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; - } + break; + } + default: { + std::cout << "\n TRAP_INT : Unknown variant id = " << vid << std::endl; + } } -#endif //RUN_KOKKOS } } // end namespace basic diff --git a/src/common/KokkosViewUtils.hpp b/src/common/KokkosViewUtils.hpp new file mode 100644 index 000000000..fc9210f36 --- /dev/null +++ b/src/common/KokkosViewUtils.hpp @@ -0,0 +1,103 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +/// +/// Types and methods for managing Suite kernels, variants, features, etc.. +/// + +#ifndef KokkosViewUtils_HPP +#define KokkosViewUtils_HPP + +#include "Kokkos_Core.hpp" + +#include +#include + +namespace rajaperf { +template struct PointerOfNdimensions; + +template struct PointerOfNdimensions { + using type = PointedAt; +}; + +template struct PointerOfNdimensions { + using type = + typename PointerOfNdimensions::type *; +}; + +// This templated function is used to wrap pointers +// (declared and defined in RAJAPerf Suite kernels) in Kokkos Views +// +template +auto getViewFromPointer(PointedAt *kokkos_ptr, Boundaries... boundaries) -> + Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultExecutionSpace::memory_space> + +{ + + using host_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultHostExecutionSpace::memory_space>; + + using device_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultExecutionSpace::memory_space>; + + using mirror_view_type = typename device_view_type::HostMirror; + + host_view_type pointer_holder(kokkos_ptr, boundaries...); + + // The boundaries parameter pack contains the array dimenions; + // An allocation is implicitly made here + device_view_type device_data_copy("StringName", boundaries...); + + mirror_view_type cpu_to_gpu_mirror = + Kokkos::create_mirror_view(device_data_copy); + + Kokkos::deep_copy(cpu_to_gpu_mirror, pointer_holder); + + Kokkos::deep_copy(device_data_copy, cpu_to_gpu_mirror); + + // Kokkos::View return type + + return device_data_copy; +} + +// This function will move data in a Kokkos::View back to host from device, +// and will be stored in the existing pointer(s) +template +void moveDataToHostFromKokkosView(PointedAt *kokkos_ptr, ExistingView my_view, + Boundaries... boundaries) { + + using host_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultHostExecutionSpace::memory_space>; + + using device_view_type = typename Kokkos::View< + typename PointerOfNdimensions::type, + typename Kokkos::DefaultExecutionSpace::memory_space>; + + using mirror_view_type = typename device_view_type::HostMirror; + + host_view_type pointer_holder(kokkos_ptr, boundaries...); + + // Layout is optimal for gpu, but data are actually located on CPU + mirror_view_type cpu_to_gpu_mirror = Kokkos::create_mirror_view(my_view); + + // Actual copying of the data from the gpu (my_view) back to the cpu + Kokkos::deep_copy(cpu_to_gpu_mirror, my_view); + + // This copies from the mirror on the host cpu back to the existing + // pointer(s) + Kokkos::deep_copy(pointer_holder, cpu_to_gpu_mirror); +} + +} // namespace rajaperf + +#endif // closing endif for header file include guard diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index d0466a559..4b722d703 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -7,7 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// /// -/// Tyoes and methods for managing Suite kernels, variants, features, etc.. +/// Types and methods for managing Suite kernels, variants, features, etc.. /// #ifndef RAJAPerfSuite_HPP @@ -16,20 +16,19 @@ #include "RAJA/config.hpp" #include "rajaperf_config.hpp" + #if defined(RUN_KOKKOS) #include "Kokkos_Core.hpp" #endif // RUN_KOKKOS -#include #include +#include -namespace rajaperf -{ +namespace rajaperf { class KernelBase; class RunParams; - /*! ******************************************************************************* * @@ -55,7 +54,6 @@ enum GroupID { }; - // /*! ******************************************************************************* @@ -71,9 +69,9 @@ enum GroupID { */ enum KernelID { -// -// Basic kernels... -// + // + // Basic kernels... + // Basic_DAXPY = 0, Basic_DAXPY_ATOMIC, Basic_IF_QUAD, @@ -93,9 +91,9 @@ enum KernelID { #ifdef RUN_KOKKOS // move this up to the point implemented with Kokkos NumKernels, #endif -// -// Lcals kernels... -// + // + // Lcals kernels... + // Lcals_DIFF_PREDICT, Lcals_EOS, Lcals_FIRST_DIFF, @@ -108,9 +106,9 @@ enum KernelID { Lcals_PLANCKIAN, Lcals_TRIDIAG_ELIM, -// -// Polybench kernels... -// + // + // Polybench kernels... + // Polybench_2MM, Polybench_3MM, Polybench_ADI, @@ -125,9 +123,9 @@ enum KernelID { Polybench_JACOBI_2D, Polybench_MVT, -// -// Stream kernels... -// + // + // Stream kernels... + // Stream_ADD, Stream_COPY, Stream_DOT, @@ -168,7 +166,6 @@ enum KernelID { }; - /*! ******************************************************************************* * @@ -208,7 +205,6 @@ enum VariantID { }; - /*! ******************************************************************************* * @@ -240,7 +236,6 @@ enum FeatureID { }; - /*! ******************************************************************************* * @@ -248,7 +243,7 @@ enum FeatureID { * ******************************************************************************* */ -const std::string& getGroupName(GroupID gid); +const std::string &getGroupName(GroupID gid); /*! ******************************************************************************* @@ -270,7 +265,7 @@ std::string getKernelName(KernelID kid); * ******************************************************************************* */ -const std::string& getFullKernelName(KernelID kid); +const std::string &getFullKernelName(KernelID kid); /*! ******************************************************************************* @@ -279,7 +274,7 @@ const std::string& getFullKernelName(KernelID kid); * ******************************************************************************* */ -const std::string& getVariantName(VariantID vid); +const std::string &getVariantName(VariantID vid); /*! ******************************************************************************* @@ -308,7 +303,7 @@ bool isVariantGPU(VariantID vid); * ******************************************************************************* */ -const std::string& getFeatureName(FeatureID vid); +const std::string &getFeatureName(FeatureID vid); /*! ******************************************************************************* @@ -319,7 +314,7 @@ const std::string& getFeatureName(FeatureID vid); * ******************************************************************************* */ -KernelBase* getKernelObject(KernelID kid, const RunParams& run_params); +KernelBase *getKernelObject(KernelID kid, const RunParams &run_params); /*! ******************************************************************************* @@ -330,7 +325,7 @@ KernelBase* getKernelObject(KernelID kid, const RunParams& run_params); * ******************************************************************************* */ -std::ostream& getCout(); +std::ostream &getCout(); /*! ******************************************************************************* @@ -339,7 +334,7 @@ std::ostream& getCout(); * ******************************************************************************* */ -std::ostream* makeNullStream(); +std::ostream *makeNullStream(); /*! ******************************************************************************* @@ -449,6 +444,7 @@ void moveDataToHostFromKokkosView(PointedAt *kokkos_ptr, ExistingView my_view, } #endif // RUN_KOKKOS -} // closing brace for rajaperf namespace +*/ +} // namespace rajaperf -#endif // closing endif for header file include guard +#endif // closing endif for header file include guard From a9ed2f716876683f4f6b6f85555f4762f6a22e13 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Mon, 7 Feb 2022 16:54:13 -0700 Subject: [PATCH 06/28] Add DAXPY_ATOMIC, formatting, in REDUCE3, Kokkos::min, Kokkos::max --- CMakeLists.txt | 1 - src/basic-kokkos/CMakeLists.txt | 1 + src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp | 67 ++++++++++++++++++++++++ src/basic-kokkos/REDUCE3_INT-Kokkos.cpp | 15 ++---- src/basic/DAXPY_ATOMIC.cpp | 51 ++++++++---------- src/basic/DAXPY_ATOMIC.hpp | 25 ++++----- 6 files changed, 105 insertions(+), 55 deletions(-) create mode 100644 src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index c64fee113..83a574af2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,7 +168,6 @@ Kokkos_ENABLE_HIP variable to ON") if(ENABLE_TARGET_OPENMP) set(Kokkos_ENABLE_OPENMPTARGET ON CACHE BOOL "Docstring") - set(RAJA_ENABLE_TARGET_OPENMP ON CACHE BOOL "Docstring") if(NOT CMAKE_BUILD_TYPE MATCHES Debug) if(NOT EXPERIMENTAL_BUILD) message(FATAL_ERROR "Kokkos builds with OpenMPTarget require a Debug build to succeed at the moment. Rebuild with CMAKE_BUILD_TYPE=Debug. If you're a compiler developer, rebuild with -DEXPERIMENTAL_BUILD=ON") diff --git a/src/basic-kokkos/CMakeLists.txt b/src/basic-kokkos/CMakeLists.txt index 02e14cfc4..a85b13df6 100644 --- a/src/basic-kokkos/CMakeLists.txt +++ b/src/basic-kokkos/CMakeLists.txt @@ -21,5 +21,6 @@ blt_add_library( NESTED_INIT-Kokkos.cpp REDUCE3_INT-Kokkos.cpp TRAP_INT-Kokkos.cpp + DAXPY_ATOMIC-Kokkos.cpp DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp b/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp new file mode 100644 index 000000000..07607ca08 --- /dev/null +++ b/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp @@ -0,0 +1,67 @@ +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// +// Copyright (c) 2017-21, Lawrence Livermore National Security, LLC +// and RAJA Performance Suite project contributors. +// See the RAJAPerf/LICENSE file for details. +// +// SPDX-License-Identifier: (BSD-3-Clause) +//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + +#include "DAXPY_ATOMIC.hpp" +#include "common/KokkosViewUtils.hpp" +#include + +// Delete me +// For de-bugging: +#include "RAJA/RAJA.hpp" + +namespace rajaperf { +namespace basic { + +void DAXPY_ATOMIC::runKokkosVariant(VariantID vid) { + + const Index_type run_reps = getRunReps(); + const Index_type ibegin = 0; + const Index_type iend = getActualProblemSize(); + + DAXPY_ATOMIC_DATA_SETUP; + // + // Kokkos Views to wrap pointers declared in DAXPY_ATOMIC.hpp + // + + auto x_view = getViewFromPointer(x, iend); + auto y_view = getViewFromPointer(y, iend); + + switch (vid) { + + case Kokkos_Lambda: { + + Kokkos::fence(); + startTimer(); + + for (RepIndex_type irep = 0; irep < run_reps; ++irep) { + + Kokkos::parallel_for( + "DAXPY_ATOMIC_Kokkos Kokkos_Lambda", + Kokkos::RangePolicy(ibegin, iend), + KOKKOS_LAMBDA(Index_type i) { + Kokkos::atomic_add(&y_view[i], a * x_view[i]); + }); + } + + Kokkos::fence(); + stopTimer(); + + break; + } + + default: { + getCout() << "\n DAXPY_ATOMIC : Unknown variant id = " << vid << std::endl; + } + } + + moveDataToHostFromKokkosView(x, x_view, iend); + moveDataToHostFromKokkosView(y, y_view, iend); +} + +} // end namespace basic +} // end namespace rajaperf diff --git a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp index 3000b3c12..9fd287ef3 100644 --- a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp +++ b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp @@ -33,16 +33,11 @@ void REDUCE3_INT::runKokkosVariant(VariantID vid) { Kokkos::fence(); startTimer(); for (RepIndex_type irep = 0; irep < run_reps; ++irep) { - // The values below are initilized elsewhere by RPS - // These variables were declared to Kokkos-ify the parallel_reduce - // construct: -// If the RAJA OPENMP TARGET OPTION IS NOT DEFINED -//#ifndef RAJA_ENABLE_TARGET_OPENMP Int_type max_value = m_vmax_init; Int_type min_value = m_vmin_init; Int_type sum = m_vsum_init; - + // ADL: argument-dependent look up here parallel_reduce( "REDUCE3-Kokkos Kokkos_Lambda", Kokkos::RangePolicy(ibegin, iend), @@ -58,10 +53,10 @@ void REDUCE3_INT::runKokkosVariant(VariantID vid) { Kokkos::Max(max_value), Kokkos::Min(min_value), sum); m_vsum += static_cast(sum); - m_vmin = Kokkos::Experimental::min(m_vmin, static_cast(min_value)); - m_vmax = Kokkos::Experimental::max(m_vmax, static_cast(max_value)); - -//#endif + m_vmin = + Kokkos::Experimental::min(m_vmin, static_cast(min_value)); + m_vmax = + Kokkos::Experimental::max(m_vmax, static_cast(max_value)); } Kokkos::fence(); stopTimer(); diff --git a/src/basic/DAXPY_ATOMIC.cpp b/src/basic/DAXPY_ATOMIC.cpp index 1e5d4e00e..ebbe08678 100644 --- a/src/basic/DAXPY_ATOMIC.cpp +++ b/src/basic/DAXPY_ATOMIC.cpp @@ -12,49 +12,44 @@ #include "common/DataUtils.hpp" -namespace rajaperf -{ -namespace basic -{ - +namespace rajaperf { +namespace basic { -DAXPY_ATOMIC::DAXPY_ATOMIC(const RunParams& params) - : KernelBase(rajaperf::Basic_DAXPY_ATOMIC, params) -{ +DAXPY_ATOMIC::DAXPY_ATOMIC(const RunParams ¶ms) + : KernelBase(rajaperf::Basic_DAXPY_ATOMIC, params) { setDefaultProblemSize(1000000); setDefaultReps(500); - setActualProblemSize( getTargetProblemSize() ); + setActualProblemSize(getTargetProblemSize()); - setItsPerRep( getActualProblemSize() ); + setItsPerRep(getActualProblemSize()); setKernelsPerRep(1); - setBytesPerRep( (1*sizeof(Real_type) + 2*sizeof(Real_type)) * getActualProblemSize() ); + setBytesPerRep((1 * sizeof(Real_type) + 2 * sizeof(Real_type)) * + getActualProblemSize()); setFLOPsPerRep(2 * getActualProblemSize()); setUsesFeature(Forall); - setVariantDefined( Base_Seq ); - setVariantDefined( Lambda_Seq ); - setVariantDefined( RAJA_Seq ); + setVariantDefined(Base_Seq); + setVariantDefined(Lambda_Seq); + setVariantDefined(RAJA_Seq); - setVariantDefined( Base_OpenMP ); - setVariantDefined( Lambda_OpenMP ); - setVariantDefined( RAJA_OpenMP ); + setVariantDefined(Base_OpenMP); + setVariantDefined(Lambda_OpenMP); + setVariantDefined(RAJA_OpenMP); - setVariantDefined( Base_OpenMPTarget ); - setVariantDefined( RAJA_OpenMPTarget ); + setVariantDefined(Base_OpenMPTarget); + setVariantDefined(RAJA_OpenMPTarget); - setVariantDefined( Base_CUDA ); - setVariantDefined( Lambda_CUDA ); - setVariantDefined( RAJA_CUDA ); + setVariantDefined(Base_CUDA); + setVariantDefined(Lambda_CUDA); + setVariantDefined(RAJA_CUDA); - setVariantDefined( Base_HIP ); - setVariantDefined( Lambda_HIP ); - setVariantDefined( RAJA_HIP ); -} + setVariantDefined(Base_HIP); + setVariantDefined(Lambda_HIP); + setVariantDefined(RAJA_HIP); -DAXPY_ATOMIC::~DAXPY_ATOMIC() -{ + setVariantDefined(Kokkos_Lambda); } void DAXPY_ATOMIC::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) diff --git a/src/basic/DAXPY_ATOMIC.hpp b/src/basic/DAXPY_ATOMIC.hpp index dd52d777c..26ebde45f 100644 --- a/src/basic/DAXPY_ATOMIC.hpp +++ b/src/basic/DAXPY_ATOMIC.hpp @@ -17,32 +17,25 @@ #ifndef RAJAPerf_Basic_DAXPY_ATOMIC_HPP #define RAJAPerf_Basic_DAXPY_ATOMIC_HPP -#define DAXPY_ATOMIC_DATA_SETUP \ - Real_ptr x = m_x; \ - Real_ptr y = m_y; \ +#define DAXPY_ATOMIC_DATA_SETUP \ + Real_ptr x = m_x; \ + Real_ptr y = m_y; \ Real_type a = m_a; -#define DAXPY_ATOMIC_BODY \ - y[i] += a * x[i] ; - -#define DAXPY_ATOMIC_RAJA_BODY(policy) \ - RAJA::atomicAdd(&y[i], a * x[i]); +#define DAXPY_ATOMIC_BODY y[i] += a * x[i]; +#define DAXPY_ATOMIC_RAJA_BODY(policy) RAJA::atomicAdd(&y[i], a * x[i]); #include "common/KernelBase.hpp" -namespace rajaperf -{ +namespace rajaperf { class RunParams; -namespace basic -{ +namespace basic { -class DAXPY_ATOMIC : public KernelBase -{ +class DAXPY_ATOMIC : public KernelBase { public: - - DAXPY_ATOMIC(const RunParams& params); + DAXPY_ATOMIC(const RunParams ¶ms); ~DAXPY_ATOMIC(); From 2ba3c9df386b4dd6c0ccb2c6278ad5c640462565 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Wed, 9 Feb 2022 08:33:10 -0700 Subject: [PATCH 07/28] DAXPY_ATOMIC: copying files from llnl/develop to restore original file format This commit effectively removes clang formatting inappropriately applied to these files. --- src/basic/DAXPY_ATOMIC.cpp | 49 ++++++++++++++++++++------------------ src/basic/DAXPY_ATOMIC.hpp | 25 ++++++++++++------- 2 files changed, 42 insertions(+), 32 deletions(-) diff --git a/src/basic/DAXPY_ATOMIC.cpp b/src/basic/DAXPY_ATOMIC.cpp index ebbe08678..125cb9640 100644 --- a/src/basic/DAXPY_ATOMIC.cpp +++ b/src/basic/DAXPY_ATOMIC.cpp @@ -12,44 +12,47 @@ #include "common/DataUtils.hpp" -namespace rajaperf { -namespace basic { +namespace rajaperf +{ +namespace basic +{ -DAXPY_ATOMIC::DAXPY_ATOMIC(const RunParams ¶ms) - : KernelBase(rajaperf::Basic_DAXPY_ATOMIC, params) { + +DAXPY_ATOMIC::DAXPY_ATOMIC(const RunParams& params) + : KernelBase(rajaperf::Basic_DAXPY_ATOMIC, params) +{ setDefaultProblemSize(1000000); setDefaultReps(500); - setActualProblemSize(getTargetProblemSize()); + setActualProblemSize( getTargetProblemSize() ); - setItsPerRep(getActualProblemSize()); + setItsPerRep( getActualProblemSize() ); setKernelsPerRep(1); - setBytesPerRep((1 * sizeof(Real_type) + 2 * sizeof(Real_type)) * - getActualProblemSize()); + setBytesPerRep( (1*sizeof(Real_type) + 2*sizeof(Real_type)) * getActualProblemSize() ); setFLOPsPerRep(2 * getActualProblemSize()); setUsesFeature(Forall); - setVariantDefined(Base_Seq); - setVariantDefined(Lambda_Seq); - setVariantDefined(RAJA_Seq); + setVariantDefined( Base_Seq ); + setVariantDefined( Lambda_Seq ); + setVariantDefined( RAJA_Seq ); - setVariantDefined(Base_OpenMP); - setVariantDefined(Lambda_OpenMP); - setVariantDefined(RAJA_OpenMP); + setVariantDefined( Base_OpenMP ); + setVariantDefined( Lambda_OpenMP ); + setVariantDefined( RAJA_OpenMP ); - setVariantDefined(Base_OpenMPTarget); - setVariantDefined(RAJA_OpenMPTarget); + setVariantDefined( Base_OpenMPTarget ); + setVariantDefined( RAJA_OpenMPTarget ); - setVariantDefined(Base_CUDA); - setVariantDefined(Lambda_CUDA); - setVariantDefined(RAJA_CUDA); + setVariantDefined( Base_CUDA ); + setVariantDefined( Lambda_CUDA ); + setVariantDefined( RAJA_CUDA ); - setVariantDefined(Base_HIP); - setVariantDefined(Lambda_HIP); - setVariantDefined(RAJA_HIP); + setVariantDefined( Base_HIP ); + setVariantDefined( Lambda_HIP ); + setVariantDefined( RAJA_HIP ); - setVariantDefined(Kokkos_Lambda); + setVariantDefined( Kokkos_Lambda ); } void DAXPY_ATOMIC::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) diff --git a/src/basic/DAXPY_ATOMIC.hpp b/src/basic/DAXPY_ATOMIC.hpp index 26ebde45f..dd52d777c 100644 --- a/src/basic/DAXPY_ATOMIC.hpp +++ b/src/basic/DAXPY_ATOMIC.hpp @@ -17,25 +17,32 @@ #ifndef RAJAPerf_Basic_DAXPY_ATOMIC_HPP #define RAJAPerf_Basic_DAXPY_ATOMIC_HPP -#define DAXPY_ATOMIC_DATA_SETUP \ - Real_ptr x = m_x; \ - Real_ptr y = m_y; \ +#define DAXPY_ATOMIC_DATA_SETUP \ + Real_ptr x = m_x; \ + Real_ptr y = m_y; \ Real_type a = m_a; -#define DAXPY_ATOMIC_BODY y[i] += a * x[i]; +#define DAXPY_ATOMIC_BODY \ + y[i] += a * x[i] ; + +#define DAXPY_ATOMIC_RAJA_BODY(policy) \ + RAJA::atomicAdd(&y[i], a * x[i]); -#define DAXPY_ATOMIC_RAJA_BODY(policy) RAJA::atomicAdd(&y[i], a * x[i]); #include "common/KernelBase.hpp" -namespace rajaperf { +namespace rajaperf +{ class RunParams; -namespace basic { +namespace basic +{ -class DAXPY_ATOMIC : public KernelBase { +class DAXPY_ATOMIC : public KernelBase +{ public: - DAXPY_ATOMIC(const RunParams ¶ms); + + DAXPY_ATOMIC(const RunParams& params); ~DAXPY_ATOMIC(); From f6b8232fb51d4d95e788b3ca56f2523a6207c7a6 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Mon, 7 Feb 2022 17:15:07 -0700 Subject: [PATCH 08/28] Delete code now in KokkosViewUtils.hpp --- src/common/RAJAPerfSuite.hpp | 47 ------------------------------------ 1 file changed, 47 deletions(-) diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index 4b722d703..96501da17 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -16,7 +16,6 @@ #include "RAJA/config.hpp" #include "rajaperf_config.hpp" - #if defined(RUN_KOKKOS) #include "Kokkos_Core.hpp" #endif // RUN_KOKKOS @@ -399,52 +398,6 @@ auto getViewFromPointer(PointedAt *kokkos_ptr, Boundaries... boundaries) // An allocation is implicitly made here device_view_type device_data_copy("StringName", boundaries...); - mirror_view_type cpu_to_gpu_mirror = - Kokkos::create_mirror_view(device_data_copy); - - - Kokkos::deep_copy(cpu_to_gpu_mirror, pointer_holder); - - Kokkos::deep_copy(device_data_copy, cpu_to_gpu_mirror); - - // Kokkos::View return type - - return device_data_copy; -} - -// This function will move data in a Kokkos::View back to host from device, -// and will be stored in the existing pointer(s) -template -void moveDataToHostFromKokkosView(PointedAt *kokkos_ptr, ExistingView my_view, - Boundaries... boundaries) -{ - - using host_view_type = typename Kokkos::View< - typename PointerOfNdimensions::type, - typename Kokkos::DefaultHostExecutionSpace::memory_space>; - - using device_view_type = typename Kokkos::View< - typename PointerOfNdimensions::type, - typename Kokkos::DefaultExecutionSpace::memory_space>; - - using mirror_view_type = typename device_view_type::HostMirror; - - - host_view_type pointer_holder(kokkos_ptr, boundaries...); - - // Layout is optimal for gpu, but data are actually located on CPU - mirror_view_type cpu_to_gpu_mirror = Kokkos::create_mirror_view(my_view); - - // Actual copying of the data from the gpu (my_view) back to the cpu - Kokkos::deep_copy(cpu_to_gpu_mirror, my_view); - - // This copies from the mirror on the host cpu back to the existing - // pointer(s) - Kokkos::deep_copy(pointer_holder, cpu_to_gpu_mirror); -} - -#endif // RUN_KOKKOS -*/ } // namespace rajaperf #endif // closing endif for header file include guard From 800bd442177c24a2b7e76d6eda488390266b3553 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Tue, 8 Feb 2022 12:13:17 -0700 Subject: [PATCH 09/28] RAJAPerfSuite.hpp 0d8b139: removing RUN_KOKKOS infrastructure in unformatted file This commit effectively reverts clang formatting applied to this file in later commits, thus minimizing changes to the original file. --- src/common/RAJAPerfSuite.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index 96501da17..f93b67208 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -16,6 +16,7 @@ #include "RAJA/config.hpp" #include "rajaperf_config.hpp" + #if defined(RUN_KOKKOS) #include "Kokkos_Core.hpp" #endif // RUN_KOKKOS From ff93ba68d33fa3c990be938a59da0f44be92843d Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Tue, 8 Feb 2022 12:22:21 -0700 Subject: [PATCH 10/28] RAJAPerfSuite.hpp: rm line 18 to minimize changes --- src/common/RAJAPerfSuite.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index f93b67208..96501da17 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -16,7 +16,6 @@ #include "RAJA/config.hpp" #include "rajaperf_config.hpp" - #if defined(RUN_KOKKOS) #include "Kokkos_Core.hpp" #endif // RUN_KOKKOS From 73dc173e2acd335edd634e9301e626d6c10f9b98 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Wed, 9 Feb 2022 09:11:42 -0700 Subject: [PATCH 11/28] reverting formatting changes --- src/common/RAJAPerfSuite.hpp | 56 ++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index 96501da17..b246f8852 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -23,7 +23,8 @@ #include #include -namespace rajaperf { +namespace rajaperf +{ class KernelBase; class RunParams; @@ -53,6 +54,7 @@ enum GroupID { }; + // /*! ******************************************************************************* @@ -68,9 +70,9 @@ enum GroupID { */ enum KernelID { - // - // Basic kernels... - // +// +// Basic kernels... +// Basic_DAXPY = 0, Basic_DAXPY_ATOMIC, Basic_IF_QUAD, @@ -90,9 +92,10 @@ enum KernelID { #ifdef RUN_KOKKOS // move this up to the point implemented with Kokkos NumKernels, #endif - // - // Lcals kernels... - // + +// +// Lcals kernels... +// Lcals_DIFF_PREDICT, Lcals_EOS, Lcals_FIRST_DIFF, @@ -105,9 +108,9 @@ enum KernelID { Lcals_PLANCKIAN, Lcals_TRIDIAG_ELIM, - // - // Polybench kernels... - // +// +// Polybench kernels... +// Polybench_2MM, Polybench_3MM, Polybench_ADI, @@ -122,9 +125,9 @@ enum KernelID { Polybench_JACOBI_2D, Polybench_MVT, - // - // Stream kernels... - // +// +// Stream kernels... +// Stream_ADD, Stream_COPY, Stream_DOT, @@ -134,7 +137,10 @@ enum KernelID { // // Apps kernels... // +<<<<<<< HEAD Apps_CONVECTION3DPA, +======= +>>>>>>> 78348cb9 (reverting formatting changes) Apps_COUPLE, Apps_DEL_DOT_VEC_2D, Apps_DIFFUSION3DPA, @@ -152,7 +158,10 @@ enum KernelID { // // Algorithm kernels... // +<<<<<<< HEAD Algorithm_SCAN, +======= +>>>>>>> 78348cb9 (reverting formatting changes) Algorithm_SORT, Algorithm_SORTPAIRS, Algorithm_REDUCE_SUM, @@ -165,6 +174,7 @@ enum KernelID { }; + /*! ******************************************************************************* * @@ -204,6 +214,7 @@ enum VariantID { }; + /*! ******************************************************************************* * @@ -235,6 +246,7 @@ enum FeatureID { }; + /*! ******************************************************************************* * @@ -242,7 +254,7 @@ enum FeatureID { * ******************************************************************************* */ -const std::string &getGroupName(GroupID gid); +const std::string& getGroupName(GroupID gid); /*! ******************************************************************************* @@ -264,7 +276,7 @@ std::string getKernelName(KernelID kid); * ******************************************************************************* */ -const std::string &getFullKernelName(KernelID kid); +const std::string& getFullKernelName(KernelID kid); /*! ******************************************************************************* @@ -273,7 +285,7 @@ const std::string &getFullKernelName(KernelID kid); * ******************************************************************************* */ -const std::string &getVariantName(VariantID vid); +const std::string& getVariantName(VariantID vid); /*! ******************************************************************************* @@ -302,7 +314,7 @@ bool isVariantGPU(VariantID vid); * ******************************************************************************* */ -const std::string &getFeatureName(FeatureID vid); +const std::string& getFeatureName(FeatureID vid); /*! ******************************************************************************* @@ -313,7 +325,7 @@ const std::string &getFeatureName(FeatureID vid); * ******************************************************************************* */ -KernelBase *getKernelObject(KernelID kid, const RunParams &run_params); +KernelBase* getKernelObject(KernelID kid, const RunParams& run_params); /*! ******************************************************************************* @@ -324,7 +336,7 @@ KernelBase *getKernelObject(KernelID kid, const RunParams &run_params); * ******************************************************************************* */ -std::ostream &getCout(); +std::ostream& getCout(); /*! ******************************************************************************* @@ -333,7 +345,7 @@ std::ostream &getCout(); * ******************************************************************************* */ -std::ostream *makeNullStream(); +std::ostream* makeNullStream(); /*! ******************************************************************************* @@ -398,6 +410,6 @@ auto getViewFromPointer(PointedAt *kokkos_ptr, Boundaries... boundaries) // An allocation is implicitly made here device_view_type device_data_copy("StringName", boundaries...); -} // namespace rajaperf +} // closing brace for rajaperf namespace -#endif // closing endif for header file include guard +#endif // closing endif for header file include guard From c843d99795f00e197be2afd23eeec9abef8c5143 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Fri, 20 May 2022 18:07:03 -0600 Subject: [PATCH 12/28] Restore destructor in DAXPY_ATOMIC.cpp --- src/basic/DAXPY_ATOMIC.cpp | 4 ++++ src/basic/MAT_MAT_SHARED.hpp | 11 ++++------- src/basic/PI_REDUCE.hpp | 8 ++++---- src/common/RAJAPerfSuite.hpp | 8 +------- 4 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/basic/DAXPY_ATOMIC.cpp b/src/basic/DAXPY_ATOMIC.cpp index 125cb9640..200df93db 100644 --- a/src/basic/DAXPY_ATOMIC.cpp +++ b/src/basic/DAXPY_ATOMIC.cpp @@ -55,6 +55,10 @@ DAXPY_ATOMIC::DAXPY_ATOMIC(const RunParams& params) setVariantDefined( Kokkos_Lambda ); } +DAXPY_ATOMIC::~DAXPY_ATOMIC() +{ +} + void DAXPY_ATOMIC::setUp(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { allocAndInitDataConst(m_y, getActualProblemSize(), 0.0, vid); diff --git a/src/basic/MAT_MAT_SHARED.hpp b/src/basic/MAT_MAT_SHARED.hpp index 8c046e47d..3d48c375d 100644 --- a/src/basic/MAT_MAT_SHARED.hpp +++ b/src/basic/MAT_MAT_SHARED.hpp @@ -139,7 +139,10 @@ class MAT_MAT_SHARED : public KernelBase { void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); - void runKokkosVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t) + { + getCout() << "\n MAT_MAT_SHARED : Unknown Kokkos variant id = " << vid << std::endl; + } void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); @@ -148,12 +151,6 @@ class MAT_MAT_SHARED : public KernelBase { template < size_t block_size > void runHipVariantImpl(VariantID vid); - { - getCout() << "\n MAT_MAT_SHARED : Unknown Kokkos variant id = " << vid << std::endl; - //getCout() << "\n MAT_MAT_SHARED : Unknown Kokkos variant id and/or tune_idx = " << tune_idx << std::endl; - //getCout() << "\n MAT_MAT_SHARED : No Kokkos version yet " << std::endl; - } - private: static const size_t default_gpu_block_size = TL_SZ * TL_SZ; using gpu_block_sizes_type = gpu_block_size::make_list_type; diff --git a/src/basic/PI_REDUCE.hpp b/src/basic/PI_REDUCE.hpp index 89daac956..3eba2936c 100644 --- a/src/basic/PI_REDUCE.hpp +++ b/src/basic/PI_REDUCE.hpp @@ -56,7 +56,10 @@ class PI_REDUCE : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); - void runKokkosVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t) + { + getCout() << "\n PI_REDUCE : Unknown Kokkos variant id = " << vid << std::endl; + } void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); @@ -65,9 +68,6 @@ class PI_REDUCE : public KernelBase template < size_t block_size > void runHipVariantImpl(VariantID vid); - { - getCout() << "\n PI_REDUCE : Unknown Kokkos variant id = " << vid << std::endl; - } private: static const size_t default_gpu_block_size = 256; diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index b246f8852..348583c69 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -137,10 +137,7 @@ enum KernelID { // // Apps kernels... // -<<<<<<< HEAD Apps_CONVECTION3DPA, -======= ->>>>>>> 78348cb9 (reverting formatting changes) Apps_COUPLE, Apps_DEL_DOT_VEC_2D, Apps_DIFFUSION3DPA, @@ -158,10 +155,7 @@ enum KernelID { // // Algorithm kernels... // -<<<<<<< HEAD Algorithm_SCAN, -======= ->>>>>>> 78348cb9 (reverting formatting changes) Algorithm_SORT, Algorithm_SORTPAIRS, Algorithm_REDUCE_SUM, @@ -409,7 +403,7 @@ auto getViewFromPointer(PointedAt *kokkos_ptr, Boundaries... boundaries) // The boundaries parameter pack contains the array dimenions; // An allocation is implicitly made here device_view_type device_data_copy("StringName", boundaries...); - +#endif } // closing brace for rajaperf namespace #endif // closing endif for header file include guard From 422f8d0593d969549da94073c566b252fbed2112 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Wed, 25 May 2022 11:19:11 -0600 Subject: [PATCH 13/28] Kokkos updates for running with the current RAJA + Kokkos --- src/algorithm/SORT.hpp | 4 ++ src/apps/HALOEXCHANGE_FUSED.hpp | 4 ++ src/basic-kokkos/DAXPY-Kokkos.cpp | 2 +- src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp | 3 +- src/basic-kokkos/IF_QUAD-Kokkos.cpp | 2 +- src/basic-kokkos/INIT3-Kokkos.cpp | 2 +- src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp | 2 +- .../INIT_VIEW1D_OFFSET-Kokkos.cpp | 2 +- src/basic-kokkos/MULADDSUB-Kokkos.cpp | 2 +- src/basic-kokkos/NESTED_INIT-Kokkos.cpp | 2 +- src/basic-kokkos/PI_ATOMIC-Kokkos.cpp | 2 +- src/basic-kokkos/REDUCE3_INT-Kokkos.cpp | 2 +- src/basic-kokkos/TRAP_INT-Kokkos.cpp | 2 +- src/basic/INDEXLIST.hpp | 5 ++- src/basic/INDEXLIST_3LOOP.hpp | 6 ++- src/basic/REDUCE_STRUCT.hpp | 5 +++ src/common/Executor.cpp | 17 +++---- src/common/KernelBase.cpp | 2 +- src/common/KernelBase.hpp | 2 +- src/common/RAJAPerfSuite.cpp | 2 - src/common/RAJAPerfSuite.hpp | 44 ------------------- 21 files changed, 41 insertions(+), 73 deletions(-) diff --git a/src/algorithm/SORT.hpp b/src/algorithm/SORT.hpp index 0670c9dd0..ec8a3a65a 100644 --- a/src/algorithm/SORT.hpp +++ b/src/algorithm/SORT.hpp @@ -54,6 +54,10 @@ class SORT : public KernelBase { getCout() << "\n SORT : Unknown OMP Target variant id = " << vid << std::endl; } + void runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) + { + getCout() << "\n SORT : Unknown Kokkos variant id = " << vid << std::endl; + } private: static const size_t default_gpu_block_size = 0; diff --git a/src/apps/HALOEXCHANGE_FUSED.hpp b/src/apps/HALOEXCHANGE_FUSED.hpp index e47c1e14e..99fa6b0e8 100644 --- a/src/apps/HALOEXCHANGE_FUSED.hpp +++ b/src/apps/HALOEXCHANGE_FUSED.hpp @@ -137,6 +137,10 @@ class HALOEXCHANGE_FUSED : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) + { + getCout() << "\n Haloexchange Fused : Unknown Kokkos variant id = " << vid << std::endl; + } void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic-kokkos/DAXPY-Kokkos.cpp b/src/basic-kokkos/DAXPY-Kokkos.cpp index eb2dac484..f35e01a83 100644 --- a/src/basic-kokkos/DAXPY-Kokkos.cpp +++ b/src/basic-kokkos/DAXPY-Kokkos.cpp @@ -22,7 +22,7 @@ struct DaxpyFunctor { void operator()(Index_type i) const { DAXPY_BODY; } }; -void DAXPY::runKokkosVariant(VariantID vid) { +void DAXPY::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); diff --git a/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp b/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp index 07607ca08..4c249031e 100644 --- a/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp +++ b/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp @@ -17,7 +17,8 @@ namespace rajaperf { namespace basic { -void DAXPY_ATOMIC::runKokkosVariant(VariantID vid) { +void DAXPY_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) +{ const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; diff --git a/src/basic-kokkos/IF_QUAD-Kokkos.cpp b/src/basic-kokkos/IF_QUAD-Kokkos.cpp index 9eb994a45..a092f93ae 100644 --- a/src/basic-kokkos/IF_QUAD-Kokkos.cpp +++ b/src/basic-kokkos/IF_QUAD-Kokkos.cpp @@ -13,7 +13,7 @@ namespace rajaperf { namespace basic { -void IF_QUAD::runKokkosVariant(VariantID vid) { +void IF_QUAD::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); diff --git a/src/basic-kokkos/INIT3-Kokkos.cpp b/src/basic-kokkos/INIT3-Kokkos.cpp index 78d338617..f02dd73f7 100644 --- a/src/basic-kokkos/INIT3-Kokkos.cpp +++ b/src/basic-kokkos/INIT3-Kokkos.cpp @@ -13,7 +13,7 @@ namespace rajaperf { namespace basic { -void INIT3::runKokkosVariant(VariantID vid) { +void INIT3::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); diff --git a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp index 59dc4d814..300f0c282 100644 --- a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp +++ b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp @@ -13,7 +13,7 @@ namespace rajaperf { namespace basic { -void INIT_VIEW1D::runKokkosVariant(VariantID vid) { +void INIT_VIEW1D::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); diff --git a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp index c8e0c13ae..7cfac8511 100644 --- a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp +++ b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp @@ -14,7 +14,7 @@ namespace rajaperf { namespace basic { -void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid) { +void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 1; const Index_type iend = getActualProblemSize() + 1; diff --git a/src/basic-kokkos/MULADDSUB-Kokkos.cpp b/src/basic-kokkos/MULADDSUB-Kokkos.cpp index 2d5872fd0..6d16efc0a 100644 --- a/src/basic-kokkos/MULADDSUB-Kokkos.cpp +++ b/src/basic-kokkos/MULADDSUB-Kokkos.cpp @@ -14,7 +14,7 @@ namespace rajaperf { namespace basic { -void MULADDSUB::runKokkosVariant(VariantID vid) { +void MULADDSUB::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); diff --git a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp index 133962990..91fb35e9e 100644 --- a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp +++ b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp @@ -14,7 +14,7 @@ namespace rajaperf { namespace basic { -void NESTED_INIT::runKokkosVariant(VariantID vid) { +void NESTED_INIT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); NESTED_INIT_DATA_SETUP; diff --git a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp index 982c01dad..1a0f380aa 100644 --- a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp +++ b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp @@ -13,7 +13,7 @@ namespace rajaperf { namespace basic { -void PI_ATOMIC::runKokkosVariant(VariantID vid) { +void PI_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); diff --git a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp index 9fd287ef3..f59f52e51 100644 --- a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp +++ b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp @@ -15,7 +15,7 @@ namespace rajaperf { namespace basic { -void REDUCE3_INT::runKokkosVariant(VariantID vid) { +void REDUCE3_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); diff --git a/src/basic-kokkos/TRAP_INT-Kokkos.cpp b/src/basic-kokkos/TRAP_INT-Kokkos.cpp index 12d24021c..1ffd4090f 100644 --- a/src/basic-kokkos/TRAP_INT-Kokkos.cpp +++ b/src/basic-kokkos/TRAP_INT-Kokkos.cpp @@ -26,7 +26,7 @@ Real_type trap_int_func(Real_type x, Real_type y, Real_type xp, Real_type yp) { return denom; } -void TRAP_INT::runKokkosVariant(VariantID vid) { +void TRAP_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) { const Index_type run_reps = getRunReps(); const Index_type ibegin = 0; const Index_type iend = getActualProblemSize(); diff --git a/src/basic/INDEXLIST.hpp b/src/basic/INDEXLIST.hpp index f180b8072..a66bf58b3 100644 --- a/src/basic/INDEXLIST.hpp +++ b/src/basic/INDEXLIST.hpp @@ -60,7 +60,10 @@ class INDEXLIST : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); - void runKokkosVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t) + { + getCout() << "\n INDEXLIST : Unknown Kokkos variant id = " << vid << std::endl; + } void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INDEXLIST_3LOOP.hpp b/src/basic/INDEXLIST_3LOOP.hpp index d4ec35f76..0e40e56b9 100644 --- a/src/basic/INDEXLIST_3LOOP.hpp +++ b/src/basic/INDEXLIST_3LOOP.hpp @@ -71,7 +71,11 @@ class INDEXLIST_3LOOP : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); - void runKokkosVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t) + { + getCout() << "\n INDEXLIST_3LOOP: Unknown Kokkos variant id = " << vid << std::endl; + } + void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/REDUCE_STRUCT.hpp b/src/basic/REDUCE_STRUCT.hpp index b1d188ca1..40cac7df2 100644 --- a/src/basic/REDUCE_STRUCT.hpp +++ b/src/basic/REDUCE_STRUCT.hpp @@ -86,6 +86,11 @@ class REDUCE_STRUCT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); + void runKokkosVariant(VariantID vid, size_t) + { + getCout() << "\n REDUCE_STRUCT : Unknown Kokkos variant id = " << vid << std::endl; + } + void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp index ba8a88ed8..34f9f1b3c 100644 --- a/src/common/Executor.cpp +++ b/src/common/Executor.cpp @@ -20,8 +20,10 @@ #include "basic/DAXPY.hpp" #include "basic/REDUCE3_INT.hpp" #include "basic/INDEXLIST_3LOOP.hpp" +#ifndef RUN_KOKKOS #include "algorithm/SORT.hpp" #include "apps/HALOEXCHANGE_FUSED.hpp" +#endif #include #include @@ -852,23 +854,14 @@ void Executor::runSuite() getCout() << "\n\nRun warmup kernels...\n"; vector warmup_kernels; - // TODO: Amy, check this + warmup_kernels.push_back(makeKernel()); warmup_kernels.push_back(makeKernel()); - #ifndef RUN_KOKKOS warmup_kernels.push_back(makeKernel()); - #endif - #ifndef RUN_KOKKOS +#ifndef RUN_KOKKOS warmup_kernels.push_back(makeKernel()); - #endif - #ifndef RUN_KOKKOS warmup_kernels.push_back(makeKernel()); - #endif -// warmup_kernels.push_back(new basic::DAXPY(run_params)); -// warmup_kernels.push_back(new basic::REDUCE3_INT(run_params)); -//#ifndef RUN_KOKKOS -// warmup_kernels.push_back(new algorithm::SORT(run_params)); -//#endif +#endif for (size_t ik = 0; ik < warmup_kernels.size(); ++ik) { KernelBase* warmup_kernel = warmup_kernels[ik]; diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index a949a45d8..930da54cb 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -241,7 +241,7 @@ void KernelBase::runKernel(VariantID vid, size_t tune_idx) case Kokkos_Lambda : { #if defined(RUN_KOKKOS) - runKokkosVariant(vid); + runKokkosVariant(vid, tune_idx); #endif } diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index 0ace216d1..1caf29789 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -231,7 +231,7 @@ class KernelBase virtual void runOpenMPTargetVariant(VariantID vid, size_t tune_idx) = 0; #endif #if defined(RUN_KOKKOS) - virtual void runKokkosVariant(VariantID vid) = 0; + virtual void runKokkosVariant(VariantID vid, size_t tune_idx) = 0; #endif protected: diff --git a/src/common/RAJAPerfSuite.cpp b/src/common/RAJAPerfSuite.cpp index 2f81890b2..3f366d44a 100644 --- a/src/common/RAJAPerfSuite.cpp +++ b/src/common/RAJAPerfSuite.cpp @@ -25,9 +25,7 @@ #include "basic/INIT3.hpp" #include "basic/INIT_VIEW1D.hpp" #include "basic/INIT_VIEW1D_OFFSET.hpp" -#ifndef RUN_KOKKOS #include "basic/MAT_MAT_SHARED.hpp" -#endif #include "basic/MULADDSUB.hpp" #include "basic/NESTED_INIT.hpp" #include "basic/PI_ATOMIC.hpp" diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index 348583c69..d19bf89f2 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -360,50 +360,6 @@ std::ostream& getNullStream(); template < typename... Ts > inline void ignore_unused(Ts&&...) { } -#if defined(RUN_KOKKOS) -template -struct PointerOfNdimensions; - -template -struct PointerOfNdimensions { - using type = PointedAt; -}; - -template -struct PointerOfNdimensions { - using type = - typename PointerOfNdimensions::type *; -}; - -// This templated function is used to wrap pointers -// (declared and defined in RAJAPerf Suite kernels) in Kokkos Views -// -template -auto getViewFromPointer(PointedAt *kokkos_ptr, Boundaries... boundaries) - -> typename Kokkos::View< - typename PointerOfNdimensions::type, - typename Kokkos::DefaultExecutionSpace::memory_space> - -{ - using host_view_type = typename Kokkos::View< - typename PointerOfNdimensions::type, - typename Kokkos::DefaultHostExecutionSpace::memory_space>; - - using device_view_type = typename Kokkos::View< - typename PointerOfNdimensions::type, - typename Kokkos::DefaultExecutionSpace::memory_space>; - - - using mirror_view_type = typename device_view_type::HostMirror; - - - host_view_type pointer_holder(kokkos_ptr, boundaries...); - - // The boundaries parameter pack contains the array dimenions; - // An allocation is implicitly made here - device_view_type device_data_copy("StringName", boundaries...); -#endif } // closing brace for rajaperf namespace - #endif // closing endif for header file include guard From a0f5c31a068e995b41c6a989118555d9046cdcd9 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Wed, 1 Jun 2022 11:40:45 -0600 Subject: [PATCH 14/28] Addressing D. Beckingsale PR comments --- src/basic-kokkos/CMakeLists.txt | 3 ++- src/basic/INDEXLIST.hpp | 2 +- src/basic/INDEXLIST_3LOOP.hpp | 2 +- src/basic/MAT_MAT_SHARED.hpp | 2 +- src/basic/PI_REDUCE.hpp | 2 +- src/basic/REDUCE_STRUCT.hpp | 2 +- 6 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/basic-kokkos/CMakeLists.txt b/src/basic-kokkos/CMakeLists.txt index a85b13df6..4e68a3f63 100644 --- a/src/basic-kokkos/CMakeLists.txt +++ b/src/basic-kokkos/CMakeLists.txt @@ -6,7 +6,7 @@ # SPDX-License-Identifier: (BSD-3-Clause) ############################################################################### -include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../basic) +#include_directories(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../basic) blt_add_library( NAME basic-kokkos @@ -22,5 +22,6 @@ blt_add_library( REDUCE3_INT-Kokkos.cpp TRAP_INT-Kokkos.cpp DAXPY_ATOMIC-Kokkos.cpp + INCLUDES(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../basic) DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/basic/INDEXLIST.hpp b/src/basic/INDEXLIST.hpp index a66bf58b3..8064a0d35 100644 --- a/src/basic/INDEXLIST.hpp +++ b/src/basic/INDEXLIST.hpp @@ -62,7 +62,7 @@ class INDEXLIST : public KernelBase void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t) { - getCout() << "\n INDEXLIST : Unknown Kokkos variant id = " << vid << std::endl; + getCout() << "\n INDEXLIST : Unimplemented Kokkos variant id = " << vid << std::endl; } void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/INDEXLIST_3LOOP.hpp b/src/basic/INDEXLIST_3LOOP.hpp index 0e40e56b9..843159fac 100644 --- a/src/basic/INDEXLIST_3LOOP.hpp +++ b/src/basic/INDEXLIST_3LOOP.hpp @@ -73,7 +73,7 @@ class INDEXLIST_3LOOP : public KernelBase void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t) { - getCout() << "\n INDEXLIST_3LOOP: Unknown Kokkos variant id = " << vid << std::endl; + getCout() << "\n INDEXLIST_3LOOP: Unimplemented Kokkos variant id = " << vid << std::endl; } diff --git a/src/basic/MAT_MAT_SHARED.hpp b/src/basic/MAT_MAT_SHARED.hpp index 3d48c375d..788ede5c6 100644 --- a/src/basic/MAT_MAT_SHARED.hpp +++ b/src/basic/MAT_MAT_SHARED.hpp @@ -141,7 +141,7 @@ class MAT_MAT_SHARED : public KernelBase { void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t) { - getCout() << "\n MAT_MAT_SHARED : Unknown Kokkos variant id = " << vid << std::endl; + getCout() << "\n MAT_MAT_SHARED : Unimplemented Kokkos variant id = " << vid << std::endl; } void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/PI_REDUCE.hpp b/src/basic/PI_REDUCE.hpp index 3eba2936c..dfd414542 100644 --- a/src/basic/PI_REDUCE.hpp +++ b/src/basic/PI_REDUCE.hpp @@ -58,7 +58,7 @@ class PI_REDUCE : public KernelBase void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t) { - getCout() << "\n PI_REDUCE : Unknown Kokkos variant id = " << vid << std::endl; + getCout() << "\n PI_REDUCE : Unimplemented Kokkos variant id = " << vid << std::endl; } void setCudaTuningDefinitions(VariantID vid); diff --git a/src/basic/REDUCE_STRUCT.hpp b/src/basic/REDUCE_STRUCT.hpp index 40cac7df2..dff21f1e9 100644 --- a/src/basic/REDUCE_STRUCT.hpp +++ b/src/basic/REDUCE_STRUCT.hpp @@ -88,7 +88,7 @@ class REDUCE_STRUCT : public KernelBase void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); void runKokkosVariant(VariantID vid, size_t) { - getCout() << "\n REDUCE_STRUCT : Unknown Kokkos variant id = " << vid << std::endl; + getCout() << "\n REDUCE_STRUCT : Unimplemented Kokkos variant id = " << vid << std::endl; } From 593489b258a55231e9e4fa49ea69a05bcd0d22d8 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Fri, 17 Jun 2022 12:08:59 -0600 Subject: [PATCH 15/28] Build and runtime fix ups with Jason B. --- src/CMakeLists.txt | 17 ++--------------- src/RAJAPerfSuiteDriver.cpp | 3 ++- src/algorithm/SORT.hpp | 4 ---- src/apps/HALOEXCHANGE_FUSED.hpp | 4 ---- src/basic-kokkos/CMakeLists.txt | 2 +- src/basic-kokkos/DAXPY-Kokkos.cpp | 2 ++ src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp | 2 ++ src/basic-kokkos/IF_QUAD-Kokkos.cpp | 2 ++ src/basic-kokkos/INIT3-Kokkos.cpp | 2 ++ src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp | 2 ++ src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp | 2 ++ src/basic-kokkos/NESTED_INIT-Kokkos.cpp | 2 ++ src/basic-kokkos/PI_ATOMIC-Kokkos.cpp | 2 ++ src/basic-kokkos/REDUCE3_INT-Kokkos.cpp | 2 ++ src/basic-kokkos/TRAP_INT-Kokkos.cpp | 2 ++ src/basic/INDEXLIST.cpp | 1 + src/basic/INDEXLIST.hpp | 4 ---- src/basic/INDEXLIST_3LOOP.cpp | 1 + src/basic/INDEXLIST_3LOOP.hpp | 5 ----- src/basic/MAT_MAT_SHARED.cpp | 1 - src/basic/MAT_MAT_SHARED.hpp | 4 ---- src/basic/PI_REDUCE.cpp | 1 - src/basic/PI_REDUCE.hpp | 4 ---- src/basic/REDUCE_STRUCT.hpp | 5 ----- src/common/Executor.cpp | 4 ---- src/common/KernelBase.cpp | 8 ++++++++ src/common/KernelBase.hpp | 10 +++++++++- src/common/RAJAPerfSuite.cpp | 13 +++++++------ src/common/RAJAPerfSuite.hpp | 8 -------- 29 files changed, 51 insertions(+), 68 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0f3e624f3..fe5f709dd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -10,10 +10,7 @@ include_directories(.) add_subdirectory(common) add_subdirectory(basic) - -# TODO: We will add Kokkos variants of kernel groups one by one -# when all are done this difference is not needed anymore -if(NOT ENABLE_KOKKOS) +add_subdirectory(basic-kokkos) add_subdirectory(apps) add_subdirectory(lcals) add_subdirectory(polybench) @@ -24,21 +21,11 @@ set(RAJA_PERFSUITE_EXECUTABLE_DEPENDS common apps basic + basic-kokkos lcals polybench stream algorithm) -endif() - -# TODO: Eventually Kokkos depends should be done via append_list -if(ENABLE_KOKKOS) - add_subdirectory(basic-kokkos) - set(RAJA_PERFSUITE_EXECUTABLE_DEPENDS - common - basic - basic-kokkos - ) -endif() list(APPEND RAJA_PERFSUITE_EXECUTABLE_DEPENDS ${RAJA_PERFSUITE_DEPENDS}) diff --git a/src/RAJAPerfSuiteDriver.cpp b/src/RAJAPerfSuiteDriver.cpp index 5d0680482..57260b976 100644 --- a/src/RAJAPerfSuiteDriver.cpp +++ b/src/RAJAPerfSuiteDriver.cpp @@ -19,7 +19,8 @@ #endif //------------------------------------------------------------------------------ -int main(int argc, char **argv) { +int main(int argc, char **argv) +{ #ifdef RAJA_PERFSUITE_ENABLE_MPI MPI_Init(&argc, &argv); diff --git a/src/algorithm/SORT.hpp b/src/algorithm/SORT.hpp index ec8a3a65a..0670c9dd0 100644 --- a/src/algorithm/SORT.hpp +++ b/src/algorithm/SORT.hpp @@ -54,10 +54,6 @@ class SORT : public KernelBase { getCout() << "\n SORT : Unknown OMP Target variant id = " << vid << std::endl; } - void runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) - { - getCout() << "\n SORT : Unknown Kokkos variant id = " << vid << std::endl; - } private: static const size_t default_gpu_block_size = 0; diff --git a/src/apps/HALOEXCHANGE_FUSED.hpp b/src/apps/HALOEXCHANGE_FUSED.hpp index 99fa6b0e8..e47c1e14e 100644 --- a/src/apps/HALOEXCHANGE_FUSED.hpp +++ b/src/apps/HALOEXCHANGE_FUSED.hpp @@ -137,10 +137,6 @@ class HALOEXCHANGE_FUSED : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); - void runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx)) - { - getCout() << "\n Haloexchange Fused : Unknown Kokkos variant id = " << vid << std::endl; - } void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic-kokkos/CMakeLists.txt b/src/basic-kokkos/CMakeLists.txt index 4e68a3f63..bccb29e1d 100644 --- a/src/basic-kokkos/CMakeLists.txt +++ b/src/basic-kokkos/CMakeLists.txt @@ -22,6 +22,6 @@ blt_add_library( REDUCE3_INT-Kokkos.cpp TRAP_INT-Kokkos.cpp DAXPY_ATOMIC-Kokkos.cpp - INCLUDES(SYSTEM ${CMAKE_CURRENT_SOURCE_DIR}/../basic) + INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../basic DEPENDS_ON common ${RAJA_PERFSUITE_DEPENDS} ) diff --git a/src/basic-kokkos/DAXPY-Kokkos.cpp b/src/basic-kokkos/DAXPY-Kokkos.cpp index f35e01a83..6c3ad5e6e 100644 --- a/src/basic-kokkos/DAXPY-Kokkos.cpp +++ b/src/basic-kokkos/DAXPY-Kokkos.cpp @@ -7,6 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "DAXPY.hpp" +#if defined(RUN_KOKKOS) #include "common/KokkosViewUtils.hpp" #include @@ -65,3 +66,4 @@ void DAXPY::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx) } // end namespace basic } // end namespace rajaperf +#endif diff --git a/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp b/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp index 4c249031e..0f90fa86c 100644 --- a/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp +++ b/src/basic-kokkos/DAXPY_ATOMIC-Kokkos.cpp @@ -7,6 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "DAXPY_ATOMIC.hpp" +#if defined(RUN_KOKKOS) #include "common/KokkosViewUtils.hpp" #include @@ -66,3 +67,4 @@ void DAXPY_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tu } // end namespace basic } // end namespace rajaperf +#endif diff --git a/src/basic-kokkos/IF_QUAD-Kokkos.cpp b/src/basic-kokkos/IF_QUAD-Kokkos.cpp index a092f93ae..e1b8cc601 100644 --- a/src/basic-kokkos/IF_QUAD-Kokkos.cpp +++ b/src/basic-kokkos/IF_QUAD-Kokkos.cpp @@ -7,6 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "IF_QUAD.hpp" +#if defined(RUN_KOKKOS) #include "common/KokkosViewUtils.hpp" #include @@ -73,3 +74,4 @@ void IF_QUAD::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_id } // end namespace basic } // end namespace rajaperf +#endif diff --git a/src/basic-kokkos/INIT3-Kokkos.cpp b/src/basic-kokkos/INIT3-Kokkos.cpp index f02dd73f7..eef8ffbaa 100644 --- a/src/basic-kokkos/INIT3-Kokkos.cpp +++ b/src/basic-kokkos/INIT3-Kokkos.cpp @@ -7,6 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "INIT3.hpp" +#if defined (RUN_KOKKOS) #include "common/KokkosViewUtils.hpp" #include @@ -69,3 +70,4 @@ void INIT3::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx) } // end namespace basic } // end namespace rajaperf +#endif diff --git a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp index 300f0c282..8d59409d1 100644 --- a/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp +++ b/src/basic-kokkos/INIT_VIEW1D-Kokkos.cpp @@ -7,6 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "INIT_VIEW1D.hpp" +#if defined(RUN_KOKKOS) #include "common/KokkosViewUtils.hpp" #include @@ -54,3 +55,4 @@ void INIT_VIEW1D::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun } // end namespace basic } // end namespace rajaperf +#endif diff --git a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp index 7cfac8511..ae03fe752 100644 --- a/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp +++ b/src/basic-kokkos/INIT_VIEW1D_OFFSET-Kokkos.cpp @@ -7,6 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "INIT_VIEW1D_OFFSET.hpp" +#if defined(RUN_KOKKOS) #include "common/KokkosViewUtils.hpp" #include @@ -56,3 +57,4 @@ void INIT_VIEW1D_OFFSET::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ } // end namespace basic } // end namespace rajaperf +#endif diff --git a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp index 91fb35e9e..f69020e57 100644 --- a/src/basic-kokkos/NESTED_INIT-Kokkos.cpp +++ b/src/basic-kokkos/NESTED_INIT-Kokkos.cpp @@ -7,6 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "NESTED_INIT.hpp" +#if defined(RUN_KOKKOS) #include "common/KokkosViewUtils.hpp" #include @@ -74,3 +75,4 @@ void NESTED_INIT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun } // end namespace basic } // end namespace rajaperf +#endif diff --git a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp index 1a0f380aa..066aca7aa 100644 --- a/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp +++ b/src/basic-kokkos/PI_ATOMIC-Kokkos.cpp @@ -7,6 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "PI_ATOMIC.hpp" +#if defined(RUN_KOKKOS) #include "common/KokkosViewUtils.hpp" #include @@ -65,3 +66,4 @@ void PI_ATOMIC::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } // end namespace basic } // end namespace rajaperf +#endif diff --git a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp index f59f52e51..b1566d619 100644 --- a/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp +++ b/src/basic-kokkos/REDUCE3_INT-Kokkos.cpp @@ -7,6 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "REDUCE3_INT.hpp" +#if defined(RUN_KOKKOS) #include "common/KokkosViewUtils.hpp" #include @@ -74,3 +75,4 @@ void REDUCE3_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tun } // end namespace basic } // end namespace rajaperf +#endif diff --git a/src/basic-kokkos/TRAP_INT-Kokkos.cpp b/src/basic-kokkos/TRAP_INT-Kokkos.cpp index 1ffd4090f..47c4596e6 100644 --- a/src/basic-kokkos/TRAP_INT-Kokkos.cpp +++ b/src/basic-kokkos/TRAP_INT-Kokkos.cpp @@ -7,6 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "TRAP_INT.hpp" +#if defined(RUN_KOKKOS) #include "common/KokkosViewUtils.hpp" #include @@ -65,3 +66,4 @@ void TRAP_INT::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_i } // end namespace basic } // end namespace rajaperf +#endif diff --git a/src/basic/INDEXLIST.cpp b/src/basic/INDEXLIST.cpp index df523fbf6..382a7d871 100644 --- a/src/basic/INDEXLIST.cpp +++ b/src/basic/INDEXLIST.cpp @@ -49,6 +49,7 @@ INDEXLIST::INDEXLIST(const RunParams& params) setVariantDefined( Base_CUDA ); setVariantDefined( Base_HIP ); + } INDEXLIST::~INDEXLIST() diff --git a/src/basic/INDEXLIST.hpp b/src/basic/INDEXLIST.hpp index 8064a0d35..0836d8197 100644 --- a/src/basic/INDEXLIST.hpp +++ b/src/basic/INDEXLIST.hpp @@ -60,10 +60,6 @@ class INDEXLIST : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); - void runKokkosVariant(VariantID vid, size_t) - { - getCout() << "\n INDEXLIST : Unimplemented Kokkos variant id = " << vid << std::endl; - } void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/INDEXLIST_3LOOP.cpp b/src/basic/INDEXLIST_3LOOP.cpp index e7d4215fa..b7070a6c3 100644 --- a/src/basic/INDEXLIST_3LOOP.cpp +++ b/src/basic/INDEXLIST_3LOOP.cpp @@ -58,6 +58,7 @@ INDEXLIST_3LOOP::INDEXLIST_3LOOP(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); + } INDEXLIST_3LOOP::~INDEXLIST_3LOOP() diff --git a/src/basic/INDEXLIST_3LOOP.hpp b/src/basic/INDEXLIST_3LOOP.hpp index 843159fac..e19ee5508 100644 --- a/src/basic/INDEXLIST_3LOOP.hpp +++ b/src/basic/INDEXLIST_3LOOP.hpp @@ -71,11 +71,6 @@ class INDEXLIST_3LOOP : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); - void runKokkosVariant(VariantID vid, size_t) - { - getCout() << "\n INDEXLIST_3LOOP: Unimplemented Kokkos variant id = " << vid << std::endl; - } - void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/MAT_MAT_SHARED.cpp b/src/basic/MAT_MAT_SHARED.cpp index 87a5fde1e..895d0a08d 100644 --- a/src/basic/MAT_MAT_SHARED.cpp +++ b/src/basic/MAT_MAT_SHARED.cpp @@ -61,7 +61,6 @@ MAT_MAT_SHARED::MAT_MAT_SHARED(const RunParams ¶ms) setVariantDefined(Lambda_HIP); setVariantDefined(RAJA_HIP); - setVariantDefined(Kokkos_Lambda); } MAT_MAT_SHARED::~MAT_MAT_SHARED() {} diff --git a/src/basic/MAT_MAT_SHARED.hpp b/src/basic/MAT_MAT_SHARED.hpp index 788ede5c6..095721c27 100644 --- a/src/basic/MAT_MAT_SHARED.hpp +++ b/src/basic/MAT_MAT_SHARED.hpp @@ -139,10 +139,6 @@ class MAT_MAT_SHARED : public KernelBase { void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); - void runKokkosVariant(VariantID vid, size_t) - { - getCout() << "\n MAT_MAT_SHARED : Unimplemented Kokkos variant id = " << vid << std::endl; - } void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/PI_REDUCE.cpp b/src/basic/PI_REDUCE.cpp index e8966729b..ef069a2aa 100644 --- a/src/basic/PI_REDUCE.cpp +++ b/src/basic/PI_REDUCE.cpp @@ -52,7 +52,6 @@ PI_REDUCE::PI_REDUCE(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); - setVariantDefined( Kokkos_Lambda ); } PI_REDUCE::~PI_REDUCE() diff --git a/src/basic/PI_REDUCE.hpp b/src/basic/PI_REDUCE.hpp index dfd414542..345c2862e 100644 --- a/src/basic/PI_REDUCE.hpp +++ b/src/basic/PI_REDUCE.hpp @@ -56,10 +56,6 @@ class PI_REDUCE : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); - void runKokkosVariant(VariantID vid, size_t) - { - getCout() << "\n PI_REDUCE : Unimplemented Kokkos variant id = " << vid << std::endl; - } void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/basic/REDUCE_STRUCT.hpp b/src/basic/REDUCE_STRUCT.hpp index dff21f1e9..b1d188ca1 100644 --- a/src/basic/REDUCE_STRUCT.hpp +++ b/src/basic/REDUCE_STRUCT.hpp @@ -86,11 +86,6 @@ class REDUCE_STRUCT : public KernelBase void runCudaVariant(VariantID vid, size_t tune_idx); void runHipVariant(VariantID vid, size_t tune_idx); void runOpenMPTargetVariant(VariantID vid, size_t tune_idx); - void runKokkosVariant(VariantID vid, size_t) - { - getCout() << "\n REDUCE_STRUCT : Unimplemented Kokkos variant id = " << vid << std::endl; - } - void setCudaTuningDefinitions(VariantID vid); void setHipTuningDefinitions(VariantID vid); diff --git a/src/common/Executor.cpp b/src/common/Executor.cpp index 34f9f1b3c..f3ee040ff 100644 --- a/src/common/Executor.cpp +++ b/src/common/Executor.cpp @@ -20,10 +20,8 @@ #include "basic/DAXPY.hpp" #include "basic/REDUCE3_INT.hpp" #include "basic/INDEXLIST_3LOOP.hpp" -#ifndef RUN_KOKKOS #include "algorithm/SORT.hpp" #include "apps/HALOEXCHANGE_FUSED.hpp" -#endif #include #include @@ -858,10 +856,8 @@ void Executor::runSuite() warmup_kernels.push_back(makeKernel()); warmup_kernels.push_back(makeKernel()); warmup_kernels.push_back(makeKernel()); -#ifndef RUN_KOKKOS warmup_kernels.push_back(makeKernel()); warmup_kernels.push_back(makeKernel()); -#endif for (size_t ik = 0; ik < warmup_kernels.size(); ++ik) { KernelBase* warmup_kernel = warmup_kernels[ik]; diff --git a/src/common/KernelBase.cpp b/src/common/KernelBase.cpp index 930da54cb..36efa5170 100644 --- a/src/common/KernelBase.cpp +++ b/src/common/KernelBase.cpp @@ -129,6 +129,14 @@ void KernelBase::setVariantDefined(VariantID vid) #endif break; } +// Required for running Kokkos + case Kokkos_Lambda : + { +#if defined(RUN_KOKKOS) + setKokkosTuningDefinitions(vid); +#endif + break; + } default : { #if 0 diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index 1caf29789..ca70a30f3 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -90,6 +90,11 @@ class KernelBase virtual void setOpenMPTargetTuningDefinitions(VariantID vid) { addVariantTuningName(vid, getDefaultTuningName()); } #endif +// Required for running Kokkos variants +#if defined(RUN_KOKKOS) + virtual void setKokkosTuningDefinitions(VariantID vid) + { addVariantTuningName(vid, getDefaultTuningName()); } +#endif // // Getter methods used to generate kernel execution summary @@ -231,7 +236,10 @@ class KernelBase virtual void runOpenMPTargetVariant(VariantID vid, size_t tune_idx) = 0; #endif #if defined(RUN_KOKKOS) - virtual void runKokkosVariant(VariantID vid, size_t tune_idx) = 0; + virtual void runKokkosVariant(VariantID vid, size_t tune_idx) + { + getCout() << "\n KernelBase: Unimplemented Kokkos variant id = " << vid << std::endl; + } #endif protected: diff --git a/src/common/RAJAPerfSuite.cpp b/src/common/RAJAPerfSuite.cpp index 3f366d44a..38b393fd8 100644 --- a/src/common/RAJAPerfSuite.cpp +++ b/src/common/RAJAPerfSuite.cpp @@ -159,9 +159,7 @@ static const std::string KernelNames [] = std::string("Basic_INIT3"), std::string("Basic_INIT_VIEW1D"), std::string("Basic_INIT_VIEW1D_OFFSET"), - #ifndef RUN_KOKKOS std::string("Basic_MAT_MAT_SHARED"), - #endif std::string("Basic_MULADDSUB"), std::string("Basic_NESTED_INIT"), std::string("Basic_PI_ATOMIC"), @@ -423,7 +421,7 @@ bool isVariantAvailable(VariantID vid) #endif #if defined(RUN_KOKKOS) - if (vid == Kokkos_Lambda) { + if ( vid == Kokkos_Lambda ) { ret_val = true; } #endif @@ -483,6 +481,12 @@ bool isVariantGPU(VariantID vid) } #endif +#if defined(RUN_KOKKOS) + if ( vid == Kokkos_Lambda ) { + ret_val = true; + } +#endif + return ret_val; } @@ -580,7 +584,6 @@ KernelBase* getKernelObject(KernelID kid, break; } -#ifndef RUN_KOKKOS // // Lcals kernels... // @@ -789,8 +792,6 @@ KernelBase* getKernelObject(KernelID kid, break; } -#endif - default: { getCout() << "\n Unknown Kernel ID = " << kid << std::endl; } diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index d19bf89f2..bb33fb2b1 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -89,9 +89,6 @@ enum KernelID { Basic_REDUCE3_INT, Basic_REDUCE_STRUCT, Basic_TRAP_INT, -#ifdef RUN_KOKKOS // move this up to the point implemented with Kokkos - NumKernels, -#endif // // Lcals kernels... @@ -160,12 +157,7 @@ enum KernelID { Algorithm_SORTPAIRS, Algorithm_REDUCE_SUM, -#ifndef RUN_KOKKOS NumKernels // Keep this one last and NEVER comment out (!!) -#else - KokkosDummy -#endif - }; From d042c8a7199a50ba0f018b72d82ae8e34e4e92cc Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Fri, 17 Jun 2022 14:58:13 -0600 Subject: [PATCH 16/28] MULADDSUB-Kokkos.cpp: add guard to fix failing CI --- src/basic-kokkos/MULADDSUB-Kokkos.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/basic-kokkos/MULADDSUB-Kokkos.cpp b/src/basic-kokkos/MULADDSUB-Kokkos.cpp index 6d16efc0a..e81cd17d5 100644 --- a/src/basic-kokkos/MULADDSUB-Kokkos.cpp +++ b/src/basic-kokkos/MULADDSUB-Kokkos.cpp @@ -7,6 +7,7 @@ //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// #include "MULADDSUB.hpp" +#if defined(RUN_KOKKOS) #include "common/KokkosViewUtils.hpp" #include @@ -68,3 +69,4 @@ void MULADDSUB::runKokkosVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_ } // end namespace basic } // end namespace rajaperf +#endif From eb3fa282d99631767a1115dc881976402dccb69b Mon Sep 17 00:00:00 2001 From: ajpowelsnl <49000089+ajpowelsnl@users.noreply.github.com> Date: Tue, 21 Jun 2022 13:31:29 -0600 Subject: [PATCH 17/28] Update src/RAJAPerfSuiteDriver.cpp Co-authored-by: Jason Burmark --- src/RAJAPerfSuiteDriver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RAJAPerfSuiteDriver.cpp b/src/RAJAPerfSuiteDriver.cpp index 57260b976..db1a45852 100644 --- a/src/RAJAPerfSuiteDriver.cpp +++ b/src/RAJAPerfSuiteDriver.cpp @@ -19,7 +19,7 @@ #endif //------------------------------------------------------------------------------ -int main(int argc, char **argv) +int main(int argc, char** argv) { #ifdef RAJA_PERFSUITE_ENABLE_MPI MPI_Init(&argc, &argv); From 0ee2fce3e1eba8408bbefd462b3d95a9afefb22b Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Tue, 21 Jun 2022 14:28:41 -0600 Subject: [PATCH 18/28] Addressing Jason B. PR comments --- src/RAJAPerfSuiteDriver.cpp | 5 ++--- src/basic/INDEXLIST.cpp | 1 - src/basic/INDEXLIST_3LOOP.cpp | 1 - src/basic/INIT3.hpp | 3 --- src/basic/MAT_MAT_SHARED.cpp | 1 - src/basic/PI_ATOMIC.cpp | 4 ---- src/basic/PI_ATOMIC.hpp | 3 +-- src/basic/PI_REDUCE.cpp | 1 - src/basic/PI_REDUCE.hpp | 1 - src/common/KernelBase.hpp | 1 - src/common/RAJAPerfSuite.hpp | 5 ----- 11 files changed, 3 insertions(+), 23 deletions(-) diff --git a/src/RAJAPerfSuiteDriver.cpp b/src/RAJAPerfSuiteDriver.cpp index 57260b976..bb0810c76 100644 --- a/src/RAJAPerfSuiteDriver.cpp +++ b/src/RAJAPerfSuiteDriver.cpp @@ -19,15 +19,14 @@ #endif //------------------------------------------------------------------------------ -int main(int argc, char **argv) +int main(int argc, char** argv) { #ifdef RAJA_PERFSUITE_ENABLE_MPI MPI_Init(&argc, &argv); int num_ranks; MPI_Comm_size(MPI_COMM_WORLD, &num_ranks); - rajaperf::getCout() << "\n\nRunning with " << num_ranks << " MPI ranks..." - << std::endl; + rajaperf::getCout() << "\n\nRunning with " << num_ranks << " MPI ranks..." << std::endl; #endif #ifdef RUN_KOKKOS Kokkos::initialize(argc, argv); diff --git a/src/basic/INDEXLIST.cpp b/src/basic/INDEXLIST.cpp index 382a7d871..df523fbf6 100644 --- a/src/basic/INDEXLIST.cpp +++ b/src/basic/INDEXLIST.cpp @@ -49,7 +49,6 @@ INDEXLIST::INDEXLIST(const RunParams& params) setVariantDefined( Base_CUDA ); setVariantDefined( Base_HIP ); - } INDEXLIST::~INDEXLIST() diff --git a/src/basic/INDEXLIST_3LOOP.cpp b/src/basic/INDEXLIST_3LOOP.cpp index b7070a6c3..e7d4215fa 100644 --- a/src/basic/INDEXLIST_3LOOP.cpp +++ b/src/basic/INDEXLIST_3LOOP.cpp @@ -58,7 +58,6 @@ INDEXLIST_3LOOP::INDEXLIST_3LOOP(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); - } INDEXLIST_3LOOP::~INDEXLIST_3LOOP() diff --git a/src/basic/INIT3.hpp b/src/basic/INIT3.hpp index 4358daf00..0f89b7c54 100644 --- a/src/basic/INIT3.hpp +++ b/src/basic/INIT3.hpp @@ -64,9 +64,6 @@ class INIT3 : public KernelBase template < size_t block_size > void runHipVariantImpl(VariantID vid); - - - private: static const size_t default_gpu_block_size = 256; using gpu_block_sizes_type = gpu_block_size::make_list_type; diff --git a/src/basic/MAT_MAT_SHARED.cpp b/src/basic/MAT_MAT_SHARED.cpp index 895d0a08d..98cd878ce 100644 --- a/src/basic/MAT_MAT_SHARED.cpp +++ b/src/basic/MAT_MAT_SHARED.cpp @@ -60,7 +60,6 @@ MAT_MAT_SHARED::MAT_MAT_SHARED(const RunParams ¶ms) setVariantDefined(Base_HIP); setVariantDefined(Lambda_HIP); setVariantDefined(RAJA_HIP); - } MAT_MAT_SHARED::~MAT_MAT_SHARED() {} diff --git a/src/basic/PI_ATOMIC.cpp b/src/basic/PI_ATOMIC.cpp index 7e5b075c6..607ad1312 100644 --- a/src/basic/PI_ATOMIC.cpp +++ b/src/basic/PI_ATOMIC.cpp @@ -35,10 +35,6 @@ PI_ATOMIC::PI_ATOMIC(const RunParams& params) setUsesFeature(Forall); setUsesFeature(Atomic); - setVariantDefined( Kokkos_Lambda ); - - - setVariantDefined( Base_Seq ); setVariantDefined( Lambda_Seq ); setVariantDefined( RAJA_Seq ); diff --git a/src/basic/PI_ATOMIC.hpp b/src/basic/PI_ATOMIC.hpp index 36e5d97d1..e69cbdb56 100644 --- a/src/basic/PI_ATOMIC.hpp +++ b/src/basic/PI_ATOMIC.hpp @@ -27,6 +27,7 @@ Real_type dx = m_dx; \ Real_ptr pi = m_pi; + #include "common/KernelBase.hpp" namespace rajaperf @@ -62,8 +63,6 @@ class PI_ATOMIC : public KernelBase template < size_t block_size > void runHipVariantImpl(VariantID vid); - - private: static const size_t default_gpu_block_size = 256; using gpu_block_sizes_type = gpu_block_size::make_list_type; diff --git a/src/basic/PI_REDUCE.cpp b/src/basic/PI_REDUCE.cpp index ef069a2aa..16d0770ba 100644 --- a/src/basic/PI_REDUCE.cpp +++ b/src/basic/PI_REDUCE.cpp @@ -51,7 +51,6 @@ PI_REDUCE::PI_REDUCE(const RunParams& params) setVariantDefined( Base_HIP ); setVariantDefined( RAJA_HIP ); - } PI_REDUCE::~PI_REDUCE() diff --git a/src/basic/PI_REDUCE.hpp b/src/basic/PI_REDUCE.hpp index 345c2862e..c7cc3258a 100644 --- a/src/basic/PI_REDUCE.hpp +++ b/src/basic/PI_REDUCE.hpp @@ -64,7 +64,6 @@ class PI_REDUCE : public KernelBase template < size_t block_size > void runHipVariantImpl(VariantID vid); - private: static const size_t default_gpu_block_size = 256; using gpu_block_sizes_type = gpu_block_size::make_list_type; diff --git a/src/common/KernelBase.hpp b/src/common/KernelBase.hpp index ca70a30f3..ed3429643 100644 --- a/src/common/KernelBase.hpp +++ b/src/common/KernelBase.hpp @@ -90,7 +90,6 @@ class KernelBase virtual void setOpenMPTargetTuningDefinitions(VariantID vid) { addVariantTuningName(vid, getDefaultTuningName()); } #endif -// Required for running Kokkos variants #if defined(RUN_KOKKOS) virtual void setKokkosTuningDefinitions(VariantID vid) { addVariantTuningName(vid, getDefaultTuningName()); } diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index bb33fb2b1..ec42df63c 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -20,9 +20,6 @@ #include "Kokkos_Core.hpp" #endif // RUN_KOKKOS -#include -#include - namespace rajaperf { @@ -351,7 +348,5 @@ std::ostream& getNullStream(); */ template < typename... Ts > inline void ignore_unused(Ts&&...) { } - - } // closing brace for rajaperf namespace #endif // closing endif for header file include guard From a9bd6513b12f1293970bfb82376e0c2a91c524da Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Tue, 21 Jun 2022 14:54:07 -0600 Subject: [PATCH 19/28] Addressing Jason B. PR requests: re-adding two includes + rm Kokkos_Core header --- src/common/RAJAPerfSuite.hpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index ec42df63c..7de04d8d9 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -16,9 +16,8 @@ #include "RAJA/config.hpp" #include "rajaperf_config.hpp" -#if defined(RUN_KOKKOS) -#include "Kokkos_Core.hpp" -#endif // RUN_KOKKOS +#include +#include namespace rajaperf { From f8eef469d746c3a0a5703f3a209b2f59bc97c168 Mon Sep 17 00:00:00 2001 From: Amy Powell Date: Tue, 21 Jun 2022 16:50:53 -0600 Subject: [PATCH 20/28] RAJAPerfSuite.hpp: addressing final-ish PR comments from Jason B. --- src/common/RAJAPerfSuite.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index 7de04d8d9..a76a3176c 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -16,15 +16,14 @@ #include "RAJA/config.hpp" #include "rajaperf_config.hpp" -#include #include +#include namespace rajaperf { class KernelBase; class RunParams; - /*! ******************************************************************************* * From 0407c368e6f2db63b6ee64d79d5b18c952670115 Mon Sep 17 00:00:00 2001 From: ajpowelsnl <49000089+ajpowelsnl@users.noreply.github.com> Date: Wed, 22 Jun 2022 07:55:42 -0600 Subject: [PATCH 21/28] Update src/common/RAJAPerfSuite.hpp Accept this change Co-authored-by: Jason Burmark --- src/common/RAJAPerfSuite.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index a76a3176c..62ca756b0 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -24,6 +24,8 @@ namespace rajaperf class KernelBase; class RunParams; + + /*! ******************************************************************************* * From d57a5d9f84dfac18b002d4eb5322a875c2de702e Mon Sep 17 00:00:00 2001 From: ajpowelsnl <49000089+ajpowelsnl@users.noreply.github.com> Date: Wed, 22 Jun 2022 07:56:30 -0600 Subject: [PATCH 22/28] Update src/common/RAJAPerfSuite.hpp Co-authored-by: Jason Burmark --- src/common/RAJAPerfSuite.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index 62ca756b0..b9d72d3fd 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -155,6 +155,7 @@ enum KernelID { Algorithm_REDUCE_SUM, NumKernels // Keep this one last and NEVER comment out (!!) + }; From 3e49f7d5cc94d765f5378b6f4888c39780e02eb4 Mon Sep 17 00:00:00 2001 From: ajpowelsnl <49000089+ajpowelsnl@users.noreply.github.com> Date: Wed, 22 Jun 2022 07:57:00 -0600 Subject: [PATCH 23/28] Update src/common/RAJAPerfSuite.hpp Suggestion accepted Co-authored-by: Jason Burmark --- src/common/RAJAPerfSuite.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index b9d72d3fd..bcd5b1172 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -349,5 +349,6 @@ std::ostream& getNullStream(); */ template < typename... Ts > inline void ignore_unused(Ts&&...) { } + } // closing brace for rajaperf namespace #endif // closing endif for header file include guard From c28aff31a5dca94eb44f7ef1b5b379079f599b41 Mon Sep 17 00:00:00 2001 From: ajpowelsnl <49000089+ajpowelsnl@users.noreply.github.com> Date: Wed, 22 Jun 2022 07:58:51 -0600 Subject: [PATCH 24/28] Update src/common/RAJAPerfSuite.hpp Suggestion accepted Co-authored-by: Jason Burmark --- src/common/RAJAPerfSuite.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index bcd5b1172..e48d732ce 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -351,4 +351,5 @@ template < typename... Ts > inline void ignore_unused(Ts&&...) { } } // closing brace for rajaperf namespace + #endif // closing endif for header file include guard From 8ed1554b49c1e4c8b0b525aa0484473d3517ee22 Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Wed, 22 Jun 2022 07:33:55 -0700 Subject: [PATCH 25/28] Remove extra whitespace --- src/common/RAJAPerfSuite.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index e48d732ce..a5aa75d97 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -155,7 +155,6 @@ enum KernelID { Algorithm_REDUCE_SUM, NumKernels // Keep this one last and NEVER comment out (!!) - }; From dbbae2545091cffe1f611d7b76befc3c01351761 Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Wed, 22 Jun 2022 07:34:37 -0700 Subject: [PATCH 26/28] Add back in newline --- src/common/RAJAPerfSuite.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/common/RAJAPerfSuite.hpp b/src/common/RAJAPerfSuite.hpp index a5aa75d97..6abd4e9a3 100644 --- a/src/common/RAJAPerfSuite.hpp +++ b/src/common/RAJAPerfSuite.hpp @@ -155,6 +155,7 @@ enum KernelID { Algorithm_REDUCE_SUM, NumKernels // Keep this one last and NEVER comment out (!!) + }; From 9c2c431c11f9d04ca5bad3459b0bbbd0ec2133c0 Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Wed, 22 Jun 2022 07:36:03 -0700 Subject: [PATCH 27/28] Remove whitespace change --- src/RAJAPerfSuiteDriver.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/RAJAPerfSuiteDriver.cpp b/src/RAJAPerfSuiteDriver.cpp index 67330d67e..17e64f0f0 100644 --- a/src/RAJAPerfSuiteDriver.cpp +++ b/src/RAJAPerfSuiteDriver.cpp @@ -19,7 +19,7 @@ #endif //------------------------------------------------------------------------------ -int main(int argc, char** argv) +int main( int argc, char** argv ) { #ifdef RAJA_PERFSUITE_ENABLE_MPI MPI_Init(&argc, &argv); From 6add081d60bd293a1329c56c4d4332097bb8aa2f Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Wed, 22 Jun 2022 07:36:26 -0700 Subject: [PATCH 28/28] Remove whitespace change --- src/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fe5f709dd..dbc4c6969 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -26,7 +26,6 @@ set(RAJA_PERFSUITE_EXECUTABLE_DEPENDS polybench stream algorithm) - list(APPEND RAJA_PERFSUITE_EXECUTABLE_DEPENDS ${RAJA_PERFSUITE_DEPENDS}) if(ENABLE_TARGET_OPENMP)