From f24672a5d9e5fd35fdf53289cf9fe652895851fe Mon Sep 17 00:00:00 2001 From: mehmet yusufoglu Date: Thu, 1 Aug 2024 15:08:37 +0200 Subject: [PATCH 1/5] add cloverleaf benchmark --- benchmarks/CMakeLists.txt | 2 + benchmarks/cloverleaf/CMakeLists.txt | 49 +++ .../cloverleaf/src/cloverLeafKernels.hpp | 294 ++++++++++++++ .../cloverleaf/src/cloverLeafMainTest.cpp | 371 ++++++++++++++++++ 4 files changed, 716 insertions(+) create mode 100644 benchmarks/cloverleaf/CMakeLists.txt create mode 100644 benchmarks/cloverleaf/src/cloverLeafKernels.hpp create mode 100644 benchmarks/cloverleaf/src/cloverLeafMainTest.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index d27b858f39a..da414861fe9 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -21,3 +21,5 @@ if(NOT BUILD_TESTING) endif() add_subdirectory("babelstream/") +add_subdirectory("cloverleaf/") + diff --git a/benchmarks/cloverleaf/CMakeLists.txt b/benchmarks/cloverleaf/CMakeLists.txt new file mode 100644 index 00000000000..6e69096d6d7 --- /dev/null +++ b/benchmarks/cloverleaf/CMakeLists.txt @@ -0,0 +1,49 @@ +# +# Copyright 2023 Erik Zenker, Benjamin Worpitz, Jan Stephan, Bernhard Manfred Gruber +# SPDX-License-Identifier: ISC +# + +cmake_minimum_required(VERSION 3.22) +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + +project(cloverleaf LANGUAGES CXX) + +if(NOT TARGET alpaka::alpaka) + option(alpaka_USE_SOURCE_TREE "Use alpaka's source tree instead of an alpaka installation" OFF) + if(alpaka_USE_SOURCE_TREE) + # Don't build the benchmarks recursively + set(alpaka_BUILD_BENCHMARKS OFF) + add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../.." "${CMAKE_BINARY_DIR}/alpaka") + else() + find_package(alpaka REQUIRED) + endif() +endif() + + +set(_TARGET_NAME "cloverleaf") +append_recursive_files_add_to_src_group("src/" "src/" "cpp" _FILES_SOURCE) + +alpaka_add_executable( + ${_TARGET_NAME} + ${_FILES_SOURCE}) + +target_include_directories( + ${_TARGET_NAME} + PRIVATE "src") + +target_link_libraries( + ${_TARGET_NAME} + PRIVATE common) + +set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER benchmarks/cloverleaf) + +#Run as a ctest +if(alpaka_CI) + # Only run for release builds since this is a benchmark + if(CMAKE_BUILD_TYPE STREQUAL "Release") + add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} --benchmark-samples 1) + endif() +else() + # For a normal benchmark test, number of samples should be equal to the default value. + add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME}) +endif() diff --git a/benchmarks/cloverleaf/src/cloverLeafKernels.hpp b/benchmarks/cloverleaf/src/cloverLeafKernels.hpp new file mode 100644 index 00000000000..2d8f9dd71ee --- /dev/null +++ b/benchmarks/cloverleaf/src/cloverLeafKernels.hpp @@ -0,0 +1,294 @@ +#pragma once + +#include + +#include + +using Data = float; +using Dim3 = alpaka::DimInt<3u>; +using Idx = std::uint32_t; + +const Idx nx = 16; // Number of cells in x direction +const Idx ny = 128; // Number of cells in y direction +const Idx nz = 512; // Number of cells in z direction + +// Kernel to update the halo regions +struct UpdateHaloKernel +{ + template + ALPAKA_FN_ACC auto operator()(TAcc const& acc, MdSpan density) const -> void + { + auto const i = alpaka::getIdx(acc)[0]; + auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; + + if(i < nx && j < ny && k < nz) + { + // Update halo cells for density (simplified example) + // Assuming a single layer halo, and periodic boundary conditions + if(i == 0) + density(i, j, k) = density(nx - 2, j, k); + if(i == nx - 1) + density(i, j, k) = density(1, j, k); + + if(j == 0) + density(i, j, k) = density(i, ny - 2, k); + if(j == ny - 1) + density(i, j, k) = density(i, 1, k); + + if(k == 0) + density(i, j, k) = density(i, j, nz - 2); + if(k == nz - 1) + density(i, j, k) = density(i, j, 1); + } + } +}; + +// Kernel to compute the ideal gas equation of state +struct IdealGasKernel +{ + template + ALPAKA_FN_ACC auto operator()( + TAcc const& acc, + MdSpan density, + MdSpan energy, + MdSpan pressure, + MdSpan soundspeed, + float gamma) const -> void + { + auto const i = alpaka::getIdx(acc)[0]; + auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; + + if(i < nx && j < ny && k < nz) + { + pressure(i, j, k) = (gamma - 1.0f) * density(i, j, k) * energy(i, j, k); + soundspeed(i, j, k) = sqrt(gamma * pressure(i, j, k) / density(i, j, k)); + } + } +}; + +// Kernel to initialize the simulation variables +struct InitializerKernel +{ + template + ALPAKA_FN_ACC auto operator()( + TAcc const& acc, + MdSpan density, + MdSpan energy, + MdSpan pressure, + MdSpan velocityX, + MdSpan velocityY, + MdSpan velocityZ) const -> void + { + auto const i = alpaka::getIdx(acc)[0]; + auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; + + if(i < nx && j < ny && k < nz) + { + density(i, j, k) = 1.0f; // Initial density + energy(i, j, k) = 2.5f; // Initial energy + pressure(i, j, k) = 1.0f; // Initial pressure + velocityX(i, j, k) = 0.0f; // Initial velocity + + if(i < nx && j < ny && k < nz) + { + // Simple advection calculation (this is a simplified example) + density(i, j, k) += (velocityX(i, j, k) + velocityY(i, j, k) + velocityZ(i, j, k)) * 0.01f; + } + } + } +}; + +// Kernel to compute the equation of state (EOS) and additional calculations +struct EOSKernel +{ + template + ALPAKA_FN_ACC auto operator()( + TAcc const& acc, + MdSpan density, + MdSpan energy, + MdSpan pressure, + MdSpan velocityX, + MdSpan velocityY, + MdSpan velocityZ, + float gamma) const -> void + { + auto const i = alpaka::getIdx(acc)[0]; + auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; + + if(i < nx && j < ny && k < nz) + { + // Compute pressure using ideal gas law: P = (gamma - 1) * density * energy + pressure(i, j, k) = (gamma - 1.0f) * density(i, j, k) * energy(i, j, k); + + // Additional calculations to update velocities (this is a simplified example) + velocityX(i, j, k) += pressure(i, j, k) * 0.1f; + velocityY(i, j, k) += pressure(i, j, k) * 0.1f; + velocityZ(i, j, k) += pressure(i, j, k) * 0.1f; + } + } +}; + +// Kernel for Flux calculations +struct FluxKernel +{ + template + ALPAKA_FN_ACC auto operator()( + TAcc const& acc, + MdSpan density, + MdSpan energy, + MdSpan pressure, + MdSpan velocityX, + MdSpan velocityY, + MdSpan velocityZ, + MdSpan fluxDensity, + MdSpan fluxEnergy, + MdSpan fluxVelocityX, + MdSpan fluxVelocityY, + MdSpan fluxVelocityZ) const -> void + { + auto const i = alpaka::getIdx(acc)[0]; + auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; + + if(i < nx && j < ny && k < nz) + { + // Compute fluxes (this is a simplified example) + fluxDensity(i, j, k) = density(i, j, k) * velocityX(i, j, k); + fluxEnergy(i, j, k) = energy(i, j, k) * velocityX(i, j, k); + fluxVelocityX(i, j, k) = velocityX(i, j, k) * velocityX(i, j, k) + pressure(i, j, k); + fluxVelocityY(i, j, k) = velocityY(i, j, k) * velocityX(i, j, k); + fluxVelocityZ(i, j, k) = velocityZ(i, j, k) * velocityX(i, j, k); + } + } +}; + +// Kernel for the advection step +struct AdvectionKernel +{ + template + ALPAKA_FN_ACC auto operator()( + TAcc const& acc, + MdSpan density, + MdSpan velocityX, + MdSpan velocityY, + MdSpan velocityZ) const -> void + { + auto const i = alpaka::getIdx(acc)[0]; + auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; + + if(i < nx && j < ny && k < nz) + { + // Simple advection calculation (this is a simplified example) + density(i, j, k) += (velocityX(i, j, k) + velocityY(i, j, k) + velocityZ(i, j, k)) * 0.01f; + } + } +}; + +// Kernel for the Lagrangian step +struct LagrangianKernel +{ + template + ALPAKA_FN_ACC auto operator()( + TAcc const& acc, + MdSpan density, + MdSpan energy, + MdSpan velocityX, + MdSpan velocityY, + MdSpan velocityZ, + MdSpan fluxDensity, + MdSpan fluxEnergy, + MdSpan fluxVelocityX, + MdSpan fluxVelocityY, + MdSpan fluxVelocityZ) const -> void + { + auto const i = alpaka::getIdx(acc)[0]; + auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; + + if(i < nx && j < ny && k < nz) + { + // Update the cell-centered variables based on flux calculations + density(i, j, k) -= fluxDensity(i, j, k) * 0.1f; + energy(i, j, k) -= fluxEnergy(i, j, k) * 0.1f; + velocityX(i, j, k) -= fluxVelocityX(i, j, k) * 0.1f; + velocityY(i, j, k) -= fluxVelocityY(i, j, k) * 0.1f; + velocityZ(i, j, k) -= fluxVelocityZ(i, j, k) * 0.1f; + } + } +}; + +// Kernel for viscosity calculations +struct ViscosityKernel +{ + template + ALPAKA_FN_ACC auto operator()( + TAcc const& acc, + MdSpan density, + MdSpan velocityX, + MdSpan velocityY, + MdSpan velocityZ, + MdSpan pressure, + MdSpan viscosity) const -> void + { + auto const i = alpaka::getIdx(acc)[0]; + auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; + + if(i > 0 && i < nx - 1 && j > 0 && j < ny - 1 && k > 0 && k < nz - 1) + { + float gradVx = (velocityX(i + 1, j, k) - velocityX(i - 1, j, k)) * 0.5f; + float gradVy = (velocityY(i, j + 1, k) - velocityY(i, j - 1, k)) * 0.5f; + float gradVz = (velocityZ(i, j, k + 1) - velocityZ(i, j, k - 1)) * 0.5f; + + viscosity(i, j, k) = density(i, j, k) * (gradVx * gradVx + gradVy * gradVy + gradVz * gradVz) * 0.01f; + + // Apply viscosity to pressure + pressure(i, j, k) += viscosity(i, j, k); + } + } +}; + +// Kernel to find the maximum velocity +struct MaxVelocityKernel +{ + template + ALPAKA_FN_ACC auto operator()( + TAcc const& acc, + MdSpan velocityX, + MdSpan velocityY, + MdSpan velocityZ, + Data* maxVelocity) const -> void + { + auto const i = alpaka::getIdx(acc)[0]; + auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; + + if(i < nx && j < ny && k < nz) + { + float vx = velocityX(i, j, k); + float vy = velocityY(i, j, k); + float vz = velocityZ(i, j, k); + float v = alpaka::math::sqrt(acc, (vx * vx + vy * vy + vz * vz)); + + // Atomic operation to find the maximum velocity + float val = alpaka::atomicMax(acc, maxVelocity, v); + maxVelocity[0] = val; + } + } +}; + +[[maybe_unused]] static float calculateTimeStep(float dx, float dy, float dz, float maxVelocity, float cMax) +{ + // Compute the smallest grid spacing + float minDx = std::min({dx, dy, dz}); + + // Calculate the time step based on the CFL condition + float dt = cMax * minDx / maxVelocity; + + return dt; +} diff --git a/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp b/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp new file mode 100644 index 00000000000..c9ccff6d64e --- /dev/null +++ b/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp @@ -0,0 +1,371 @@ +#include "cloverLeafKernels.hpp" + +#include +#include + +#include +#include +#include + +#include +#include +#include + +/** + * Cloverleaf benchmark + */ + +//! \brief The Function for testing cloverleaf kernels for given Acc type and data type. +//! \tparam Acc the accelerator type +//! \tparam DataType The data type to differentiate single or double data type based tests. +template +void testKernels() +{ + // Define device and queue + using Queue = alpaka::Queue; + + auto const platformHost = alpaka::PlatformCpu{}; + auto const devHost = alpaka::getDevByIdx(platformHost, 0); + auto const platformAcc = alpaka::Platform{}; + auto const devAcc = alpaka::getDevByIdx(platformAcc, 0); + + std::cout << "Using alpaka accelerator: " << alpaka::getAccName() << std::endl; + + Queue queue(devAcc); + + // Define the 3D extent (dimensions) + alpaka::Vec const extent3D(static_cast(nx), static_cast(ny), static_cast(nz)); + + // Allocate host memory + auto hDensity = alpaka::allocBuf(devHost, extent3D); + auto hEnergy = alpaka::allocBuf(devHost, extent3D); + auto hPressure = alpaka::allocBuf(devHost, extent3D); + auto hVelocityX = alpaka::allocBuf(devHost, extent3D); + auto hVelocityY = alpaka::allocBuf(devHost, extent3D); + auto hVelocityZ = alpaka::allocBuf(devHost, extent3D); + auto hSoundspeed = alpaka::allocBuf(devHost, extent3D); // Additional buffer for soundspeed + + // Allocate device memory + auto dDensity = alpaka::allocBuf(devAcc, extent3D); + auto dEnergy = alpaka::allocBuf(devAcc, extent3D); + auto dPressure = alpaka::allocBuf(devAcc, extent3D); + auto dVelocityX = alpaka::allocBuf(devAcc, extent3D); + auto dVelocityY = alpaka::allocBuf(devAcc, extent3D); + auto dVelocityZ = alpaka::allocBuf(devAcc, extent3D); + auto dSoundspeed = alpaka::allocBuf(devAcc, extent3D); // Additional buffer for soundspeed + + // Convert directly to mdspan + auto mdDensity = alpaka::experimental::getMdSpan(dDensity); + auto mdEnergy = alpaka::experimental::getMdSpan(dEnergy); + auto mdPressure = alpaka::experimental::getMdSpan(dPressure); + auto mdVelocityX = alpaka::experimental::getMdSpan(dVelocityX); + auto mdVelocityY = alpaka::experimental::getMdSpan(dVelocityY); + auto mdVelocityZ = alpaka::experimental::getMdSpan(dVelocityZ); + auto mdSoundspeed = alpaka::experimental::getMdSpan(dSoundspeed); // Additional mdspan for soundspeed + + auto hFluxDensity = alpaka::allocBuf(devHost, extent3D); + auto hFluxEnergy = alpaka::allocBuf(devHost, extent3D); + auto hFluxVelocityX = alpaka::allocBuf(devHost, extent3D); + auto hFluxVelocityY = alpaka::allocBuf(devHost, extent3D); + auto hFluxVelocityZ = alpaka::allocBuf(devHost, extent3D); + + auto dFluxDensity = alpaka::allocBuf(devAcc, extent3D); + auto dFluxEnergy = alpaka::allocBuf(devAcc, extent3D); + auto dFluxVelocityX = alpaka::allocBuf(devAcc, extent3D); + auto dFluxVelocityY = alpaka::allocBuf(devAcc, extent3D); + auto dFluxVelocityZ = alpaka::allocBuf(devAcc, extent3D); + + auto mdFluxDensity = alpaka::experimental::getMdSpan(dFluxDensity); + auto mdFluxEnergy = alpaka::experimental::getMdSpan(dFluxEnergy); + auto mdFluxVelocityX = alpaka::experimental::getMdSpan(dFluxVelocityX); + auto mdFluxVelocityY = alpaka::experimental::getMdSpan(dFluxVelocityY); + auto mdFluxVelocityZ = alpaka::experimental::getMdSpan(dFluxVelocityZ); + + // Allocate host memory for viscosity + auto hViscosity = alpaka::allocBuf(devHost, extent3D); + + // Allocate device memory for viscosity + auto dViscosity = alpaka::allocBuf(devAcc, extent3D); + + // Create mdspan view for viscosity using alpaka::experimental::getMdSpan + auto mdViscosity = alpaka::experimental::getMdSpan(dViscosity); + + // Define work divisions + const alpaka::Vec size{nx, ny, nz}; + + // Define work divisions + + + auto const bundeledInitKernel = alpaka::KernelBundle( + InitializerKernel{}, + mdDensity, + mdEnergy, + mdPressure, + mdVelocityX, + mdVelocityY, + mdVelocityZ); + + auto workDiv = alpaka::getValidWorkDivForKernel( + devAcc, + bundeledInitKernel, + size, + alpaka::Vec::ones(), + false, + alpaka::GridBlockExtentSubDivRestrictions::Unrestricted); + // Simulation parameters + float totalTime = 0.0f; // Total simulation time + float endTime = 1.0f; // End time of the simulation + + // Exec the initialization kernel + alpaka::exec( + queue, + workDiv, + InitializerKernel{}, + mdDensity, + mdEnergy, + mdPressure, + mdVelocityX, + mdVelocityY, + mdVelocityZ); + // std::cout << "InitializerKernel finished" << std::endl; + // Wait for the kernel to finish + alpaka::wait(queue); + + // Variables to calculate the dt at each step + float dx = 0.01f; + float dy = 0.01f; + float dz = 0.01f; + float cMax = 0.5f; + float dt = 0.01f; + + + alpaka::Vec, Idx> const extent1D = alpaka::Vec, Idx>::all(1u); + auto resultGpuHost = alpaka::allocBuf(devHost, extent1D); + resultGpuHost[0] = static_cast(1.0f); + auto outputDeviceMemory = alpaka::allocBuf(devAcc, extent1D); + alpaka::memcpy(queue, outputDeviceMemory, resultGpuHost); + std::cout << "Workdiv:" << workDiv << std::endl; + std::cout << "Running cloverleaf loop." << std::endl; + while(totalTime < endTime) + { + // Calculate the time step (dt), max velocity is needed. + // Launch the kernel to compute the maximum velocity + alpaka::exec( + queue, + workDiv, + MaxVelocityKernel{}, + mdVelocityX, + mdVelocityY, + mdVelocityZ, + std::data(outputDeviceMemory)); + + // Copy result from device memory to host + alpaka::memcpy(queue, resultGpuHost, outputDeviceMemory, extent1D); + + // std::cout << "MaxVelocityKernel finished" << std::endl; + + alpaka::wait(queue); + + // Now use maxVelocity to compute the time step + dt = calculateTimeStep(dx, dy, dz, resultGpuHost[0], cMax); + + // Exec the halo update kernel + alpaka::exec(queue, workDiv, UpdateHaloKernel{}, mdDensity); + + // Wait for the halo update kernel to finish + alpaka::wait(queue); + // std::cout << "Halo kernel finished" << std::endl; + + // Exec the ideal gas kernel + alpaka::exec( + queue, + workDiv, + IdealGasKernel{}, + mdDensity, + mdEnergy, + mdPressure, + mdSoundspeed, + 1.4f); // gamma + + // std::cout << "IdealGasKernel finished" << std::endl; + + // Wait for the ideal gas kernel to finish + alpaka::wait(queue); + + // Exec the EOS kernel + float gamma = 1.4f; // Specific heat ratio + alpaka::exec( + queue, + workDiv, + EOSKernel{}, + mdDensity, + mdEnergy, + mdPressure, + mdVelocityX, + mdVelocityY, + mdVelocityZ, + gamma); + + // std::cout << "EOSKernel finished" << std::endl; + + // Wait for the EOS kernel to finish + alpaka::wait(queue); + + // Exec the Flux kernel + alpaka::exec( + queue, + workDiv, + FluxKernel{}, + mdDensity, + mdEnergy, + mdPressure, + mdVelocityX, + mdVelocityY, + mdVelocityZ, + mdFluxDensity, + mdFluxEnergy, + mdFluxVelocityX, + mdFluxVelocityY, + mdFluxVelocityZ); + + // std::cout << "FluxKernel finished" << std::endl; + + // Wait for the Flux kernel to finish + alpaka::wait(queue); + + // Exec the advection kernel (fourth step) + alpaka::exec(queue, workDiv, AdvectionKernel{}, mdDensity, mdVelocityX, mdVelocityY, mdVelocityZ); + + // std::cout << "AdvectionKernel finished" << std::endl; + + // Wait for the Advection kernel to finish + alpaka::wait(queue); + + // Copy data back to host for verification (optional) + alpaka::memcpy(queue, hDensity, dDensity); + alpaka::memcpy(queue, hEnergy, dEnergy); + alpaka::memcpy(queue, hPressure, dPressure); + alpaka::memcpy(queue, hVelocityX, dVelocityX); + alpaka::memcpy(queue, hVelocityY, dVelocityY); + alpaka::memcpy(queue, hVelocityZ, dVelocityZ); + + // Flux densities + alpaka::memcpy(queue, hFluxDensity, dFluxDensity); + alpaka::memcpy(queue, hFluxEnergy, dFluxEnergy); + alpaka::memcpy(queue, hFluxVelocityX, dFluxVelocityX); + alpaka::memcpy(queue, hFluxVelocityY, dFluxVelocityY); + alpaka::memcpy(queue, hFluxVelocityZ, dFluxVelocityZ); + + // Wait for data transfer to complete + alpaka::wait(queue); + + // Exec the Lagrangian kernel + alpaka::exec( + queue, + workDiv, + LagrangianKernel{}, + mdDensity, + mdEnergy, + mdVelocityX, + mdVelocityY, + mdVelocityZ, + mdFluxDensity, + mdFluxEnergy, + mdFluxVelocityX, + mdFluxVelocityY, + mdFluxVelocityZ); + + // std::cout << "LagrangianKernel finished" << std::endl; + + // Wait for the Lagrangian kernel to finish + alpaka::wait(queue); + + // Exec the Viscosity kernel + alpaka::exec( + queue, + workDiv, + ViscosityKernel{}, + mdDensity, + mdVelocityX, + mdVelocityY, + mdVelocityZ, + mdPressure, + mdViscosity); + + // std::cout << "ViscosityKernel finished" << std::endl; + + // Wait for the Viscosity kernel to finish + alpaka::wait(queue); + + // Update the simulation time + totalTime += dt; + } + std::cout << "Copying results back." << std::endl; + // Copy final data back to host for verification + alpaka::memcpy(queue, hDensity, dDensity); + alpaka::memcpy(queue, hEnergy, dEnergy); + alpaka::memcpy(queue, hPressure, dPressure); + alpaka::memcpy(queue, hVelocityX, dVelocityX); + alpaka::memcpy(queue, hVelocityY, dVelocityY); + alpaka::memcpy(queue, hVelocityZ, dVelocityZ); + alpaka::memcpy(queue, hFluxDensity, dFluxDensity); + alpaka::memcpy(queue, hFluxEnergy, dFluxEnergy); + alpaka::memcpy(queue, hFluxVelocityX, dFluxVelocityX); + alpaka::memcpy(queue, hFluxVelocityY, dFluxVelocityY); + alpaka::memcpy(queue, hFluxVelocityZ, dFluxVelocityZ); + alpaka::memcpy(queue, hViscosity, dViscosity); + + std::cout << "Data copy to host finished" << std::endl; + // Wait for all data transfers to complete + alpaka::wait(queue); + + // Verification (check for NaNs and Infs) + auto mdHostDensity = alpaka::experimental::getMdSpan(hDensity); + auto mdHostEnergy = alpaka::experimental::getMdSpan(hEnergy); + auto mdHostPressure = alpaka::experimental::getMdSpan(hPressure); + auto mdHostVelocityX = alpaka::experimental::getMdSpan(hVelocityX); + auto mdHostVelocityY = alpaka::experimental::getMdSpan(hVelocityY); + auto mdHostVelocityZ = alpaka::experimental::getMdSpan(hVelocityZ); + + bool success = true; + for(Idx i = 0; i < nx; ++i) + { + for(Idx j = 0; j < ny; ++j) + { + for(Idx k = 0; k < nz; ++k) + { + if(std::isnan(mdHostDensity(i, j, k)) || std::isinf(mdHostDensity(i, j, k)) + || std::isnan(mdHostEnergy(i, j, k)) || std::isinf(mdHostEnergy(i, j, k)) + || std::isnan(mdHostPressure(i, j, k)) || std::isinf(mdHostPressure(i, j, k)) + || std::isnan(mdHostVelocityX(i, j, k)) || std::isinf(mdHostVelocityX(i, j, k)) + || std::isnan(mdHostVelocityY(i, j, k)) || std::isinf(mdHostVelocityY(i, j, k)) + || std::isnan(mdHostVelocityZ(i, j, k)) || std::isinf(mdHostVelocityZ(i, j, k))) + { + success = false; + break; + } + } + } + } + + if(success) + { + std::cout << "Simulation succeeded!" << std::endl; + } + else + { + std::cout << "Simulation failed due to NaN or Inf values!" << std::endl; + } +} + +using TestAccs3D = alpaka::test::EnabledAccs, std::uint32_t>; + +TEMPLATE_LIST_TEST_CASE("TEST: Cloverleaf", "[benchmark-test]", TestAccs3D) +{ + using Acc = TestType; + if constexpr(alpaka::accMatchesTags) + { + // Run tests for float data type + testKernels(); + } +} From 50c5d0354dfda2a8fd9774980499658bb9daed78 Mon Sep 17 00:00:00 2001 From: mehmet yusufoglu Date: Thu, 1 Aug 2024 17:52:34 +0200 Subject: [PATCH 2/5] this config worked in hal --- benchmarks/cloverleaf/src/cloverLeafKernels.hpp | 4 ++-- benchmarks/cloverleaf/src/cloverLeafMainTest.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/cloverleaf/src/cloverLeafKernels.hpp b/benchmarks/cloverleaf/src/cloverLeafKernels.hpp index 2d8f9dd71ee..d88148b87a4 100644 --- a/benchmarks/cloverleaf/src/cloverLeafKernels.hpp +++ b/benchmarks/cloverleaf/src/cloverLeafKernels.hpp @@ -8,8 +8,8 @@ using Data = float; using Dim3 = alpaka::DimInt<3u>; using Idx = std::uint32_t; -const Idx nx = 16; // Number of cells in x direction -const Idx ny = 128; // Number of cells in y direction +const Idx nx = 512; // Number of cells in x direction +const Idx ny = 512; // Number of cells in y direction const Idx nz = 512; // Number of cells in z direction // Kernel to update the halo regions diff --git a/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp b/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp index c9ccff6d64e..cd02d51918b 100644 --- a/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp +++ b/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp @@ -114,7 +114,7 @@ void testKernels() alpaka::GridBlockExtentSubDivRestrictions::Unrestricted); // Simulation parameters float totalTime = 0.0f; // Total simulation time - float endTime = 1.0f; // End time of the simulation + float endTime = 0.50f; // End time of the simulation // Exec the initialization kernel alpaka::exec( From 64a9e970f0fbec5309693918c0469c3abff60907 Mon Sep 17 00:00:00 2001 From: mehmet yusufoglu Date: Thu, 1 Aug 2024 18:14:23 +0200 Subject: [PATCH 3/5] realistic start end times and while lopp --- .../cloverleaf/src/cloverLeafKernels.hpp | 146 +++++-------- .../cloverleaf/src/cloverLeafMainTest.cpp | 197 +++++++----------- 2 files changed, 136 insertions(+), 207 deletions(-) diff --git a/benchmarks/cloverleaf/src/cloverLeafKernels.hpp b/benchmarks/cloverleaf/src/cloverLeafKernels.hpp index d88148b87a4..59b8ee72f28 100644 --- a/benchmarks/cloverleaf/src/cloverLeafKernels.hpp +++ b/benchmarks/cloverleaf/src/cloverLeafKernels.hpp @@ -5,12 +5,11 @@ #include using Data = float; -using Dim3 = alpaka::DimInt<3u>; +using Dim2 = alpaka::DimInt<2u>; using Idx = std::uint32_t; -const Idx nx = 512; // Number of cells in x direction -const Idx ny = 512; // Number of cells in y direction -const Idx nz = 512; // Number of cells in z direction +const Idx nx = 960; // Number of cells in x direction +const Idx ny = 960; // Number of cells in y direction // Kernel to update the halo regions struct UpdateHaloKernel @@ -20,26 +19,20 @@ struct UpdateHaloKernel { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; - auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny && k < nz) + if(i < nx && j < ny) { // Update halo cells for density (simplified example) // Assuming a single layer halo, and periodic boundary conditions if(i == 0) - density(i, j, k) = density(nx - 2, j, k); + density(i, j) = density(nx - 2, j); if(i == nx - 1) - density(i, j, k) = density(1, j, k); + density(i, j) = density(1, j); if(j == 0) - density(i, j, k) = density(i, ny - 2, k); + density(i, j) = density(i, ny - 2); if(j == ny - 1) - density(i, j, k) = density(i, 1, k); - - if(k == 0) - density(i, j, k) = density(i, j, nz - 2); - if(k == nz - 1) - density(i, j, k) = density(i, j, 1); + density(i, j) = density(i, 1); } } }; @@ -58,12 +51,11 @@ struct IdealGasKernel { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; - auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny && k < nz) + if(i < nx && j < ny) { - pressure(i, j, k) = (gamma - 1.0f) * density(i, j, k) * energy(i, j, k); - soundspeed(i, j, k) = sqrt(gamma * pressure(i, j, k) / density(i, j, k)); + pressure(i, j) = (gamma - 1.0f) * density(i, j) * energy(i, j); + soundspeed(i, j) = sqrt(gamma * pressure(i, j) / density(i, j)); } } }; @@ -78,25 +70,28 @@ struct InitializerKernel MdSpan energy, MdSpan pressure, MdSpan velocityX, - MdSpan velocityY, - MdSpan velocityZ) const -> void + MdSpan velocityY) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; - auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny && k < nz) + if(i < nx && j < ny) { - density(i, j, k) = 1.0f; // Initial density - energy(i, j, k) = 2.5f; // Initial energy - pressure(i, j, k) = 1.0f; // Initial pressure - velocityX(i, j, k) = 0.0f; // Initial velocity - - if(i < nx && j < ny && k < nz) + if(i >= static_cast(0.0 * nx) && i < static_cast(5.0 * nx / 10.0) + && j >= static_cast(0.0 * ny) && j < static_cast(2.0 * ny / 10.0)) + { + density(i, j) = 1.0f; + energy(i, j) = 2.5f; + } + else { - // Simple advection calculation (this is a simplified example) - density(i, j, k) += (velocityX(i, j, k) + velocityY(i, j, k) + velocityZ(i, j, k)) * 0.01f; + density(i, j) = 0.2f; + energy(i, j) = 1.0f; } + + pressure(i, j) = 1.0f; + velocityX(i, j) = 0.0f; + velocityY(i, j) = 0.0f; } } }; @@ -112,22 +107,19 @@ struct EOSKernel MdSpan pressure, MdSpan velocityX, MdSpan velocityY, - MdSpan velocityZ, float gamma) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; - auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny && k < nz) + if(i < nx && j < ny) { // Compute pressure using ideal gas law: P = (gamma - 1) * density * energy - pressure(i, j, k) = (gamma - 1.0f) * density(i, j, k) * energy(i, j, k); + pressure(i, j) = (gamma - 1.0f) * density(i, j) * energy(i, j); // Additional calculations to update velocities (this is a simplified example) - velocityX(i, j, k) += pressure(i, j, k) * 0.1f; - velocityY(i, j, k) += pressure(i, j, k) * 0.1f; - velocityZ(i, j, k) += pressure(i, j, k) * 0.1f; + velocityX(i, j) += pressure(i, j) * 0.1f; + velocityY(i, j) += pressure(i, j) * 0.1f; } } }; @@ -143,25 +135,21 @@ struct FluxKernel MdSpan pressure, MdSpan velocityX, MdSpan velocityY, - MdSpan velocityZ, MdSpan fluxDensity, MdSpan fluxEnergy, MdSpan fluxVelocityX, - MdSpan fluxVelocityY, - MdSpan fluxVelocityZ) const -> void + MdSpan fluxVelocityY) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; - auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny && k < nz) + if(i < nx && j < ny) { // Compute fluxes (this is a simplified example) - fluxDensity(i, j, k) = density(i, j, k) * velocityX(i, j, k); - fluxEnergy(i, j, k) = energy(i, j, k) * velocityX(i, j, k); - fluxVelocityX(i, j, k) = velocityX(i, j, k) * velocityX(i, j, k) + pressure(i, j, k); - fluxVelocityY(i, j, k) = velocityY(i, j, k) * velocityX(i, j, k); - fluxVelocityZ(i, j, k) = velocityZ(i, j, k) * velocityX(i, j, k); + fluxDensity(i, j) = density(i, j) * velocityX(i, j); + fluxEnergy(i, j) = energy(i, j) * velocityX(i, j); + fluxVelocityX(i, j) = velocityX(i, j) * velocityX(i, j) + pressure(i, j); + fluxVelocityY(i, j) = velocityY(i, j) * velocityX(i, j); } } }; @@ -170,21 +158,15 @@ struct FluxKernel struct AdvectionKernel { template - ALPAKA_FN_ACC auto operator()( - TAcc const& acc, - MdSpan density, - MdSpan velocityX, - MdSpan velocityY, - MdSpan velocityZ) const -> void + ALPAKA_FN_ACC auto operator()(TAcc const& acc, MdSpan density, MdSpan velocityX, MdSpan velocityY) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; - auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny && k < nz) + if(i < nx && j < ny) { // Simple advection calculation (this is a simplified example) - density(i, j, k) += (velocityX(i, j, k) + velocityY(i, j, k) + velocityZ(i, j, k)) * 0.01f; + density(i, j) += (velocityX(i, j) + velocityY(i, j)) * 0.01f; } } }; @@ -199,25 +181,21 @@ struct LagrangianKernel MdSpan energy, MdSpan velocityX, MdSpan velocityY, - MdSpan velocityZ, MdSpan fluxDensity, MdSpan fluxEnergy, MdSpan fluxVelocityX, - MdSpan fluxVelocityY, - MdSpan fluxVelocityZ) const -> void + MdSpan fluxVelocityY) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; - auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny && k < nz) + if(i < nx && j < ny) { // Update the cell-centered variables based on flux calculations - density(i, j, k) -= fluxDensity(i, j, k) * 0.1f; - energy(i, j, k) -= fluxEnergy(i, j, k) * 0.1f; - velocityX(i, j, k) -= fluxVelocityX(i, j, k) * 0.1f; - velocityY(i, j, k) -= fluxVelocityY(i, j, k) * 0.1f; - velocityZ(i, j, k) -= fluxVelocityZ(i, j, k) * 0.1f; + density(i, j) -= fluxDensity(i, j) * 0.1f; + energy(i, j) -= fluxEnergy(i, j) * 0.1f; + velocityX(i, j) -= fluxVelocityX(i, j) * 0.1f; + velocityY(i, j) -= fluxVelocityY(i, j) * 0.1f; } } }; @@ -231,24 +209,21 @@ struct ViscosityKernel MdSpan density, MdSpan velocityX, MdSpan velocityY, - MdSpan velocityZ, MdSpan pressure, MdSpan viscosity) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; - auto const k = alpaka::getIdx(acc)[2]; - if(i > 0 && i < nx - 1 && j > 0 && j < ny - 1 && k > 0 && k < nz - 1) + if(i > 0 && i < nx - 1 && j > 0 && j < ny - 1) { - float gradVx = (velocityX(i + 1, j, k) - velocityX(i - 1, j, k)) * 0.5f; - float gradVy = (velocityY(i, j + 1, k) - velocityY(i, j - 1, k)) * 0.5f; - float gradVz = (velocityZ(i, j, k + 1) - velocityZ(i, j, k - 1)) * 0.5f; + float gradVx = (velocityX(i + 1, j) - velocityX(i - 1, j)) * 0.5f; + float gradVy = (velocityY(i, j + 1) - velocityY(i, j - 1)) * 0.5f; - viscosity(i, j, k) = density(i, j, k) * (gradVx * gradVx + gradVy * gradVy + gradVz * gradVz) * 0.01f; + viscosity(i, j) = density(i, j) * (gradVx * gradVx + gradVy * gradVy) * 0.01f; // Apply viscosity to pressure - pressure(i, j, k) += viscosity(i, j, k); + pressure(i, j) += viscosity(i, j); } } }; @@ -257,23 +232,16 @@ struct ViscosityKernel struct MaxVelocityKernel { template - ALPAKA_FN_ACC auto operator()( - TAcc const& acc, - MdSpan velocityX, - MdSpan velocityY, - MdSpan velocityZ, - Data* maxVelocity) const -> void + ALPAKA_FN_ACC auto operator()(TAcc const& acc, MdSpan velocityX, MdSpan velocityY, Data* maxVelocity) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; - auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny && k < nz) + if(i < nx && j < ny) { - float vx = velocityX(i, j, k); - float vy = velocityY(i, j, k); - float vz = velocityZ(i, j, k); - float v = alpaka::math::sqrt(acc, (vx * vx + vy * vy + vz * vz)); + float vx = velocityX(i, j); + float vy = velocityY(i, j); + float v = alpaka::math::sqrt(acc, (vx * vx + vy * vy)); // Atomic operation to find the maximum velocity float val = alpaka::atomicMax(acc, maxVelocity, v); @@ -282,10 +250,10 @@ struct MaxVelocityKernel } }; -[[maybe_unused]] static float calculateTimeStep(float dx, float dy, float dz, float maxVelocity, float cMax) +[[maybe_unused]] static float calculateTimeStep(float dx, float dy, float maxVelocity, float cMax) { // Compute the smallest grid spacing - float minDx = std::min({dx, dy, dz}); + float minDx = std::min({dx, dy}); // Calculate the time step based on the CFL condition float dt = cMax * minDx / maxVelocity; diff --git a/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp b/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp index cd02d51918b..21edc98fa63 100644 --- a/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp +++ b/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp @@ -33,26 +33,24 @@ void testKernels() Queue queue(devAcc); - // Define the 3D extent (dimensions) - alpaka::Vec const extent3D(static_cast(nx), static_cast(ny), static_cast(nz)); + // Define the 2D extent (dimensions) + alpaka::Vec const extent2D(static_cast(nx), static_cast(ny)); // Allocate host memory - auto hDensity = alpaka::allocBuf(devHost, extent3D); - auto hEnergy = alpaka::allocBuf(devHost, extent3D); - auto hPressure = alpaka::allocBuf(devHost, extent3D); - auto hVelocityX = alpaka::allocBuf(devHost, extent3D); - auto hVelocityY = alpaka::allocBuf(devHost, extent3D); - auto hVelocityZ = alpaka::allocBuf(devHost, extent3D); - auto hSoundspeed = alpaka::allocBuf(devHost, extent3D); // Additional buffer for soundspeed + auto hDensity = alpaka::allocBuf(devHost, extent2D); + auto hEnergy = alpaka::allocBuf(devHost, extent2D); + auto hPressure = alpaka::allocBuf(devHost, extent2D); + auto hVelocityX = alpaka::allocBuf(devHost, extent2D); + auto hVelocityY = alpaka::allocBuf(devHost, extent2D); + auto hSoundspeed = alpaka::allocBuf(devHost, extent2D); // Additional buffer for soundspeed // Allocate device memory - auto dDensity = alpaka::allocBuf(devAcc, extent3D); - auto dEnergy = alpaka::allocBuf(devAcc, extent3D); - auto dPressure = alpaka::allocBuf(devAcc, extent3D); - auto dVelocityX = alpaka::allocBuf(devAcc, extent3D); - auto dVelocityY = alpaka::allocBuf(devAcc, extent3D); - auto dVelocityZ = alpaka::allocBuf(devAcc, extent3D); - auto dSoundspeed = alpaka::allocBuf(devAcc, extent3D); // Additional buffer for soundspeed + auto dDensity = alpaka::allocBuf(devAcc, extent2D); + auto dEnergy = alpaka::allocBuf(devAcc, extent2D); + auto dPressure = alpaka::allocBuf(devAcc, extent2D); + auto dVelocityX = alpaka::allocBuf(devAcc, extent2D); + auto dVelocityY = alpaka::allocBuf(devAcc, extent2D); + auto dSoundspeed = alpaka::allocBuf(devAcc, extent2D); // Additional buffer for soundspeed // Convert directly to mdspan auto mdDensity = alpaka::experimental::getMdSpan(dDensity); @@ -60,84 +58,63 @@ void testKernels() auto mdPressure = alpaka::experimental::getMdSpan(dPressure); auto mdVelocityX = alpaka::experimental::getMdSpan(dVelocityX); auto mdVelocityY = alpaka::experimental::getMdSpan(dVelocityY); - auto mdVelocityZ = alpaka::experimental::getMdSpan(dVelocityZ); auto mdSoundspeed = alpaka::experimental::getMdSpan(dSoundspeed); // Additional mdspan for soundspeed - auto hFluxDensity = alpaka::allocBuf(devHost, extent3D); - auto hFluxEnergy = alpaka::allocBuf(devHost, extent3D); - auto hFluxVelocityX = alpaka::allocBuf(devHost, extent3D); - auto hFluxVelocityY = alpaka::allocBuf(devHost, extent3D); - auto hFluxVelocityZ = alpaka::allocBuf(devHost, extent3D); + auto hFluxDensity = alpaka::allocBuf(devHost, extent2D); + auto hFluxEnergy = alpaka::allocBuf(devHost, extent2D); + auto hFluxVelocityX = alpaka::allocBuf(devHost, extent2D); + auto hFluxVelocityY = alpaka::allocBuf(devHost, extent2D); - auto dFluxDensity = alpaka::allocBuf(devAcc, extent3D); - auto dFluxEnergy = alpaka::allocBuf(devAcc, extent3D); - auto dFluxVelocityX = alpaka::allocBuf(devAcc, extent3D); - auto dFluxVelocityY = alpaka::allocBuf(devAcc, extent3D); - auto dFluxVelocityZ = alpaka::allocBuf(devAcc, extent3D); + auto dFluxDensity = alpaka::allocBuf(devAcc, extent2D); + auto dFluxEnergy = alpaka::allocBuf(devAcc, extent2D); + auto dFluxVelocityX = alpaka::allocBuf(devAcc, extent2D); + auto dFluxVelocityY = alpaka::allocBuf(devAcc, extent2D); auto mdFluxDensity = alpaka::experimental::getMdSpan(dFluxDensity); auto mdFluxEnergy = alpaka::experimental::getMdSpan(dFluxEnergy); auto mdFluxVelocityX = alpaka::experimental::getMdSpan(dFluxVelocityX); auto mdFluxVelocityY = alpaka::experimental::getMdSpan(dFluxVelocityY); - auto mdFluxVelocityZ = alpaka::experimental::getMdSpan(dFluxVelocityZ); // Allocate host memory for viscosity - auto hViscosity = alpaka::allocBuf(devHost, extent3D); + auto hViscosity = alpaka::allocBuf(devHost, extent2D); // Allocate device memory for viscosity - auto dViscosity = alpaka::allocBuf(devAcc, extent3D); + auto dViscosity = alpaka::allocBuf(devAcc, extent2D); // Create mdspan view for viscosity using alpaka::experimental::getMdSpan auto mdViscosity = alpaka::experimental::getMdSpan(dViscosity); // Define work divisions - const alpaka::Vec size{nx, ny, nz}; - - // Define work divisions + const alpaka::Vec size{nx, ny}; - - auto const bundeledInitKernel = alpaka::KernelBundle( - InitializerKernel{}, - mdDensity, - mdEnergy, - mdPressure, - mdVelocityX, - mdVelocityY, - mdVelocityZ); + auto const bundledInitKernel + = alpaka::KernelBundle(InitializerKernel{}, mdDensity, mdEnergy, mdPressure, mdVelocityX, mdVelocityY); auto workDiv = alpaka::getValidWorkDivForKernel( devAcc, - bundeledInitKernel, + bundledInitKernel, size, - alpaka::Vec::ones(), + alpaka::Vec::ones(), false, alpaka::GridBlockExtentSubDivRestrictions::Unrestricted); + // Simulation parameters float totalTime = 0.0f; // Total simulation time - float endTime = 0.50f; // End time of the simulation + float endTime = 16.0f; // End time of the simulation // Exec the initialization kernel - alpaka::exec( - queue, - workDiv, - InitializerKernel{}, - mdDensity, - mdEnergy, - mdPressure, - mdVelocityX, - mdVelocityY, - mdVelocityZ); - // std::cout << "InitializerKernel finished" << std::endl; + alpaka::exec(queue, workDiv, InitializerKernel{}, mdDensity, mdEnergy, mdPressure, mdVelocityX, mdVelocityY); + + std::cout << "InitializerKernel finished" << std::endl; + // Wait for the kernel to finish alpaka::wait(queue); // Variables to calculate the dt at each step - float dx = 0.01f; - float dy = 0.01f; - float dz = 0.01f; + float dx = 10.0f / nx; + float dy = 10.0f / ny; float cMax = 0.5f; - float dt = 0.01f; - + float dt = 0.04f; alpaka::Vec, Idx> const extent1D = alpaka::Vec, Idx>::all(1u); auto resultGpuHost = alpaka::allocBuf(devHost, extent1D); @@ -146,6 +123,9 @@ void testKernels() alpaka::memcpy(queue, outputDeviceMemory, resultGpuHost); std::cout << "Workdiv:" << workDiv << std::endl; std::cout << "Running cloverleaf loop." << std::endl; + + int iteration = 0; + while(totalTime < endTime) { // Calculate the time step (dt), max velocity is needed. @@ -156,25 +136,25 @@ void testKernels() MaxVelocityKernel{}, mdVelocityX, mdVelocityY, - mdVelocityZ, std::data(outputDeviceMemory)); // Copy result from device memory to host alpaka::memcpy(queue, resultGpuHost, outputDeviceMemory, extent1D); - // std::cout << "MaxVelocityKernel finished" << std::endl; + // std::cout << "MaxVelocityKernel finished" << std::endl; alpaka::wait(queue); // Now use maxVelocity to compute the time step - dt = calculateTimeStep(dx, dy, dz, resultGpuHost[0], cMax); + dt = calculateTimeStep(dx, dy, resultGpuHost[0], cMax); // Exec the halo update kernel alpaka::exec(queue, workDiv, UpdateHaloKernel{}, mdDensity); + // std::cout << "Halo kernel finished" << std::endl; + // Wait for the halo update kernel to finish alpaka::wait(queue); - // std::cout << "Halo kernel finished" << std::endl; // Exec the ideal gas kernel alpaka::exec( @@ -187,26 +167,17 @@ void testKernels() mdSoundspeed, 1.4f); // gamma - // std::cout << "IdealGasKernel finished" << std::endl; + // std::cout << "IdealGasKernel finished" << std::endl; // Wait for the ideal gas kernel to finish alpaka::wait(queue); // Exec the EOS kernel float gamma = 1.4f; // Specific heat ratio - alpaka::exec( - queue, - workDiv, - EOSKernel{}, - mdDensity, - mdEnergy, - mdPressure, - mdVelocityX, - mdVelocityY, - mdVelocityZ, - gamma); + alpaka::exec< + Acc>(queue, workDiv, EOSKernel{}, mdDensity, mdEnergy, mdPressure, mdVelocityX, mdVelocityY, gamma); - // std::cout << "EOSKernel finished" << std::endl; + // std::cout << "EOSKernel finished" << std::endl; // Wait for the EOS kernel to finish alpaka::wait(queue); @@ -221,22 +192,20 @@ void testKernels() mdPressure, mdVelocityX, mdVelocityY, - mdVelocityZ, mdFluxDensity, mdFluxEnergy, mdFluxVelocityX, - mdFluxVelocityY, - mdFluxVelocityZ); + mdFluxVelocityY); - // std::cout << "FluxKernel finished" << std::endl; + // std::cout << "FluxKernel finished" << std::endl; // Wait for the Flux kernel to finish alpaka::wait(queue); // Exec the advection kernel (fourth step) - alpaka::exec(queue, workDiv, AdvectionKernel{}, mdDensity, mdVelocityX, mdVelocityY, mdVelocityZ); + alpaka::exec(queue, workDiv, AdvectionKernel{}, mdDensity, mdVelocityX, mdVelocityY); - // std::cout << "AdvectionKernel finished" << std::endl; + // std::cout << "AdvectionKernel finished" << std::endl; // Wait for the Advection kernel to finish alpaka::wait(queue); @@ -247,14 +216,12 @@ void testKernels() alpaka::memcpy(queue, hPressure, dPressure); alpaka::memcpy(queue, hVelocityX, dVelocityX); alpaka::memcpy(queue, hVelocityY, dVelocityY); - alpaka::memcpy(queue, hVelocityZ, dVelocityZ); // Flux densities alpaka::memcpy(queue, hFluxDensity, dFluxDensity); alpaka::memcpy(queue, hFluxEnergy, dFluxEnergy); alpaka::memcpy(queue, hFluxVelocityX, dFluxVelocityX); alpaka::memcpy(queue, hFluxVelocityY, dFluxVelocityY); - alpaka::memcpy(queue, hFluxVelocityZ, dFluxVelocityZ); // Wait for data transfer to complete alpaka::wait(queue); @@ -268,54 +235,53 @@ void testKernels() mdEnergy, mdVelocityX, mdVelocityY, - mdVelocityZ, mdFluxDensity, mdFluxEnergy, mdFluxVelocityX, - mdFluxVelocityY, - mdFluxVelocityZ); + mdFluxVelocityY); - // std::cout << "LagrangianKernel finished" << std::endl; + // std::cout << "LagrangianKernel finished" << std::endl; // Wait for the Lagrangian kernel to finish alpaka::wait(queue); // Exec the Viscosity kernel - alpaka::exec( - queue, - workDiv, - ViscosityKernel{}, - mdDensity, - mdVelocityX, - mdVelocityY, - mdVelocityZ, - mdPressure, - mdViscosity); + alpaka::exec< + Acc>(queue, workDiv, ViscosityKernel{}, mdDensity, mdVelocityX, mdVelocityY, mdPressure, mdViscosity); - // std::cout << "ViscosityKernel finished" << std::endl; + // std::cout << "ViscosityKernel finished" << std::endl; // Wait for the Viscosity kernel to finish alpaka::wait(queue); // Update the simulation time totalTime += dt; + + if(iteration % 2000 == 0) + { + std::cout << "Current Time: " << totalTime << ", End Time: " << endTime << ", Step Size: " << dt + << ", iteration: " << iteration << std::endl; + } + + iteration++; } + std::cout << "Copying results back." << std::endl; + // Copy final data back to host for verification alpaka::memcpy(queue, hDensity, dDensity); alpaka::memcpy(queue, hEnergy, dEnergy); alpaka::memcpy(queue, hPressure, dPressure); alpaka::memcpy(queue, hVelocityX, dVelocityX); alpaka::memcpy(queue, hVelocityY, dVelocityY); - alpaka::memcpy(queue, hVelocityZ, dVelocityZ); alpaka::memcpy(queue, hFluxDensity, dFluxDensity); alpaka::memcpy(queue, hFluxEnergy, dFluxEnergy); alpaka::memcpy(queue, hFluxVelocityX, dFluxVelocityX); alpaka::memcpy(queue, hFluxVelocityY, dFluxVelocityY); - alpaka::memcpy(queue, hFluxVelocityZ, dFluxVelocityZ); alpaka::memcpy(queue, hViscosity, dViscosity); - std::cout << "Data copy to host finished" << std::endl; + std::cout << "Data copy to host finished" << std::endl; + // Wait for all data transfers to complete alpaka::wait(queue); @@ -325,25 +291,20 @@ void testKernels() auto mdHostPressure = alpaka::experimental::getMdSpan(hPressure); auto mdHostVelocityX = alpaka::experimental::getMdSpan(hVelocityX); auto mdHostVelocityY = alpaka::experimental::getMdSpan(hVelocityY); - auto mdHostVelocityZ = alpaka::experimental::getMdSpan(hVelocityZ); bool success = true; for(Idx i = 0; i < nx; ++i) { for(Idx j = 0; j < ny; ++j) { - for(Idx k = 0; k < nz; ++k) + if(std::isnan(mdHostDensity(i, j)) || std::isinf(mdHostDensity(i, j)) || std::isnan(mdHostEnergy(i, j)) + || std::isinf(mdHostEnergy(i, j)) || std::isnan(mdHostPressure(i, j)) + || std::isinf(mdHostPressure(i, j)) || std::isnan(mdHostVelocityX(i, j)) + || std::isinf(mdHostVelocityX(i, j)) || std::isnan(mdHostVelocityY(i, j)) + || std::isinf(mdHostVelocityY(i, j))) { - if(std::isnan(mdHostDensity(i, j, k)) || std::isinf(mdHostDensity(i, j, k)) - || std::isnan(mdHostEnergy(i, j, k)) || std::isinf(mdHostEnergy(i, j, k)) - || std::isnan(mdHostPressure(i, j, k)) || std::isinf(mdHostPressure(i, j, k)) - || std::isnan(mdHostVelocityX(i, j, k)) || std::isinf(mdHostVelocityX(i, j, k)) - || std::isnan(mdHostVelocityY(i, j, k)) || std::isinf(mdHostVelocityY(i, j, k)) - || std::isnan(mdHostVelocityZ(i, j, k)) || std::isinf(mdHostVelocityZ(i, j, k))) - { - success = false; - break; - } + success = false; + break; } } } @@ -358,9 +319,9 @@ void testKernels() } } -using TestAccs3D = alpaka::test::EnabledAccs, std::uint32_t>; +using TestAccs2D = alpaka::test::EnabledAccs, std::uint32_t>; -TEMPLATE_LIST_TEST_CASE("TEST: Cloverleaf", "[benchmark-test]", TestAccs3D) +TEMPLATE_LIST_TEST_CASE("TEST: Cloverleaf", "[benchmark-test]", TestAccs2D) { using Acc = TestType; if constexpr(alpaka::accMatchesTags) From bce203a40634289c49057c176e80c8d3174e1c7e Mon Sep 17 00:00:00 2001 From: mehmet yusufoglu Date: Thu, 1 Aug 2024 18:56:48 +0200 Subject: [PATCH 4/5] 3D version with z=1 illegal mem adress --- .../cloverleaf/src/cloverLeafKernels.hpp | 145 ++++++----- .../cloverleaf/src/cloverLeafMainTest.cpp | 228 ++++++++++-------- 2 files changed, 217 insertions(+), 156 deletions(-) diff --git a/benchmarks/cloverleaf/src/cloverLeafKernels.hpp b/benchmarks/cloverleaf/src/cloverLeafKernels.hpp index 59b8ee72f28..2dd934c2c36 100644 --- a/benchmarks/cloverleaf/src/cloverLeafKernels.hpp +++ b/benchmarks/cloverleaf/src/cloverLeafKernels.hpp @@ -5,11 +5,12 @@ #include using Data = float; -using Dim2 = alpaka::DimInt<2u>; +using Dim3 = alpaka::DimInt<3u>; using Idx = std::uint32_t; -const Idx nx = 960; // Number of cells in x direction +const Idx nx = 1920; // Number of cells in x direction const Idx ny = 960; // Number of cells in y direction +const Idx nz = 1; // Number of cells in z direction // Kernel to update the halo regions struct UpdateHaloKernel @@ -19,20 +20,26 @@ struct UpdateHaloKernel { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny) + if(i < nx && j < ny && k < nz) { // Update halo cells for density (simplified example) // Assuming a single layer halo, and periodic boundary conditions if(i == 0) - density(i, j) = density(nx - 2, j); + density(i, j, k) = density(nx - 2, j, k); if(i == nx - 1) - density(i, j) = density(1, j); + density(i, j, k) = density(1, j, k); if(j == 0) - density(i, j) = density(i, ny - 2); + density(i, j, k) = density(i, ny - 2, k); if(j == ny - 1) - density(i, j) = density(i, 1); + density(i, j, k) = density(i, 1, k); + + if(k == 0) + density(i, j, k) = density(i, j, nz - 2); + if(k == nz - 1) + density(i, j, k) = density(i, j, 1); } } }; @@ -51,11 +58,12 @@ struct IdealGasKernel { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny) + if(i < nx && j < ny && k < nz) { - pressure(i, j) = (gamma - 1.0f) * density(i, j) * energy(i, j); - soundspeed(i, j) = sqrt(gamma * pressure(i, j) / density(i, j)); + pressure(i, j, k) = (gamma - 1.0f) * density(i, j, k) * energy(i, j, k); + soundspeed(i, j, k) = sqrt(gamma * pressure(i, j, k) / density(i, j, k)); } } }; @@ -70,28 +78,28 @@ struct InitializerKernel MdSpan energy, MdSpan pressure, MdSpan velocityX, - MdSpan velocityY) const -> void + MdSpan velocityY, + MdSpan velocityZ) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny) + if(i < nx && j < ny && k < nz) { - if(i >= static_cast(0.0 * nx) && i < static_cast(5.0 * nx / 10.0) - && j >= static_cast(0.0 * ny) && j < static_cast(2.0 * ny / 10.0)) - { - density(i, j) = 1.0f; - energy(i, j) = 2.5f; - } - else + density(i, j, k) = 0.2f; // State 1 density + energy(i, j, k) = 1.0f; // State 1 energy + + if(i > 0 && i < 5.0f * nx / 10.0f && j > 0 && j < 2.0f * ny / 10.0f) { - density(i, j) = 0.2f; - energy(i, j) = 1.0f; + density(i, j, k) = 1.0f; // State 2 density + energy(i, j, k) = 2.5f; // State 2 energy } - pressure(i, j) = 1.0f; - velocityX(i, j) = 0.0f; - velocityY(i, j) = 0.0f; + pressure(i, j, k) = 1.0f; // Initial pressure + velocityX(i, j, k) = 0.0f; // Initial velocity in x direction + velocityY(i, j, k) = 0.0f; // Initial velocity in y direction + velocityZ(i, j, k) = 0.0f; // Initial velocity in z direction } } }; @@ -107,19 +115,22 @@ struct EOSKernel MdSpan pressure, MdSpan velocityX, MdSpan velocityY, + MdSpan velocityZ, float gamma) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny) + if(i < nx && j < ny && k < nz) { // Compute pressure using ideal gas law: P = (gamma - 1) * density * energy - pressure(i, j) = (gamma - 1.0f) * density(i, j) * energy(i, j); + pressure(i, j, k) = (gamma - 1.0f) * density(i, j, k) * energy(i, j, k); // Additional calculations to update velocities (this is a simplified example) - velocityX(i, j) += pressure(i, j) * 0.1f; - velocityY(i, j) += pressure(i, j) * 0.1f; + velocityX(i, j, k) += pressure(i, j, k) * 0.1f; + velocityY(i, j, k) += pressure(i, j, k) * 0.1f; + velocityZ(i, j, k) += pressure(i, j, k) * 0.1f; } } }; @@ -135,21 +146,25 @@ struct FluxKernel MdSpan pressure, MdSpan velocityX, MdSpan velocityY, + MdSpan velocityZ, MdSpan fluxDensity, MdSpan fluxEnergy, MdSpan fluxVelocityX, - MdSpan fluxVelocityY) const -> void + MdSpan fluxVelocityY, + MdSpan fluxVelocityZ) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny) + if(i < nx && j < ny && k < nz) { // Compute fluxes (this is a simplified example) - fluxDensity(i, j) = density(i, j) * velocityX(i, j); - fluxEnergy(i, j) = energy(i, j) * velocityX(i, j); - fluxVelocityX(i, j) = velocityX(i, j) * velocityX(i, j) + pressure(i, j); - fluxVelocityY(i, j) = velocityY(i, j) * velocityX(i, j); + fluxDensity(i, j, k) = density(i, j, k) * velocityX(i, j, k); + fluxEnergy(i, j, k) = energy(i, j, k) * velocityX(i, j, k); + fluxVelocityX(i, j, k) = velocityX(i, j, k) * velocityX(i, j, k) + pressure(i, j, k); + fluxVelocityY(i, j, k) = velocityY(i, j, k) * velocityX(i, j, k); + fluxVelocityZ(i, j, k) = velocityZ(i, j, k) * velocityX(i, j, k); } } }; @@ -158,15 +173,21 @@ struct FluxKernel struct AdvectionKernel { template - ALPAKA_FN_ACC auto operator()(TAcc const& acc, MdSpan density, MdSpan velocityX, MdSpan velocityY) const -> void + ALPAKA_FN_ACC auto operator()( + TAcc const& acc, + MdSpan density, + MdSpan velocityX, + MdSpan velocityY, + MdSpan velocityZ) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny) + if(i < nx && j < ny && k < nz) { // Simple advection calculation (this is a simplified example) - density(i, j) += (velocityX(i, j) + velocityY(i, j)) * 0.01f; + density(i, j, k) += (velocityX(i, j, k) + velocityY(i, j, k) + velocityZ(i, j, k)) * 0.01f; } } }; @@ -181,21 +202,25 @@ struct LagrangianKernel MdSpan energy, MdSpan velocityX, MdSpan velocityY, + MdSpan velocityZ, MdSpan fluxDensity, MdSpan fluxEnergy, MdSpan fluxVelocityX, - MdSpan fluxVelocityY) const -> void + MdSpan fluxVelocityY, + MdSpan fluxVelocityZ) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny) + if(i < nx && j < ny && k < nz) { // Update the cell-centered variables based on flux calculations - density(i, j) -= fluxDensity(i, j) * 0.1f; - energy(i, j) -= fluxEnergy(i, j) * 0.1f; - velocityX(i, j) -= fluxVelocityX(i, j) * 0.1f; - velocityY(i, j) -= fluxVelocityY(i, j) * 0.1f; + density(i, j, k) -= fluxDensity(i, j, k) * 0.1f; + energy(i, j, k) -= fluxEnergy(i, j, k) * 0.1f; + velocityX(i, j, k) -= fluxVelocityX(i, j, k) * 0.1f; + velocityY(i, j, k) -= fluxVelocityY(i, j, k) * 0.1f; + velocityZ(i, j, k) -= fluxVelocityZ(i, j, k) * 0.1f; } } }; @@ -209,21 +234,24 @@ struct ViscosityKernel MdSpan density, MdSpan velocityX, MdSpan velocityY, + MdSpan velocityZ, MdSpan pressure, MdSpan viscosity) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; - if(i > 0 && i < nx - 1 && j > 0 && j < ny - 1) + if(i > 0 && i < nx - 1 && j > 0 && j < ny - 1 && k > 0 && k < nz - 1) { - float gradVx = (velocityX(i + 1, j) - velocityX(i - 1, j)) * 0.5f; - float gradVy = (velocityY(i, j + 1) - velocityY(i, j - 1)) * 0.5f; + float gradVx = (velocityX(i + 1, j, k) - velocityX(i - 1, j, k)) * 0.5f; + float gradVy = (velocityY(i, j + 1, k) - velocityY(i, j - 1, k)) * 0.5f; + float gradVz = (velocityZ(i, j, k + 1) - velocityZ(i, j, k - 1)) * 0.5f; - viscosity(i, j) = density(i, j) * (gradVx * gradVx + gradVy * gradVy) * 0.01f; + viscosity(i, j, k) = density(i, j, k) * (gradVx * gradVx + gradVy * gradVy + gradVz * gradVz) * 0.01f; // Apply viscosity to pressure - pressure(i, j) += viscosity(i, j); + pressure(i, j, k) += viscosity(i, j, k); } } }; @@ -232,16 +260,23 @@ struct ViscosityKernel struct MaxVelocityKernel { template - ALPAKA_FN_ACC auto operator()(TAcc const& acc, MdSpan velocityX, MdSpan velocityY, Data* maxVelocity) const -> void + ALPAKA_FN_ACC auto operator()( + TAcc const& acc, + MdSpan velocityX, + MdSpan velocityY, + MdSpan velocityZ, + Data* maxVelocity) const -> void { auto const i = alpaka::getIdx(acc)[0]; auto const j = alpaka::getIdx(acc)[1]; + auto const k = alpaka::getIdx(acc)[2]; - if(i < nx && j < ny) + if(i < nx && j < ny && k < nz) { - float vx = velocityX(i, j); - float vy = velocityY(i, j); - float v = alpaka::math::sqrt(acc, (vx * vx + vy * vy)); + float vx = velocityX(i, j, k); + float vy = velocityY(i, j, k); + float vz = velocityZ(i, j, k); + float v = alpaka::math::sqrt(acc, (vx * vx + vy * vy + vz * vz)); // Atomic operation to find the maximum velocity float val = alpaka::atomicMax(acc, maxVelocity, v); @@ -250,10 +285,10 @@ struct MaxVelocityKernel } }; -[[maybe_unused]] static float calculateTimeStep(float dx, float dy, float maxVelocity, float cMax) +[[maybe_unused]] static float calculateTimeStep(float dx, float dy, float dz, float maxVelocity, float cMax) { // Compute the smallest grid spacing - float minDx = std::min({dx, dy}); + float minDx = std::min({dx, dy, dz}); // Calculate the time step based on the CFL condition float dt = cMax * minDx / maxVelocity; diff --git a/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp b/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp index 21edc98fa63..0bddf81207c 100644 --- a/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp +++ b/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp @@ -15,6 +15,28 @@ * Cloverleaf benchmark */ +float const density1 = 0.2f; +float const energy1 = 1.0f; +float const density2 = 1.0f; +float const energy2 = 2.5f; + +float const initialTimestep = 0.04f; +float const timestepRise = 1.5f; +float const maxTimestep = 0.04f; +float const endTime = 15.5f; +int const endStep = 2955; + +float const xmin = 0.0f; +float const ymin = 0.0f; +float const xmax = 10.0f; +float const ymax = 10.0f; + + +float const dx = (xmax - xmin) / nx; +float const dy = (ymax - ymin) / ny; +float const dz = 0.01f; +float const cMax = 0.5f; + //! \brief The Function for testing cloverleaf kernels for given Acc type and data type. //! \tparam Acc the accelerator type //! \tparam DataType The data type to differentiate single or double data type based tests. @@ -33,24 +55,26 @@ void testKernels() Queue queue(devAcc); - // Define the 2D extent (dimensions) - alpaka::Vec const extent2D(static_cast(nx), static_cast(ny)); + // Define the 3D extent (dimensions) + alpaka::Vec const extent3D(static_cast(nx), static_cast(ny), static_cast(nz)); // Allocate host memory - auto hDensity = alpaka::allocBuf(devHost, extent2D); - auto hEnergy = alpaka::allocBuf(devHost, extent2D); - auto hPressure = alpaka::allocBuf(devHost, extent2D); - auto hVelocityX = alpaka::allocBuf(devHost, extent2D); - auto hVelocityY = alpaka::allocBuf(devHost, extent2D); - auto hSoundspeed = alpaka::allocBuf(devHost, extent2D); // Additional buffer for soundspeed + auto hDensity = alpaka::allocBuf(devHost, extent3D); + auto hEnergy = alpaka::allocBuf(devHost, extent3D); + auto hPressure = alpaka::allocBuf(devHost, extent3D); + auto hVelocityX = alpaka::allocBuf(devHost, extent3D); + auto hVelocityY = alpaka::allocBuf(devHost, extent3D); + auto hVelocityZ = alpaka::allocBuf(devHost, extent3D); + auto hSoundspeed = alpaka::allocBuf(devHost, extent3D); // Additional buffer for soundspeed // Allocate device memory - auto dDensity = alpaka::allocBuf(devAcc, extent2D); - auto dEnergy = alpaka::allocBuf(devAcc, extent2D); - auto dPressure = alpaka::allocBuf(devAcc, extent2D); - auto dVelocityX = alpaka::allocBuf(devAcc, extent2D); - auto dVelocityY = alpaka::allocBuf(devAcc, extent2D); - auto dSoundspeed = alpaka::allocBuf(devAcc, extent2D); // Additional buffer for soundspeed + auto dDensity = alpaka::allocBuf(devAcc, extent3D); + auto dEnergy = alpaka::allocBuf(devAcc, extent3D); + auto dPressure = alpaka::allocBuf(devAcc, extent3D); + auto dVelocityX = alpaka::allocBuf(devAcc, extent3D); + auto dVelocityY = alpaka::allocBuf(devAcc, extent3D); + auto dVelocityZ = alpaka::allocBuf(devAcc, extent3D); + auto dSoundspeed = alpaka::allocBuf(devAcc, extent3D); // Additional buffer for soundspeed // Convert directly to mdspan auto mdDensity = alpaka::experimental::getMdSpan(dDensity); @@ -58,75 +82,85 @@ void testKernels() auto mdPressure = alpaka::experimental::getMdSpan(dPressure); auto mdVelocityX = alpaka::experimental::getMdSpan(dVelocityX); auto mdVelocityY = alpaka::experimental::getMdSpan(dVelocityY); + auto mdVelocityZ = alpaka::experimental::getMdSpan(dVelocityZ); auto mdSoundspeed = alpaka::experimental::getMdSpan(dSoundspeed); // Additional mdspan for soundspeed - auto hFluxDensity = alpaka::allocBuf(devHost, extent2D); - auto hFluxEnergy = alpaka::allocBuf(devHost, extent2D); - auto hFluxVelocityX = alpaka::allocBuf(devHost, extent2D); - auto hFluxVelocityY = alpaka::allocBuf(devHost, extent2D); + auto hFluxDensity = alpaka::allocBuf(devHost, extent3D); + auto hFluxEnergy = alpaka::allocBuf(devHost, extent3D); + auto hFluxVelocityX = alpaka::allocBuf(devHost, extent3D); + auto hFluxVelocityY = alpaka::allocBuf(devHost, extent3D); + auto hFluxVelocityZ = alpaka::allocBuf(devHost, extent3D); - auto dFluxDensity = alpaka::allocBuf(devAcc, extent2D); - auto dFluxEnergy = alpaka::allocBuf(devAcc, extent2D); - auto dFluxVelocityX = alpaka::allocBuf(devAcc, extent2D); - auto dFluxVelocityY = alpaka::allocBuf(devAcc, extent2D); + auto dFluxDensity = alpaka::allocBuf(devAcc, extent3D); + auto dFluxEnergy = alpaka::allocBuf(devAcc, extent3D); + auto dFluxVelocityX = alpaka::allocBuf(devAcc, extent3D); + auto dFluxVelocityY = alpaka::allocBuf(devAcc, extent3D); + auto dFluxVelocityZ = alpaka::allocBuf(devAcc, extent3D); auto mdFluxDensity = alpaka::experimental::getMdSpan(dFluxDensity); auto mdFluxEnergy = alpaka::experimental::getMdSpan(dFluxEnergy); auto mdFluxVelocityX = alpaka::experimental::getMdSpan(dFluxVelocityX); auto mdFluxVelocityY = alpaka::experimental::getMdSpan(dFluxVelocityY); + auto mdFluxVelocityZ = alpaka::experimental::getMdSpan(dFluxVelocityZ); // Allocate host memory for viscosity - auto hViscosity = alpaka::allocBuf(devHost, extent2D); + auto hViscosity = alpaka::allocBuf(devHost, extent3D); // Allocate device memory for viscosity - auto dViscosity = alpaka::allocBuf(devAcc, extent2D); + auto dViscosity = alpaka::allocBuf(devAcc, extent3D); // Create mdspan view for viscosity using alpaka::experimental::getMdSpan auto mdViscosity = alpaka::experimental::getMdSpan(dViscosity); // Define work divisions - const alpaka::Vec size{nx, ny}; + const alpaka::Vec size{nx, ny, nz}; - auto const bundledInitKernel - = alpaka::KernelBundle(InitializerKernel{}, mdDensity, mdEnergy, mdPressure, mdVelocityX, mdVelocityY); + auto const bundeledInitKernel = alpaka::KernelBundle( + InitializerKernel{}, + mdDensity, + mdEnergy, + mdPressure, + mdVelocityX, + mdVelocityY, + mdVelocityZ); auto workDiv = alpaka::getValidWorkDivForKernel( devAcc, - bundledInitKernel, + bundeledInitKernel, size, - alpaka::Vec::ones(), + alpaka::Vec::ones(), false, alpaka::GridBlockExtentSubDivRestrictions::Unrestricted); - // Simulation parameters float totalTime = 0.0f; // Total simulation time - float endTime = 16.0f; // End time of the simulation // Exec the initialization kernel - alpaka::exec(queue, workDiv, InitializerKernel{}, mdDensity, mdEnergy, mdPressure, mdVelocityX, mdVelocityY); - - std::cout << "InitializerKernel finished" << std::endl; - + alpaka::exec( + queue, + workDiv, + InitializerKernel{}, + mdDensity, + mdEnergy, + mdPressure, + mdVelocityX, + mdVelocityY, + mdVelocityZ); // Wait for the kernel to finish alpaka::wait(queue); // Variables to calculate the dt at each step - float dx = 10.0f / nx; - float dy = 10.0f / ny; - float cMax = 0.5f; - float dt = 0.04f; + float dt = initialTimestep; alpaka::Vec, Idx> const extent1D = alpaka::Vec, Idx>::all(1u); auto resultGpuHost = alpaka::allocBuf(devHost, extent1D); resultGpuHost[0] = static_cast(1.0f); auto outputDeviceMemory = alpaka::allocBuf(devAcc, extent1D); alpaka::memcpy(queue, outputDeviceMemory, resultGpuHost); - std::cout << "Workdiv:" << workDiv << std::endl; - std::cout << "Running cloverleaf loop." << std::endl; - int iteration = 0; - - while(totalTime < endTime) + std::cout << "Workdiv: " << workDiv << std::endl; + std::cout << "Running cloverleaf loop." << std::endl; + int step = 0; + while(totalTime < endTime && step < endStep) { // Calculate the time step (dt), max velocity is needed. // Launch the kernel to compute the maximum velocity @@ -136,23 +170,19 @@ void testKernels() MaxVelocityKernel{}, mdVelocityX, mdVelocityY, + mdVelocityZ, std::data(outputDeviceMemory)); // Copy result from device memory to host alpaka::memcpy(queue, resultGpuHost, outputDeviceMemory, extent1D); - - // std::cout << "MaxVelocityKernel finished" << std::endl; - alpaka::wait(queue); // Now use maxVelocity to compute the time step - dt = calculateTimeStep(dx, dy, resultGpuHost[0], cMax); + dt = std::min((float) maxTimestep, (float) (initialTimestep * std::pow(timestepRise, step))); // Exec the halo update kernel alpaka::exec(queue, workDiv, UpdateHaloKernel{}, mdDensity); - // std::cout << "Halo kernel finished" << std::endl; - // Wait for the halo update kernel to finish alpaka::wait(queue); @@ -167,17 +197,22 @@ void testKernels() mdSoundspeed, 1.4f); // gamma - // std::cout << "IdealGasKernel finished" << std::endl; - // Wait for the ideal gas kernel to finish alpaka::wait(queue); // Exec the EOS kernel float gamma = 1.4f; // Specific heat ratio - alpaka::exec< - Acc>(queue, workDiv, EOSKernel{}, mdDensity, mdEnergy, mdPressure, mdVelocityX, mdVelocityY, gamma); - - // std::cout << "EOSKernel finished" << std::endl; + alpaka::exec( + queue, + workDiv, + EOSKernel{}, + mdDensity, + mdEnergy, + mdPressure, + mdVelocityX, + mdVelocityY, + mdVelocityZ, + gamma); // Wait for the EOS kernel to finish alpaka::wait(queue); @@ -192,40 +227,22 @@ void testKernels() mdPressure, mdVelocityX, mdVelocityY, + mdVelocityZ, mdFluxDensity, mdFluxEnergy, mdFluxVelocityX, - mdFluxVelocityY); - - // std::cout << "FluxKernel finished" << std::endl; + mdFluxVelocityY, + mdFluxVelocityZ); // Wait for the Flux kernel to finish alpaka::wait(queue); // Exec the advection kernel (fourth step) - alpaka::exec(queue, workDiv, AdvectionKernel{}, mdDensity, mdVelocityX, mdVelocityY); - - // std::cout << "AdvectionKernel finished" << std::endl; + alpaka::exec(queue, workDiv, AdvectionKernel{}, mdDensity, mdVelocityX, mdVelocityY, mdVelocityZ); // Wait for the Advection kernel to finish alpaka::wait(queue); - // Copy data back to host for verification (optional) - alpaka::memcpy(queue, hDensity, dDensity); - alpaka::memcpy(queue, hEnergy, dEnergy); - alpaka::memcpy(queue, hPressure, dPressure); - alpaka::memcpy(queue, hVelocityX, dVelocityX); - alpaka::memcpy(queue, hVelocityY, dVelocityY); - - // Flux densities - alpaka::memcpy(queue, hFluxDensity, dFluxDensity); - alpaka::memcpy(queue, hFluxEnergy, dFluxEnergy); - alpaka::memcpy(queue, hFluxVelocityX, dFluxVelocityX); - alpaka::memcpy(queue, hFluxVelocityY, dFluxVelocityY); - - // Wait for data transfer to complete - alpaka::wait(queue); - // Exec the Lagrangian kernel alpaka::exec( queue, @@ -235,53 +252,57 @@ void testKernels() mdEnergy, mdVelocityX, mdVelocityY, + mdVelocityZ, mdFluxDensity, mdFluxEnergy, mdFluxVelocityX, - mdFluxVelocityY); - - // std::cout << "LagrangianKernel finished" << std::endl; + mdFluxVelocityY, + mdFluxVelocityZ); // Wait for the Lagrangian kernel to finish alpaka::wait(queue); // Exec the Viscosity kernel - alpaka::exec< - Acc>(queue, workDiv, ViscosityKernel{}, mdDensity, mdVelocityX, mdVelocityY, mdPressure, mdViscosity); - - // std::cout << "ViscosityKernel finished" << std::endl; + alpaka::exec( + queue, + workDiv, + ViscosityKernel{}, + mdDensity, + mdVelocityX, + mdVelocityY, + mdVelocityZ, + mdPressure, + mdViscosity); // Wait for the Viscosity kernel to finish alpaka::wait(queue); // Update the simulation time totalTime += dt; + step++; - if(iteration % 2000 == 0) + if(step % 100 == 0) { - std::cout << "Current Time: " << totalTime << ", End Time: " << endTime << ", Step Size: " << dt - << ", iteration: " << iteration << std::endl; + std::cout << "Current time: " << totalTime << ", End time: " << endTime << ", Step: " << step << std::endl; } - - iteration++; } std::cout << "Copying results back." << std::endl; - // Copy final data back to host for verification alpaka::memcpy(queue, hDensity, dDensity); alpaka::memcpy(queue, hEnergy, dEnergy); alpaka::memcpy(queue, hPressure, dPressure); alpaka::memcpy(queue, hVelocityX, dVelocityX); alpaka::memcpy(queue, hVelocityY, dVelocityY); + alpaka::memcpy(queue, hVelocityZ, dVelocityZ); alpaka::memcpy(queue, hFluxDensity, dFluxDensity); alpaka::memcpy(queue, hFluxEnergy, dFluxEnergy); alpaka::memcpy(queue, hFluxVelocityX, dFluxVelocityX); alpaka::memcpy(queue, hFluxVelocityY, dFluxVelocityY); + alpaka::memcpy(queue, hFluxVelocityZ, dFluxVelocityZ); alpaka::memcpy(queue, hViscosity, dViscosity); std::cout << "Data copy to host finished" << std::endl; - // Wait for all data transfers to complete alpaka::wait(queue); @@ -291,20 +312,25 @@ void testKernels() auto mdHostPressure = alpaka::experimental::getMdSpan(hPressure); auto mdHostVelocityX = alpaka::experimental::getMdSpan(hVelocityX); auto mdHostVelocityY = alpaka::experimental::getMdSpan(hVelocityY); + auto mdHostVelocityZ = alpaka::experimental::getMdSpan(hVelocityZ); bool success = true; for(Idx i = 0; i < nx; ++i) { for(Idx j = 0; j < ny; ++j) { - if(std::isnan(mdHostDensity(i, j)) || std::isinf(mdHostDensity(i, j)) || std::isnan(mdHostEnergy(i, j)) - || std::isinf(mdHostEnergy(i, j)) || std::isnan(mdHostPressure(i, j)) - || std::isinf(mdHostPressure(i, j)) || std::isnan(mdHostVelocityX(i, j)) - || std::isinf(mdHostVelocityX(i, j)) || std::isnan(mdHostVelocityY(i, j)) - || std::isinf(mdHostVelocityY(i, j))) + for(Idx k = 0; k < nz; ++k) { - success = false; - break; + if(std::isnan(mdHostDensity(i, j, k)) || std::isinf(mdHostDensity(i, j, k)) + || std::isnan(mdHostEnergy(i, j, k)) || std::isinf(mdHostEnergy(i, j, k)) + || std::isnan(mdHostPressure(i, j, k)) || std::isinf(mdHostPressure(i, j, k)) + || std::isnan(mdHostVelocityX(i, j, k)) || std::isinf(mdHostVelocityX(i, j, k)) + || std::isnan(mdHostVelocityY(i, j, k)) || std::isinf(mdHostVelocityY(i, j, k)) + || std::isnan(mdHostVelocityZ(i, j, k)) || std::isinf(mdHostVelocityZ(i, j, k))) + { + success = false; + break; + } } } } @@ -319,9 +345,9 @@ void testKernels() } } -using TestAccs2D = alpaka::test::EnabledAccs, std::uint32_t>; +using TestAccs3D = alpaka::test::EnabledAccs, std::uint32_t>; -TEMPLATE_LIST_TEST_CASE("TEST: Cloverleaf", "[benchmark-test]", TestAccs2D) +TEMPLATE_LIST_TEST_CASE("TEST: Cloverleaf", "[benchmark-test]", TestAccs3D) { using Acc = TestType; if constexpr(alpaka::accMatchesTags) From 1c07b326413a26a04d7728ac3d6bea1db63ddf02 Mon Sep 17 00:00:00 2001 From: mehmet yusufoglu Date: Tue, 8 Oct 2024 20:36:35 +0200 Subject: [PATCH 5/5] make runnable --- benchmarks/cloverleaf/src/cloverLeafMainTest.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp b/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp index 0bddf81207c..d84362a4189 100644 --- a/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp +++ b/benchmarks/cloverleaf/src/cloverLeafMainTest.cpp @@ -115,7 +115,11 @@ void testKernels() // Define work divisions const alpaka::Vec size{nx, ny, nz}; - auto const bundeledInitKernel = alpaka::KernelBundle( + alpaka::KernelCfg const kernelCfg = {size, alpaka::Vec::ones()}; + + auto workDiv = alpaka::getValidWorkDiv( + kernelCfg, + devAcc, InitializerKernel{}, mdDensity, mdEnergy, @@ -123,14 +127,6 @@ void testKernels() mdVelocityX, mdVelocityY, mdVelocityZ); - - auto workDiv = alpaka::getValidWorkDivForKernel( - devAcc, - bundeledInitKernel, - size, - alpaka::Vec::ones(), - false, - alpaka::GridBlockExtentSubDivRestrictions::Unrestricted); // Simulation parameters float totalTime = 0.0f; // Total simulation time