-
Notifications
You must be signed in to change notification settings - Fork 75
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add 2D heat equation example with shared memory and validation
- Loading branch information
1 parent
546f267
commit 53392f4
Showing
8 changed files
with
594 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# | ||
# Copyright 2023 Benjamin Worpitz, Jan Stephan | ||
# SPDX-License-Identifier: ISC | ||
# | ||
|
||
################################################################################ | ||
# Required CMake version. | ||
|
||
cmake_minimum_required(VERSION 3.22) | ||
|
||
set_property(GLOBAL PROPERTY USE_FOLDERS ON) | ||
|
||
################################################################################ | ||
# Project. | ||
|
||
set(_TARGET_NAME heatEquation2D) | ||
|
||
project(${_TARGET_NAME} LANGUAGES CXX) | ||
|
||
|
||
################################################################################ | ||
# PNGwriter | ||
################################################################################ | ||
|
||
# find PNGwriter installation | ||
find_package(PNGwriter 0.7.0 CONFIG) | ||
|
||
if(PNGwriter_FOUND) | ||
set(PNGWRITER_ENABLED True) | ||
else() | ||
set(PNGWRITER_ENABLED False) | ||
endif() | ||
|
||
#------------------------------------------------------------------------------- | ||
# Find alpaka. | ||
|
||
if(NOT TARGET alpaka::alpaka) | ||
option(alpaka_USE_SOURCE_TREE "Use alpaka's source tree instead of an alpaka installation" OFF) | ||
|
||
if(alpaka_USE_SOURCE_TREE) | ||
# Don't build the examples recursively | ||
set(alpaka_BUILD_EXAMPLES OFF) | ||
add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../.." "${CMAKE_BINARY_DIR}/alpaka") | ||
else() | ||
find_package(alpaka REQUIRED) | ||
endif() | ||
endif() | ||
|
||
#------------------------------------------------------------------------------- | ||
# Add executable. | ||
|
||
alpaka_add_executable( | ||
${_TARGET_NAME} | ||
src/heatEquation2D.cpp) | ||
target_link_libraries( | ||
${_TARGET_NAME} | ||
PUBLIC alpaka::alpaka) | ||
if(PNGwriter_FOUND) | ||
target_link_libraries( | ||
${_TARGET_NAME} | ||
PRIVATE PNGwriter::PNGwriter) | ||
target_compile_definitions(${_TARGET_NAME} PRIVATE PNGWRITER_ENABLED) | ||
endif() | ||
|
||
set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER example) | ||
|
||
add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
/* Copyright 2024 Tapish Narwal | ||
* SPDX-License-Identifier: ISC | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include "analyticalSolution.hpp" | ||
#include "helpers.hpp" | ||
|
||
#include <alpaka/alpaka.hpp> | ||
|
||
//! alpaka version of explicit finite-difference 1d heat equation solver | ||
//! | ||
//! Applies boundary conditions | ||
//! forward difference in t and second-order central difference in x | ||
//! | ||
//! \param uBuf grid values of u for each x, y and the current value of t: | ||
//! u(x, y, t) | t = t_current | ||
//! \param chunkSize | ||
//! \param pitch | ||
//! \param dx step in x | ||
//! \param dy step in y | ||
//! \param dt step in t | ||
struct BoundaryKernel | ||
{ | ||
template<typename TAcc, typename TChunk> | ||
ALPAKA_FN_ACC auto operator()( | ||
TAcc const& acc, | ||
double* const uBuf, | ||
TChunk const chunkSize, | ||
TChunk const pitch, | ||
uint32_t step, | ||
double const dx, | ||
double const dy, | ||
double const dt) const -> void | ||
{ | ||
using Dim = alpaka::DimInt<2u>; | ||
using Idx = uint32_t; | ||
|
||
// Get extents(dimensions) | ||
auto const gridBlockExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc); | ||
auto const blockThreadExtent = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc); | ||
auto const numThreadsPerBlock = blockThreadExtent.prod(); | ||
|
||
// Get indexes | ||
auto const gridBlockIdx = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc); | ||
auto const blockThreadIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc); | ||
auto const threadIdx1D = alpaka::mapIdx<1>(blockThreadIdx, blockThreadExtent)[0u]; | ||
auto const blockStartIdx = gridBlockIdx * chunkSize; | ||
|
||
// Lambda function to apply boundary conditions | ||
auto applyBoundary = [&](auto const& globalIdxStart, auto const length, bool isRow) | ||
{ | ||
for(auto i = threadIdx1D; i < length; i += numThreadsPerBlock) | ||
{ | ||
auto idx2D = globalIdxStart + (isRow ? alpaka::Vec<Dim, Idx>{0, i} : alpaka::Vec<Dim, Idx>{i, 0}); | ||
auto elem = getElementPtr(uBuf, idx2D, pitch); | ||
*elem = exactSolution(idx2D[1] * dx, idx2D[0] * dy, step * dt); | ||
} | ||
}; | ||
|
||
// Apply boundary conditions for the top row | ||
if(gridBlockIdx[0] == 0) | ||
{ | ||
applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{0, 1}, chunkSize[1], true); | ||
} | ||
|
||
// Apply boundary conditions for the bottom row | ||
if(gridBlockIdx[0] == gridBlockExtent[0] - 1) | ||
{ | ||
applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{chunkSize[0] + 1, 1}, chunkSize[1], true); | ||
} | ||
|
||
// Apply boundary conditions for the left column | ||
if(gridBlockIdx[1] == 0) | ||
{ | ||
applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{1, 0}, chunkSize[0], false); | ||
} | ||
|
||
// Apply boundary conditions for the right column | ||
if(gridBlockIdx[1] == gridBlockExtent[1] - 1) | ||
{ | ||
applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{1, chunkSize[1] + 1}, chunkSize[0], false); | ||
} | ||
} | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
/* Copyright 2024 Tapish Narwal | ||
* SPDX-License-Identifier: ISC | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include "helpers.hpp" | ||
|
||
#include <alpaka/alpaka.hpp> | ||
|
||
//! alpaka version of explicit finite-difference 2D heat equation solver | ||
//! | ||
//! \tparam T_SharedMemSize1D size of the shared memory box | ||
//! | ||
//! Solving equation u_t(x, t) = u_xx(x, t) + u_yy(y, t) using a simple explicit scheme with | ||
//! forward difference in t and second-order central difference in x and y | ||
//! | ||
//! \param uCurrBuf Current buffer with grid values of u for each x, y pair and the current value of t: | ||
//! u(x, y, t) | t = t_current | ||
//! \param uNextBuf resulting grid values of u for each x, y pair and the next value of t: | ||
//! u(x, y, t) | t = t_current + dt | ||
//! \param chunkSize The size of the chunk or tile that the user divides the problem into. This defines the size of the | ||
//! workload handled by each thread block. | ||
//! \param pitchCurr The pitch (or stride) in memory corresponding to the TDim grid in the accelerator's memory. | ||
//! This is used to calculate memory offsets when accessing elements in the current buffer. | ||
//! \param pitchNext The pitch used to calculate memory offsets when accessing elements in the next buffer. | ||
//! \param dx step in x | ||
//! \param dy step in y | ||
//! \param dt step in t | ||
template<size_t T_SharedMemSize1D> | ||
struct StencilKernel | ||
{ | ||
template<typename TAcc, typename TDim, typename TIdx> | ||
ALPAKA_FN_ACC auto operator()( | ||
TAcc const& acc, | ||
double const* const uCurrBuf, | ||
double* const uNextBuf, | ||
alpaka::Vec<TDim, TIdx> const chunkSize, | ||
alpaka::Vec<TDim, TIdx> const pitchCurr, | ||
alpaka::Vec<TDim, TIdx> const pitchNext, | ||
double const dx, | ||
double const dy, | ||
double const dt) const -> void | ||
{ | ||
auto& sdata = alpaka::declareSharedVar<double[T_SharedMemSize1D], __COUNTER__>(acc); | ||
|
||
// Get extents(dimensions) | ||
auto const blockThreadExtent = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc); | ||
auto const numThreadsPerBlock = blockThreadExtent.prod(); | ||
|
||
// Get indexes | ||
auto const gridBlockIdx = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc); | ||
auto const blockThreadIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc); | ||
auto const threadIdx1D = alpaka::mapIdx<1>(blockThreadIdx, blockThreadExtent)[0u]; | ||
auto const blockStartIdx = gridBlockIdx * chunkSize; | ||
|
||
constexpr alpaka::Vec<TDim, TIdx> halo{2, 2}; | ||
|
||
for(auto i = threadIdx1D; i < T_SharedMemSize1D; i += numThreadsPerBlock) | ||
{ | ||
auto idx2d = alpaka::mapIdx<2>(alpaka::Vec(i), chunkSize + halo); | ||
idx2d = idx2d + blockStartIdx; | ||
auto elem = getElementPtr(uCurrBuf, idx2d, pitchCurr); | ||
sdata[i] = *elem; | ||
} | ||
|
||
alpaka::syncBlockThreads(acc); | ||
|
||
// Each kernel executes one element | ||
double const rX = dt / (dx * dx); | ||
double const rY = dt / (dy * dy); | ||
|
||
// go over only core cells | ||
for(auto i = threadIdx1D; i < chunkSize.prod(); i += numThreadsPerBlock) | ||
{ | ||
auto idx2D = alpaka::mapIdx<2>(alpaka::Vec(i), chunkSize); | ||
idx2D = idx2D + alpaka::Vec<TDim, TIdx>{1, 1}; // offset for halo above and to the left | ||
auto localIdx1D = alpaka::mapIdx<1>(idx2D, chunkSize + halo)[0u]; | ||
|
||
|
||
auto bufIdx = idx2D + blockStartIdx; | ||
auto elem = getElementPtr(uNextBuf, bufIdx, pitchNext); | ||
|
||
*elem = sdata[localIdx1D] * (1.0 - 2.0 * rX - 2.0 * rY) + sdata[localIdx1D - 1] * rX | ||
+ sdata[localIdx1D + 1] * rX + sdata[localIdx1D - chunkSize[1] - halo[1]] * rY | ||
+ sdata[localIdx1D + chunkSize[1] + halo[1]] * rY; | ||
} | ||
} | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* Copyright 2020 Tapish Narwal | ||
* SPDX-License-Identifier: ISC | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <alpaka/alpaka.hpp> | ||
|
||
#include <cmath> | ||
|
||
//! Exact solution to the test problem | ||
//! u_t(x, y, t) = u_xx(x, t) + u_yy(y, t), x in [0, 1], y in [0, 1], t in [0, T] | ||
//! | ||
//! \param x value of x | ||
//! \param x value of y | ||
//! \param t value of t | ||
ALPAKA_FN_HOST_ACC auto exactSolution(double const x, double const y, double const t) -> double | ||
{ | ||
constexpr double pi = alpaka::math::constants::pi; | ||
return std::exp(-pi * pi * t) * (std::sin(pi * x) + std::sin(pi * y)); | ||
} | ||
|
||
//! Valdidate calculated solution in the buffer to the analytical solution at t=tMax | ||
//! | ||
//! \param buffer buffer holding the solution at t=tMax | ||
//! \param extent extents of the buffer | ||
//! \param dx | ||
//! \param dy | ||
//! \param tMax time at simulation end | ||
template<typename T_Buffer, typename T_Extent> | ||
auto validateSolution( | ||
T_Buffer const& buffer, | ||
T_Extent const& extent, | ||
double const dx, | ||
double const dy, | ||
double const tMax) -> std::pair<bool, double> | ||
{ | ||
// Calculate error | ||
double maxError = 0.0; | ||
for(uint32_t j = 1; j < extent[0] - 1; ++j) | ||
{ | ||
for(uint32_t i = 0; i < extent[1]; ++i) | ||
{ | ||
auto const error = std::abs(buffer.data()[j * extent[1] + i] - exactSolution(i * dx, j * dy, tMax)); | ||
maxError = std::max(maxError, error); | ||
} | ||
} | ||
|
||
constexpr double errorThreshold = 1e-5; | ||
return std::make_pair(maxError < errorThreshold, maxError); | ||
} | ||
|
||
//! Initialize the buffer to the analytical solution at t=0 | ||
//! | ||
//! \param buffer buffer holding the solution at tMax | ||
//! \param extent extents of the buffer | ||
//! \param dx | ||
//! \param dy | ||
template<typename TBuffer> | ||
auto initalizeBuffer(TBuffer& buffer, double const dx, double const dy) -> void | ||
{ | ||
auto extents = alpaka::getExtents(buffer); | ||
// Apply initial conditions for the test problem | ||
for(uint32_t j = 0; j < extents[0]; ++j) | ||
{ | ||
for(uint32_t i = 0; i < extents[1]; ++i) | ||
{ | ||
buffer.data()[j * extents[1] + i] = exactSolution(i * dx, j * dy, 0.0); | ||
} | ||
} | ||
} |
Oops, something went wrong.