Skip to content

Commit

Permalink
Remove the usage of ExampleDefaultAcc in all examples.
Browse files Browse the repository at this point in the history
Examples will now be executed for all enabled accelerators.
Fix that `ExampleDefaultAcc` was used in the test `MapIdxPitchBytes`.
  • Loading branch information
psychocoderHPC authored and fwyzard committed Jun 17, 2024
1 parent 2d80c0b commit 20fbb60
Show file tree
Hide file tree
Showing 21 changed files with 439 additions and 192 deletions.
2 changes: 1 addition & 1 deletion cmake/alpakaCommon.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -581,7 +581,7 @@ if(alpaka_ACC_GPU_HIP_ENABLE)
# https://github.com/llvm/llvm-project/commit/b86e0992bfa6
# https://www.open-std.org/jtc1/sc22/wg21/docs/cwg_defects.html#150
# for example, is required to create alpaka::EnabledAccTags
# TODO(SimeonEhrig): restrict the HIP version once the first HIP version using Clang 19 is released
# TODO(SimeonEhrig): restrict the HIP version once the first HIP version using Clang 19 is released
alpaka_set_compiler_options(HOST_DEVICE target alpaka "$<$<COMPILE_LANGUAGE:HIP>:SHELL:-frelaxed-template-template-args>")

alpaka_compiler_option(HIP_KEEP_FILES "Keep all intermediate files that are generated during internal compilation steps 'CMakeFiles/<targetname>.dir'" OFF)
Expand Down
2 changes: 1 addition & 1 deletion docs/source/basic/library.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ Kernels can also be defined via lambda expressions.
int main() {
// ...
using Acc = alpaka::ExampleDefaultAcc<Dim, Idx>;
using Acc = alpaka::AccGpuCudaRt<Dim, Idx>;

auto kernel = [] ALPAKA_FN_ACC (Acc const & acc /* , ... */) -> void {
// ...
Expand Down
43 changes: 24 additions & 19 deletions example/bufferCopy/src/bufferCopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
*/

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
#include <alpaka/example/ExecuteForEachAccTag.hpp>

#include <cstdint>
#include <iostream>
Expand Down Expand Up @@ -64,24 +64,19 @@ struct FillBufferKernel
}
};

auto main() -> int
// In standard projects, you typically do not execute the code with every available accelerator.
// Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
// selected accelerator only. If you use the example as the starting point for your project, you can rename the
// example() function to main() and move the accelerator tag to the function body.
template<typename TAccTag>
auto example(TAccTag const&) -> int
{
// Define the index domain
using Dim = alpaka::DimInt<3u>;
using Idx = std::size_t;

// Define the device accelerator
//
// It is possible to choose from a set of accelerators:
// - AccGpuCudaRt
// - AccGpuHipRt
// - AccCpuThreads
// - AccCpuOmp2Threads
// - AccCpuOmp2Blocks
// - AccCpuTbbBlocks
// - AccCpuSerial
// using Acc = alpaka::AccCpuSerial<Dim, Idx>;
using Acc = alpaka::ExampleDefaultAcc<Dim, Idx>;
using Acc = alpaka::TagToAcc<TAccTag, Dim, Idx>;
std::cout << "Using alpaka accelerator: " << alpaka::getAccName<Acc>() << std::endl;
// Defines the synchronization behavior of a queue
//
Expand All @@ -90,12 +85,6 @@ auto main() -> int
using DevQueue = alpaka::Queue<Acc, AccQueueProperty>;

// Define the device accelerator
//
// It is possible to choose from a set of accelerators:
// - AccCpuThreads
// - AccCpuOmp2Threads
// - AccCpuOmp2Blocks
// - AccCpuSerial
using Host = alpaka::AccCpuSerial<Dim, Idx>;
// Defines the synchronization behavior of a queue
//
Expand Down Expand Up @@ -257,3 +246,19 @@ auto main() -> int

return EXIT_SUCCESS;
}

auto main() -> int
{
    // Execute the example once for each enabled accelerator.
    // main() forwards whatever alpaka::executeForEachAccTag() returns as the process exit code.
    //
    // If you would like to execute it for a single accelerator only, you can use the following code.
    // Tags are types, so an instance has to be created before it can be passed to example().
    //  \code{.cpp}
    //  auto tag = alpaka::TagCpuSerial{};
    //  return example(tag);
    //  \endcode
    //
    // valid tags:
    //   TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
    //   TagCpuOmp2Threads, TagCpuSycl, TagCpuThreads,
    //   TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
    //
    // The lambda needs no capture: example() is a namespace-scope function template.
    return alpaka::executeForEachAccTag([](auto const& tag) { return example(tag); });
}
44 changes: 24 additions & 20 deletions example/complex/src/complex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*/

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
#include <alpaka/example/ExecuteForEachAccTag.hpp>

#include <cstdint>
#include <iostream>
Expand All @@ -28,29 +28,17 @@ struct ComplexKernel
}
};

auto main() -> int
// In standard projects, you typically do not execute the code with every available accelerator.
// Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
// selected accelerator only. If you use the example as the starting point for your project, you can rename the
// example() function to main() and move the accelerator tag to the function body.
template<typename TAccTag>
auto example(TAccTag const&) -> int
{
using Idx = std::size_t;

// Define the accelerator
//
// It is possible to choose from a set of accelerators:
// - AccGpuCudaRt
// - AccGpuHipRt
// - AccCpuThreads
// - AccCpuOmp2Threads
// - AccCpuOmp2Blocks
// - AccCpuTbbBlocks
// - AccCpuSerial
//
// Each accelerator has strengths and weaknesses. Therefore,
// they need to be chosen carefully depending on the actual
// use case. Furthermore, some accelerators only support a
// particular workdiv, but workdiv can also be generated
// automatically.

// By exchanging the Acc and Queue types you can select where to execute the kernel.
using Acc = alpaka::ExampleDefaultAcc<alpaka::DimInt<1>, Idx>;
using Acc = alpaka::TagToAcc<TAccTag, alpaka::DimInt<1>, Idx>;
std::cout << "Using alpaka accelerator: " << alpaka::getAccName<Acc>() << std::endl;

// Defines the synchronization behavior of a queue
Expand Down Expand Up @@ -88,3 +76,19 @@ auto main() -> int

return EXIT_SUCCESS;
}

auto main() -> int
{
    // Execute the example once for each enabled accelerator.
    // main() forwards whatever alpaka::executeForEachAccTag() returns as the process exit code.
    //
    // If you would like to execute it for a single accelerator only, you can use the following code.
    // Tags are types, so an instance has to be created before it can be passed to example().
    //  \code{.cpp}
    //  auto tag = alpaka::TagCpuSerial{};
    //  return example(tag);
    //  \endcode
    //
    // valid tags:
    //   TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
    //   TagCpuOmp2Threads, TagCpuSycl, TagCpuThreads,
    //   TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
    //
    // The lambda needs no capture: example() is a namespace-scope function template.
    return alpaka::executeForEachAccTag([](auto const& tag) { return example(tag); });
}
27 changes: 24 additions & 3 deletions example/convolution1D/src/convolution1D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*/

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
#include <alpaka/example/ExecuteForEachAccTag.hpp>

#include <cmath>
#include <iomanip>
Expand Down Expand Up @@ -64,7 +64,12 @@ auto FuzzyEqual(float a, float b) -> bool
return std::fabs(a - b) < std::numeric_limits<float>::epsilon() * 10.0f;
}

auto main() -> int
// In standard projects, you typically do not execute the code with every available accelerator.
// Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
// selected accelerator only. If you use the example as the starting point for your project, you can rename the
// example() function to main() and move the accelerator tag to the function body.
template<typename TAccTag>
auto example(TAccTag const&) -> int
{
// Size of 1D arrays to be used in convolution integral
// Here instead of "convolution kernel" the term "filter" is used because kernel has a different meaning in GPU
Expand All @@ -80,7 +85,7 @@ auto main() -> int
using Idx = std::size_t;

// Define the accelerator
using DevAcc = alpaka::ExampleDefaultAcc<Dim, Idx>;
using DevAcc = alpaka::TagToAcc<TAccTag, Dim, Idx>;
using QueueProperty = alpaka::Blocking;
using QueueAcc = alpaka::Queue<DevAcc, QueueProperty>;
using BufAcc = alpaka::Buf<DevAcc, DataType, Dim, Idx>;
Expand Down Expand Up @@ -176,3 +181,19 @@ auto main() -> int
std::cout << "All results are correct!\n";
return EXIT_SUCCESS;
}

auto main() -> int
{
    // Execute the example once for each enabled accelerator.
    // main() forwards whatever alpaka::executeForEachAccTag() returns as the process exit code.
    //
    // If you would like to execute it for a single accelerator only, you can use the following code.
    // Tags are types, so an instance has to be created before it can be passed to example().
    //  \code{.cpp}
    //  auto tag = alpaka::TagCpuSerial{};
    //  return example(tag);
    //  \endcode
    //
    // valid tags:
    //   TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
    //   TagCpuOmp2Threads, TagCpuSycl, TagCpuThreads,
    //   TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
    //
    // The lambda needs no capture: example() is a namespace-scope function template.
    return alpaka::executeForEachAccTag([](auto const& tag) { return example(tag); });
}
27 changes: 24 additions & 3 deletions example/convolution2D/src/convolution2D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*/

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
#include <alpaka/example/ExecuteForEachAccTag.hpp>

#include <iomanip>
#include <iostream>
Expand Down Expand Up @@ -208,15 +208,20 @@ auto FuzzyEqual(float a, float b) -> bool
return std::fabs(a - b) < std::numeric_limits<float>::epsilon() * 1000.0f;
}

auto main() -> int
// In standard projects, you typically do not execute the code with every available accelerator.
// Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
// selected accelerator only. If you use the example as the starting point for your project, you can rename the
// example() function to main() and move the accelerator tag to the function body.
template<typename TAccTag>
auto example(TAccTag const&) -> int
{
// Define the index domain
using Dim = alpaka::DimInt<2>;
// Index type
using Idx = std::uint32_t;
using Vec = alpaka::Vec<Dim, Idx>;
// Define the accelerator
using DevAcc = alpaka::ExampleDefaultAcc<Dim, Idx>;
using DevAcc = alpaka::TagToAcc<TAccTag, Dim, Idx>;
using QueueAcc = alpaka::Queue<DevAcc, alpaka::NonBlocking>;

using DataType = float;
Expand Down Expand Up @@ -379,3 +384,19 @@ auto main() -> int
std::cout << "Sampled result checks are correct!\n";
return EXIT_SUCCESS;
}

auto main() -> int
{
    // Execute the example once for each enabled accelerator.
    // main() forwards whatever alpaka::executeForEachAccTag() returns as the process exit code.
    //
    // If you would like to execute it for a single accelerator only, you can use the following code.
    // Tags are types, so an instance has to be created before it can be passed to example().
    //  \code{.cpp}
    //  auto tag = alpaka::TagCpuSerial{};
    //  return example(tag);
    //  \endcode
    //
    // valid tags:
    //   TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
    //   TagCpuOmp2Threads, TagCpuSycl, TagCpuThreads,
    //   TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
    //
    // The lambda needs no capture: example() is a namespace-scope function template.
    return alpaka::executeForEachAccTag([](auto const& tag) { return example(tag); });
}
38 changes: 24 additions & 14 deletions example/counterBasedRng/src/counterBasedRng.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*/

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
#include <alpaka/example/ExecuteForEachAccTag.hpp>
#include <alpaka/rand/RandPhiloxStateless.hpp>

#include <chrono>
Expand Down Expand Up @@ -92,25 +92,19 @@ class CounterBasedRngKernel
}
};

auto main() -> int
// In standard projects, you typically do not execute the code with every available accelerator.
// Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
// selected accelerator only. If you use the example as the starting point for your project, you can rename the
// example() function to main() and move the accelerator tag to the function body.
template<typename TAccTag>
auto example(TAccTag const&) -> int
{
// Define the index domain
using Dim = alpaka::DimInt<3u>;
using Idx = std::size_t;

// Define the accelerator
//
// It is possible to choose from a set of accelerators:
// - AccGpuCudaRt
// - AccGpuHipRt
// - AccCpuThreads
// - AccCpuFibers
// - AccCpuOmp2Threads
// - AccCpuOmp2Blocks
// - AccCpuTbbBlocks
// - AccCpuSerial
// using Acc = alpaka::AccCpuSerial<Dim, Idx>;
using Acc = alpaka::ExampleDefaultAcc<Dim, Idx>;
using Acc = alpaka::TagToAcc<TAccTag, Dim, Idx>;
std::cout << "Using alpaka accelerator: " << alpaka::getAccName<Acc>() << std::endl;

using AccHost = alpaka::AccCpuSerial<Dim, Idx>;
Expand Down Expand Up @@ -222,3 +216,19 @@ auto main() -> int
return EXIT_FAILURE;
}
}

auto main() -> int
{
    // Execute the example once for each enabled accelerator.
    // main() forwards whatever alpaka::executeForEachAccTag() returns as the process exit code.
    //
    // If you would like to execute it for a single accelerator only, you can use the following code.
    // Tags are types, so an instance has to be created before it can be passed to example().
    //  \code{.cpp}
    //  auto tag = alpaka::TagCpuSerial{};
    //  return example(tag);
    //  \endcode
    //
    // valid tags:
    //   TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
    //   TagCpuOmp2Threads, TagCpuSycl, TagCpuThreads,
    //   TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
    //
    // The lambda needs no capture: example() is a namespace-scope function template.
    return alpaka::executeForEachAccTag([](auto const& tag) { return example(tag); });
}
31 changes: 26 additions & 5 deletions example/heatEquation/src/heatEquation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
*/

#include <alpaka/alpaka.hpp>
#include <alpaka/example/ExampleDefaultAcc.hpp>
#include <alpaka/example/ExecuteForEachAccTag.hpp>

#include <algorithm>
#include <cmath>
Expand Down Expand Up @@ -62,7 +62,13 @@ auto exactSolution(double const x, double const t) -> double
//! Every time step the kernel will be executed numNodesX-times
//! After every step the curr-buffer will be set to the calculated values
//! from the next-buffer.
auto main() -> int
//!
//! In standard projects, you typically do not execute the code with every available accelerator.
//! Instead, a single accelerator is selected once from the active accelerators and the kernels are executed with the
//! selected accelerator only. If you use the example as the starting point for your project, you can rename the
//! example() function to main() and move the accelerator tag to the function body.
template<typename TAccTag>
auto example(TAccTag const&) -> int
{
// Parameters (a user is supposed to change numNodesX, numTimeSteps)
uint32_t const numNodesX = 1000;
Expand All @@ -84,9 +90,8 @@ auto main() -> int
using Dim = alpaka::DimInt<1u>;
using Idx = uint32_t;

// Select accelerator-types for host and device
// using Acc = alpaka::AccCpuSerial<Dim, Idx>;
using Acc = alpaka::ExampleDefaultAcc<Dim, Idx>;
// Define the accelerator
using Acc = alpaka::TagToAcc<TAccTag, Dim, Idx>;
std::cout << "Using alpaka accelerator: " << alpaka::getAccName<Acc>() << std::endl;

// Select specific devices
Expand Down Expand Up @@ -179,3 +184,19 @@ auto main() -> int
return EXIT_FAILURE;
}
}

auto main() -> int
{
    // Execute the example once for each enabled accelerator.
    // main() forwards whatever alpaka::executeForEachAccTag() returns as the process exit code.
    //
    // If you would like to execute it for a single accelerator only, you can use the following code.
    // Tags are types, so an instance has to be created before it can be passed to example().
    //  \code{.cpp}
    //  auto tag = alpaka::TagCpuSerial{};
    //  return example(tag);
    //  \endcode
    //
    // valid tags:
    //   TagCpuSerial, TagGpuHipRt, TagGpuCudaRt, TagCpuOmp2Blocks, TagCpuTbbBlocks,
    //   TagCpuOmp2Threads, TagCpuSycl, TagCpuThreads,
    //   TagFpgaSyclIntel, TagGenericSycl, TagGpuSyclIntel
    //
    // The lambda needs no capture: example() is a namespace-scope function template.
    return alpaka::executeForEachAccTag([](auto const& tag) { return example(tag); });
}
Loading

0 comments on commit 20fbb60

Please sign in to comment.