-
Notifications
You must be signed in to change notification settings - Fork 75
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft: Catch2 Benchmarking #1723
Draft
sliwowitz
wants to merge
1
commit into
alpaka-group:develop
Choose a base branch
from
sliwowitz:benchmark
base: develop
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
118 changes: 118 additions & 0 deletions
118
include/alpaka/test/KernelExecutionBenchmarkFixture.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,118 @@ | ||||||
/* Copyright 2022 Benjamin Worpitz, Andrea Bocci, Bernhard Manfred Gruber | ||||||
* | ||||||
* This file is part of alpaka. | ||||||
* | ||||||
* This Source Code Form is subject to the terms of the Mozilla Public | ||||||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||||||
*/ | ||||||
|
||||||
#pragma once | ||||||
|
||||||
#include <alpaka/alpaka.hpp> | ||||||
|
||||||
#if defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !BOOST_LANG_CUDA | ||||||
# error If ALPAKA_ACC_GPU_CUDA_ENABLED is set, the compiler has to support CUDA! | ||||||
#endif | ||||||
|
||||||
#if defined(ALPAKA_ACC_GPU_HIP_ENABLED) && !BOOST_LANG_HIP | ||||||
# error If ALPAKA_ACC_GPU_HIP_ENABLED is set, the compiler has to support HIP! | ||||||
#endif | ||||||
|
||||||
#include <alpaka/test/Check.hpp> | ||||||
#include <alpaka/test/queue/Queue.hpp> | ||||||
|
||||||
#include <catch2/benchmark/catch_benchmark.hpp> | ||||||
|
||||||
#include <string> | ||||||
#include <utility> | ||||||
|
||||||
namespace alpaka::test | ||||||
{ | ||||||
//! The fixture for executing a kernel on a given accelerator. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
template<typename TAcc> | ||||||
class KernelExecutionBenchmarkFixture | ||||||
{ | ||||||
public: | ||||||
using Acc = TAcc; | ||||||
using Dim = alpaka::Dim<Acc>; | ||||||
using Idx = alpaka::Idx<Acc>; | ||||||
using Device = Dev<Acc>; | ||||||
using Platform = alpaka::Platform<Acc>; | ||||||
using Queue = test::DefaultQueue<Device>; | ||||||
using WorkDiv = WorkDivMembers<Dim, Idx>; | ||||||
|
||||||
KernelExecutionBenchmarkFixture(WorkDiv workDiv) : m_workDiv(std::move(workDiv)) | ||||||
{ | ||||||
} | ||||||
|
||||||
template<typename TExtent> | ||||||
KernelExecutionBenchmarkFixture(TExtent const& extent) | ||||||
: KernelExecutionBenchmarkFixture(getValidWorkDiv<Acc>( | ||||||
getDevByIdx<Acc>(0u), | ||||||
extent, | ||||||
Vec<Dim, Idx>::ones(), | ||||||
false, | ||||||
GridBlockExtentSubDivRestrictions::Unrestricted)) | ||||||
{ | ||||||
} | ||||||
|
||||||
template<typename TKernelFnObj, typename... TArgs> | ||||||
auto operator()( | ||||||
TKernelFnObj const& kernelFnObj, | ||||||
std::string const& benchmarkName, | ||||||
float& result, | ||||||
TArgs&&... args) -> bool | ||||||
{ | ||||||
// Allocate result buffers | ||||||
auto bufAccResult = allocBuf<float, Idx>(m_device, static_cast<Idx>(1u)); | ||||||
auto bufHostResult = allocBuf<float, Idx>(m_devHost, static_cast<Idx>(1u)); | ||||||
|
||||||
int numRuns = 0; | ||||||
result = 0.0f; | ||||||
|
||||||
// The following block is executed unknown times during estimation phase, then once per benchmark sample | ||||||
BENCHMARK_ADVANCED(std::string(benchmarkName))(Catch::Benchmark::Chronometer meter) | ||||||
{ | ||||||
numRuns++; | ||||||
memset(m_queue, bufAccResult, 0); | ||||||
wait(m_queue); | ||||||
|
||||||
// Only the following part is measured as the benchmark part | ||||||
meter.measure( | ||||||
[&] | ||||||
{ | ||||||
exec<Acc>( | ||||||
m_queue, | ||||||
m_workDiv, | ||||||
kernelFnObj, | ||||||
getPtrNative(bufAccResult), | ||||||
std::forward<TArgs>(args)...); // run the measured kernel | ||||||
wait(m_queue); // wait for the kernel to actually run | ||||||
}); | ||||||
|
||||||
// Copy the result value to the host | ||||||
memcpy(m_queue, bufHostResult, bufAccResult); | ||||||
wait(m_queue); | ||||||
|
||||||
auto const resultLocal = *getPtrNative(bufHostResult); | ||||||
result += resultLocal; | ||||||
return resultLocal; // make sure the benchmark call is not optimized away | ||||||
}; | ||||||
result /= static_cast<float>(numRuns); | ||||||
|
||||||
return true; | ||||||
// TODO: Can we return the result here and read it from Catch2's REQUIRE or something similar? Or are the | ||||||
// returns limited to bools? | ||||||
// return result; | ||||||
} | ||||||
|
||||||
protected: | ||||||
PlatformCpu m_platformHost{}; | ||||||
DevCpu m_devHost{getDevByIdx(m_platformHost, 0)}; | ||||||
Platform m_platform{}; | ||||||
Device m_device{getDevByIdx(m_platform, 0)}; | ||||||
Queue m_queue{m_device}; | ||||||
WorkDiv m_workDiv; | ||||||
}; | ||||||
} // namespace alpaka::test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# | ||
# Copyright 2022 Jiri Vyskocil | ||
# | ||
# This file is part of alpaka. | ||
# | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
# | ||
|
||
# Minimum CMake version needed to configure the benchmarks.
cmake_minimum_required(VERSION 3.18) | ||
|
||
# Add the benchmark targets defined in the rand/ subdirectory.
add_subdirectory("rand/") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
# | ||
# Copyright 2022 Jiri Vyskocil | ||
# | ||
# This file is part of alpaka. | ||
# | ||
# This Source Code Form is subject to the terms of the Mozilla Public | ||
# License, v. 2.0. If a copy of the MPL was not distributed with this | ||
# file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
# | ||
|
||
set(_TARGET_NAME "randBenchmark")

# Collect all *.cpp files below src/ into _FILES_SOURCE.
append_recursive_files_add_to_src_group("src/" "src/" "cpp" _FILES_SOURCE)

alpaka_add_executable(${_TARGET_NAME} ${_FILES_SOURCE})
target_link_libraries(${_TARGET_NAME} PRIVATE common)
target_compile_definitions(${_TARGET_NAME} PUBLIC CATCH_CONFIG_ENABLE_BENCHMARKING)
set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER "test/benchmark")

# Extra command line arguments controlling how many samples Catch2 collects.
# - CI test runs only check that the benchmark works at all, so a single sample is collected.
#   Real automated benchmark runs will need to collect more samples (the default 100 is fine). The CI will then
#   have to set another variable to indicate if it is only testing, or if it wants to do a full benchmark.
# - Otherwise nothing is added and the full benchmark collects 100 samples for good benchmark statistics.
set(_BENCHMARK_SAMPLE_ARGS "")
if(alpaka_CI)
    set(_BENCHMARK_SAMPLE_ARGS --benchmark-samples 1)
endif()

add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME} ${_alpaka_TEST_OPTIONS} ${_BENCHMARK_SAMPLE_ARGS})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
/* Copyright 2022 Jiri Vyskocil | ||
* | ||
* This file is part of alpaka. | ||
* | ||
* This Source Code Form is subject to the terms of the Mozilla Public | ||
* License, v. 2.0. If a copy of the MPL was not distributed with this | ||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. | ||
*/ | ||
|
||
#include <alpaka/example/ExampleDefaultAcc.hpp> | ||
#include <alpaka/rand/Traits.hpp> | ||
#include <alpaka/test/KernelExecutionBenchmarkFixture.hpp> | ||
#include <alpaka/test/acc/TestAccs.hpp> | ||
|
||
#include <catch2/catch_template_test_macros.hpp> | ||
#include <catch2/catch_test_macros.hpp> | ||
#include <catch2/generators/catch_generators.hpp> | ||
|
||
class RandBenchmarkKernel | ||
{ | ||
public: | ||
ALPAKA_NO_HOST_ACC_WARNING | ||
template<typename TAcc, typename TIdx> | ||
ALPAKA_FN_ACC void operator()(TAcc const& acc, float* result, TIdx numPoints) const | ||
{ | ||
// Get the global linearized thread idx. | ||
auto const globalThreadIdx = alpaka::getIdx<alpaka::Grid, alpaka::Threads>(acc); | ||
auto const globalThreadExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Threads>(acc); | ||
|
||
auto const linearizedGlobalThreadIdx | ||
= static_cast<TIdx>(alpaka::mapIdx<1u>(globalThreadIdx, globalThreadExtent)[0]); | ||
|
||
// Setup generator engine and distribution. | ||
auto engine = alpaka::rand::engine::createDefault(acc, 42, linearizedGlobalThreadIdx); | ||
auto dist(alpaka::rand::distribution::createUniformReal<float>(acc)); | ||
|
||
float number = 0; | ||
for(TIdx i = linearizedGlobalThreadIdx; i < numPoints; i += static_cast<TIdx>(globalThreadExtent.prod())) | ||
{ | ||
number += dist(engine); | ||
} | ||
|
||
alpaka::atomicAdd( | ||
acc, | ||
result, | ||
number); // TODO: we're measuring the atomicAdd time too, this is not what we want | ||
} | ||
}; | ||
|
||
// TODO: This takes an enormous time to finish and is probably useless anyway: | ||
// TEMPLATE_LIST_TEST_CASE("defaultRandomGeneratorBenchmark", "[randBenchmark]", alpaka::test::TestAccs) | ||
// Running the benchmark on a single default accelerator instead | ||
//! Benchmarks the default random number generator on the example default accelerator for a set of sequence
//! lengths and prints the normalized mean of the generated numbers.
TEST_CASE("defaultRandomGeneratorBenchmark", "[randBenchmark]")
{
    // using Acc = TestType;
    using Acc = alpaka::ExampleDefaultAcc<alpaka::DimInt<1>, std::size_t>;
    using Dim = alpaka::Dim<Acc>;
    using Idx = alpaka::Idx<Acc>;
    using Vec = alpaka::Vec<Dim, Idx>;
    using WorkDiv = alpaka::WorkDivMembers<Dim, Idx>;

    auto const platform = alpaka::Platform<Acc>{};
    auto const dev = alpaka::getDevByIdx(platform, 0);

    // hardware_concurrency() is only a hint and returns 0 when the value cannot be determined. Fall back to a
    // single thread so that the requested grid and block extents never become zero.
    auto const hardwareThreads = std::thread::hardware_concurrency();
    Idx const numThreads = hardwareThreads == 0u ? Idx{1} : static_cast<Idx>(hardwareThreads); // TODO: GPU?
    std::cout << "Hardware threads: " << numThreads << std::endl;

#ifdef ALPAKA_CI // Reduced benchmark set for automated test runs.
    unsigned const numPoints = GENERATE(10u, 1'000'000u);
#else
    unsigned const numPoints = GENERATE(10u, 100000u, 1'000'000u, 10'000'000u, 100'000'000u, 1'000'000'000u);
#endif

    WorkDiv workdiv{alpaka::getValidWorkDiv<Acc>(
        dev,
        Vec::all(numThreads * numThreads),
        Vec::all(numThreads),
        false,
        alpaka::GridBlockExtentSubDivRestrictions::Unrestricted)};

    alpaka::test::KernelExecutionBenchmarkFixture<Acc> fixture(workdiv);

    RandBenchmarkKernel kernel;

    float result = 0.0f;

    REQUIRE(fixture(kernel, "Random sequence N=" + std::to_string(numPoints), result, numPoints));
    // TODO: Actually check the result
    std::cout << "\ntemp debug normalized result = " << result / static_cast<float>(numPoints)
              << " should probably converge to 0.5." << std::flush;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I dislike those. Can't we just have a prelude in
alpaka.hpp
after BoostPredef
that checks those in one place? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As long as it takes
ALPAKA_HOST_ONLY
into account.