From baf80fd68263b6d882dece6d7b32fc89b5d08e90 Mon Sep 17 00:00:00 2001 From: Anton Gorenko Date: Tue, 9 Jul 2019 13:28:18 +0600 Subject: [PATCH 01/25] Add info about HIP-clang and AMDGPU_TARGETS to README --- README.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f586643e8..ccc3a3805 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ performant GPU-accelerated code on AMD ROCm platform. * Git * CMake (3.5.1 or later) * AMD [ROCm](https://rocm.github.io/install.html) platform (1.8.2 or later) - * Including [HCC](https://github.com/RadeonOpenCompute/hcc) compiler, which must be - set as C++ compiler on ROCm platform. + * Including [HCC](https://github.com/RadeonOpenCompute/hcc) compiler + * Alternatively [HIP-clang](https://github.com/ROCm-Developer-Tools/HIP/blob/master/INSTALL.md#hip-clang) compiler Optional: @@ -32,12 +32,17 @@ cd rocPRIM; mkdir build; cd build # Build options: # BUILD_TEST - on by default, # BUILD_BENCHMARK - off by default. +# AMDGPU_TARGETS - list of AMD architectures, default: gfx803;gfx900;gfx906. +# You can make compilation faster if you want to test/benchmark only on one architecture, +# for example, add -DAMDGPU_TARGETS=gfx906 to 'cmake' parameters. # # ! IMPORTANT ! -# On ROCm platform set C++ compiler to HCC. You can do it by adding 'CXX=' -# before 'cmake' or setting cmake option 'CMAKE_CXX_COMPILER' to path to the HCC compiler. -# +# Set C++ compiler to HCC or HIP-clang. You can do it by adding 'CXX=' +# before 'cmake' or setting cmake option 'CMAKE_CXX_COMPILER' to path to the compiler. +# Using HCC: [CXX=hcc] cmake -DBUILD_BENCHMARK=ON ../. # or cmake-gui ../. +# or using HIP-clang: +[CXX=hipcc] cmake -DBUILD_BENCHMARK=ON ../. 
# Build make -j4 From c98faff061ce0ca76af62a5effdaec3b765ac515 Mon Sep 17 00:00:00 2001 From: mhbliao <47895780+mhbliao@users.noreply.github.com> Date: Mon, 7 Oct 2019 15:49:43 -0400 Subject: [PATCH 02/25] Revert previous workaround for C++14 compilation with HIP-clang. (#105) - That workaround is not required as the bug in clang is fixed. --- benchmark/benchmark_device_binary_search.cpp | 7 ------- benchmark/benchmark_device_merge.cpp | 7 ------- test/rocprim/test_block_radix_sort.cpp | 7 ------- test/rocprim/test_block_sort.cpp | 7 ------- test/rocprim/test_device_binary_search.cpp | 7 ------- test/rocprim/test_device_merge.cpp | 7 ------- test/rocprim/test_device_merge_sort.cpp | 7 ------- test/rocprim/test_device_radix_sort.cpp | 7 ------- test/rocprim/test_device_segmented_radix_sort.cpp | 7 ------- test/rocprim/test_warp_sort.cpp | 7 ------- 10 files changed, 70 deletions(-) diff --git a/benchmark/benchmark_device_binary_search.cpp b/benchmark/benchmark_device_binary_search.cpp index 271661291..0cce76b18 100644 --- a/benchmark/benchmark_device_binary_search.cpp +++ b/benchmark/benchmark_device_binary_search.cpp @@ -20,13 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -// This is compatiblity code for hip-clang and will be removed in the future -// Please see https://github.com/ROCmSoftwarePlatform/rocPRIM/issues/100 -#if defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ -#undef _GLIBCXX14_CONSTEXPR -#define _GLIBCXX14_CONSTEXPR -#endif // defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ - #include #include #include diff --git a/benchmark/benchmark_device_merge.cpp b/benchmark/benchmark_device_merge.cpp index 0c354c83d..1a740964d 100644 --- a/benchmark/benchmark_device_merge.cpp +++ b/benchmark/benchmark_device_merge.cpp @@ -20,13 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
-// This is compatiblity code for hip-clang and will be removed in the future -// Please see https://github.com/ROCmSoftwarePlatform/rocPRIM/issues/100 -#if defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ -#undef _GLIBCXX14_CONSTEXPR -#define _GLIBCXX14_CONSTEXPR -#endif // defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ - #include #include #include diff --git a/test/rocprim/test_block_radix_sort.cpp b/test/rocprim/test_block_radix_sort.cpp index ac2fd4ae8..3c847b787 100644 --- a/test/rocprim/test_block_radix_sort.cpp +++ b/test/rocprim/test_block_radix_sort.cpp @@ -20,13 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -// This is compatiblity code for hip-clang and will be removed in the future -// Please see https://github.com/ROCmSoftwarePlatform/rocPRIM/issues/100 -#if defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ -#undef _GLIBCXX14_CONSTEXPR -#define _GLIBCXX14_CONSTEXPR -#endif // defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ - #include #include #include diff --git a/test/rocprim/test_block_sort.cpp b/test/rocprim/test_block_sort.cpp index 5a2e7ce4e..53e9113ee 100644 --- a/test/rocprim/test_block_sort.cpp +++ b/test/rocprim/test_block_sort.cpp @@ -20,13 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
-// This is compatiblity code for hip-clang and will be removed in the future -// Please see https://github.com/ROCmSoftwarePlatform/rocPRIM/issues/100 -#if defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ -#undef _GLIBCXX14_CONSTEXPR -#define _GLIBCXX14_CONSTEXPR -#endif // defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ - #include #include #include diff --git a/test/rocprim/test_device_binary_search.cpp b/test/rocprim/test_device_binary_search.cpp index c6e99097a..cc7c25ec7 100644 --- a/test/rocprim/test_device_binary_search.cpp +++ b/test/rocprim/test_device_binary_search.cpp @@ -20,13 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -// This is compatiblity code for hip-clang and will be removed in the future -// Please see https://github.com/ROCmSoftwarePlatform/rocPRIM/issues/100 -#if defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ -#undef _GLIBCXX14_CONSTEXPR -#define _GLIBCXX14_CONSTEXPR -#endif // defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ - #include #include #include diff --git a/test/rocprim/test_device_merge.cpp b/test/rocprim/test_device_merge.cpp index d63ee6fe6..d81a60030 100644 --- a/test/rocprim/test_device_merge.cpp +++ b/test/rocprim/test_device_merge.cpp @@ -20,13 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
-// This is compatiblity code for hip-clang and will be removed in the future -// Please see https://github.com/ROCmSoftwarePlatform/rocPRIM/issues/100 -#if defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ -#undef _GLIBCXX14_CONSTEXPR -#define _GLIBCXX14_CONSTEXPR -#endif // defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ - #include #include #include diff --git a/test/rocprim/test_device_merge_sort.cpp b/test/rocprim/test_device_merge_sort.cpp index 7ebddcb42..878d6a499 100644 --- a/test/rocprim/test_device_merge_sort.cpp +++ b/test/rocprim/test_device_merge_sort.cpp @@ -20,13 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -// This is compatiblity code for hip-clang and will be removed in the future -// Please see https://github.com/ROCmSoftwarePlatform/rocPRIM/issues/100 -#if defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ -#undef _GLIBCXX14_CONSTEXPR -#define _GLIBCXX14_CONSTEXPR -#endif // defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ - #include #include #include diff --git a/test/rocprim/test_device_radix_sort.cpp b/test/rocprim/test_device_radix_sort.cpp index 4211202bb..0604d6418 100644 --- a/test/rocprim/test_device_radix_sort.cpp +++ b/test/rocprim/test_device_radix_sort.cpp @@ -20,13 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
-// This is compatiblity code for hip-clang and will be removed in the future -// Please see https://github.com/ROCmSoftwarePlatform/rocPRIM/issues/100 -#if defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ -#undef _GLIBCXX14_CONSTEXPR -#define _GLIBCXX14_CONSTEXPR -#endif // defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ - #include #include #include diff --git a/test/rocprim/test_device_segmented_radix_sort.cpp b/test/rocprim/test_device_segmented_radix_sort.cpp index 17dd3a2a1..cbe3e45fa 100644 --- a/test/rocprim/test_device_segmented_radix_sort.cpp +++ b/test/rocprim/test_device_segmented_radix_sort.cpp @@ -20,13 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -// This is compatiblity code for hip-clang and will be removed in the future -// Please see https://github.com/ROCmSoftwarePlatform/rocPRIM/issues/100 -#if defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ -#undef _GLIBCXX14_CONSTEXPR -#define _GLIBCXX14_CONSTEXPR -#endif // defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ - #include #include #include diff --git a/test/rocprim/test_warp_sort.cpp b/test/rocprim/test_warp_sort.cpp index 3da5d8816..de455f454 100644 --- a/test/rocprim/test_warp_sort.cpp +++ b/test/rocprim/test_warp_sort.cpp @@ -20,13 +20,6 @@ // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. 
-// This is compatiblity code for hip-clang and will be removed in the future -// Please see https://github.com/ROCmSoftwarePlatform/rocPRIM/issues/100 -#if defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ -#undef _GLIBCXX14_CONSTEXPR -#define _GLIBCXX14_CONSTEXPR -#endif // defined(__HIPCC__) && __HIP_DEVICE_COMPILE__ - #include #include #include From d989fb0a56546b763c919afb01d02ac1e7766000 Mon Sep 17 00:00:00 2001 From: saadrahim <44449863+saadrahim@users.noreply.github.com> Date: Mon, 7 Oct 2019 16:22:57 -0600 Subject: [PATCH 03/25] Fixing SLES tests (#107) --- Jenkinsfile | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 55385e8ec..ff0485787 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -47,28 +47,15 @@ rocprimCI: { platform, project-> + String sudo = auxiliary.sudo(platform.jenkinsLabel) def testCommand = 'ctest --output-on-failure -E rocprim.hip.device_merge_sort' - def command - - if(platform.jenkinsLabel.contains('centos') || platform.jenkinsLabel.contains('sles')) - { - command = """#!/usr/bin/env bash + def command = """#!/usr/bin/env bash set -x cd ${project.paths.project_build_prefix} cd ${project.testDirectory} - LD_LIBRARY_PATH=/opt/rocm/hcc/lib sudo ${testCommand} + ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib ${testCommand} """ - } - else - { - command = """#!/usr/bin/env bash - set -x - cd ${project.paths.project_build_prefix} - cd ${project.testDirectory} - LD_LIBRARY_PATH=/opt/rocm/hcc/lib ${testCommand} - """ - } platform.runCommand(this, command) } From 246d120d4a2765dca92f5b79e83a7cb6b5f75c02 Mon Sep 17 00:00:00 2001 From: saadrahim <44449863+saadrahim@users.noreply.github.com> Date: Mon, 7 Oct 2019 16:46:10 -0600 Subject: [PATCH 04/25] Adding gfx908 (#106) --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index ff0485787..a39dbf865 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -11,7 +11,7 @@ rocprimCI: def rocprim = new 
rocProject('rocPRIM') - def nodes = new dockerNodes(['gfx803 && centos7', 'ubuntu', 'gfx900 && centos7', 'gfx906 && centos7', 'sles'], rocprim) + def nodes = new dockerNodes(['gfx803 && centos7', 'ubuntu && gfx908', 'gfx900 && centos7', 'gfx906 && centos7', 'sles'], rocprim) boolean formatCheck = false From 3e8dba17edd16bc5dee79ddce41109cb273f3969 Mon Sep 17 00:00:00 2001 From: saadrahim <44449863+saadrahim@users.noreply.github.com> Date: Tue, 8 Oct 2019 11:22:47 -0600 Subject: [PATCH 05/25] Restoring test coverage as fix is in (#108) --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index a39dbf865..a536d2b65 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -48,7 +48,7 @@ rocprimCI: platform, project-> String sudo = auxiliary.sudo(platform.jenkinsLabel) - def testCommand = 'ctest --output-on-failure -E rocprim.hip.device_merge_sort' + def testCommand = 'ctest --output-on-failure' def command = """#!/usr/bin/env bash set -x From 22d413d53f181733d2b09020fc9dcc52c7d948c8 Mon Sep 17 00:00:00 2001 From: saadrahim <44449863+saadrahim@users.noreply.github.com> Date: Wed, 9 Oct 2019 13:38:15 -0600 Subject: [PATCH 06/25] Incrementing version number for rocm 2.10 release (#109) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 918f1493d..cd587981f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,7 +73,7 @@ include(cmake/Dependencies.cmake) set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") # Setup VERSION -set(VERSION_STRING "2.8.0") +set(VERSION_STRING "2.9.0") rocm_setup_version(VERSION ${VERSION_STRING}) # Print configuration summary From f76ae15cbdc863525fb5c7a24b9b8157bee4213a Mon Sep 17 00:00:00 2001 From: amdkila <47991923+amdkila@users.noreply.github.com> Date: Mon, 11 Nov 2019 19:26:33 -0700 Subject: [PATCH 07/25] Remove zypper update from dockerfile (#111) --- 
docker/dockerfile-build-sles | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/dockerfile-build-sles b/docker/dockerfile-build-sles index 9d74cb605..1ac006d83 100644 --- a/docker/dockerfile-build-sles +++ b/docker/dockerfile-build-sles @@ -13,7 +13,7 @@ ARG user_uid # * rocfft-test: gfortran, googletest # * rocfft-bench: libboost-program-options-dev # * libhsakmt.so: libnuma1 -RUN zypper -n update && zypper -n install\ +RUN zypper refresh && zypper -n install\ rock-dkms \ sudo \ ca-certificates \ From 1159103618039b213816eb7a307c680fbb4b8a67 Mon Sep 17 00:00:00 2001 From: Eiden Yoshida <47196116+eidenyoshida@users.noreply.github.com> Date: Fri, 29 Nov 2019 15:58:02 -0700 Subject: [PATCH 08/25] Remove rock-dkms from SLES docker (#114) --- docker/dockerfile-build-sles | 1 - 1 file changed, 1 deletion(-) diff --git a/docker/dockerfile-build-sles b/docker/dockerfile-build-sles index 1ac006d83..00afab40e 100644 --- a/docker/dockerfile-build-sles +++ b/docker/dockerfile-build-sles @@ -14,7 +14,6 @@ ARG user_uid # * rocfft-bench: libboost-program-options-dev # * libhsakmt.so: libnuma1 RUN zypper refresh && zypper -n install\ - rock-dkms \ sudo \ ca-certificates \ git \ From 31fb9cad5f249260f20c0cddbfcbda17174e3bb8 Mon Sep 17 00:00:00 2001 From: saadrahim <44449863+saadrahim@users.noreply.github.com> Date: Fri, 6 Dec 2019 14:11:29 -0700 Subject: [PATCH 09/25] Changing dependency package name (#115) * Changing dependency package name * Changing to rocm-dev for package dependency --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cd587981f..ab79cfc84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,8 +101,8 @@ endif() # Package -set(CPACK_DEBIAN_PACKAGE_DEPENDS "hip_hcc (>= 1.5.18263)") # 1.5.18263 is HIP version in ROCm 1.8.2 -set(CPACK_RPM_PACKAGE_REQUIRES "hip_hcc >= 1.5.18263") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-dev (>= 2.5.27)") # 1.5.18263 is HIP version in 
ROCm 1.8.2 +set(CPACK_RPM_PACKAGE_REQUIRES "rocm-dev (>= 2.5.27)") set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt") if(NOT CPACK_PACKAGING_INSTALL_PREFIX) set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") @@ -113,5 +113,5 @@ set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "\${CPACK_PACKAGING_INSTALL_PR rocm_create_package( NAME rocprim DESCRIPTION "Radeon Open Compute Parallel Primitives Library" - MAINTAINER "Saad Rahim " + MAINTAINER "rocPRIM Maintainer " ) From b85751baa8f216a16dfca4fdd85c74b2674b18b6 Mon Sep 17 00:00:00 2001 From: saadrahim <44449863+saadrahim@users.noreply.github.com> Date: Fri, 6 Dec 2019 16:07:13 -0700 Subject: [PATCH 10/25] Fixing RHEL packaging for rocm-dev dependency (#116) * Fixing RHEL packaging for rocm-dev dependency * Fixing spaces --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ab79cfc84..02e846e75 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,8 +101,8 @@ endif() # Package -set(CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-dev (>= 2.5.27)") # 1.5.18263 is HIP version in ROCm 1.8.2 -set(CPACK_RPM_PACKAGE_REQUIRES "rocm-dev (>= 2.5.27)") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "rocm-dev (>= 2.5.27)") +set(CPACK_RPM_PACKAGE_REQUIRES "rocm-dev >= 2.5.27") set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.txt") if(NOT CPACK_PACKAGING_INSTALL_PREFIX) set(CPACK_PACKAGING_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") From 88b03e36e13d17a7d3e07d954f59ba31b02f032d Mon Sep 17 00:00:00 2001 From: Eiden Yoshida <47196116+eidenyoshida@users.noreply.github.com> Date: Mon, 23 Dec 2019 13:38:30 -0700 Subject: [PATCH 11/25] Add sleeps to lookback_scan_state (#117) --- .../device/detail/lookback_scan_state.hpp | 48 ++++++++++--- .../rocprim/device/device_partition.hpp | 69 +++++++++++++++---- .../include/rocprim/device/device_scan.hpp | 66 +++++++++++++----- 3 files changed, 143 insertions(+), 40 deletions(-) diff --git 
a/rocprim/include/rocprim/device/detail/lookback_scan_state.hpp b/rocprim/include/rocprim/device/detail/lookback_scan_state.hpp index a1c5b1174..f76576ec3 100644 --- a/rocprim/include/rocprim/device/detail/lookback_scan_state.hpp +++ b/rocprim/include/rocprim/device/detail/lookback_scan_state.hpp @@ -33,6 +33,10 @@ #include "../../detail/various.hpp" #include "../../detail/binary_op_wrappers.hpp" +extern "C" +{ + void __builtin_amdgcn_s_sleep(int); +} BEGIN_ROCPRIM_NAMESPACE // Single pass prefix scan was implemented based on: @@ -58,12 +62,12 @@ enum prefix_flag // a look-back prefix scan. Initially every prefix can be either // invalid (padding values) or empty. One thread in a block should // later set it to partial, and later to complete. -template +template struct lookback_scan_state; // Packed flag and prefix value are loaded/stored in one atomic operation. -template -struct lookback_scan_state +template +struct lookback_scan_state { private: using flag_type_ = char; @@ -148,12 +152,25 @@ struct lookback_scan_state void get(const unsigned int block_id, flag_type& flag, T& value) { prefix_type prefix; - do + + const uint SLEEP_MAX = 32; + uint times_through = 1; + + prefix_underlying_type p = ::rocprim::detail::atomic_add(&prefixes[padding + block_id], 0); + __builtin_memcpy(&prefix, &p, sizeof(prefix_type)); + while(prefix.flag == PREFIX_EMPTY) { + if (UseSleep) + { + for (int j = 0; j < times_through; j++) + __builtin_amdgcn_s_sleep(1); + if (times_through < SLEEP_MAX) + times_through++; + } // atomic_add(..., 0) is used to load values atomically prefix_underlying_type p = ::rocprim::detail::atomic_add(&prefixes[padding + block_id], 0); __builtin_memcpy(&prefix, &p, sizeof(prefix_type)); - } while(prefix.flag == PREFIX_EMPTY); + } // return flag = prefix.flag; @@ -175,8 +192,8 @@ struct lookback_scan_state // Flag, partial and final prefixes are stored in separate arrays. 
// Consistency ensured by memory fences between flag and prefixes load/store operations. -template -struct lookback_scan_state +template +struct lookback_scan_state { private: static constexpr unsigned int padding = ::rocprim::warp_size(); @@ -247,11 +264,24 @@ struct lookback_scan_state ROCPRIM_DEVICE inline void get(const unsigned int block_id, flag_type& flag, T& value) { - do + const uint SLEEP_MAX = 32; + uint times_through = 1; + + flag = load_volatile(&prefixes_flags[padding + block_id]); + ::rocprim::detail::memory_fence_device(); + while(flag == PREFIX_EMPTY) { + if (UseSleep) + { + for (int j = 0; j < times_through; j++) + __builtin_amdgcn_s_sleep(1); + if (times_through < SLEEP_MAX) + times_through++; + } + flag = load_volatile(&prefixes_flags[padding + block_id]); ::rocprim::detail::memory_fence_device(); - } while(flag == PREFIX_EMPTY); + } if(flag == PREFIX_PARTIAL) value = load_volatile(&prefixes_partial_values[padding + block_id]); diff --git a/rocprim/include/rocprim/device/device_partition.hpp b/rocprim/include/rocprim/device/device_partition.hpp index db626901e..96c6e2301 100644 --- a/rocprim/include/rocprim/device/device_partition.hpp +++ b/rocprim/include/rocprim/device/device_partition.hpp @@ -143,7 +143,9 @@ hipError_t partition_impl(void * temporary_storage, >; using offset_scan_state_type = detail::lookback_scan_state; + using offset_scan_state_with_sleep_type = detail::lookback_scan_state; using ordered_block_id_type = detail::ordered_block_id; + constexpr unsigned int block_size = config::block_size; constexpr unsigned int items_per_thread = config::items_per_thread; @@ -153,6 +155,7 @@ hipError_t partition_impl(void * temporary_storage, // Calculate required temporary storage size_t offset_scan_state_bytes = ::rocprim::detail::align_size( + // This is valid even with offset_scan_state_with_sleep_type offset_scan_state_type::get_storage_size(number_of_blocks) ); size_t ordered_block_id_bytes = ordered_block_id_type::get_storage_size(); @@ 
-177,6 +180,9 @@ hipError_t partition_impl(void * temporary_storage, auto offset_scan_state = offset_scan_state_type::create( temporary_storage, number_of_blocks ); + auto offset_scan_state_with_sleep = offset_scan_state_with_sleep_type::create( + temporary_storage, number_of_blocks + ); // Create ad initialize ordered_block_id obj auto ptr = reinterpret_cast(temporary_storage); auto ordered_bid = ordered_block_id_type::create( @@ -185,25 +191,58 @@ hipError_t partition_impl(void * temporary_storage, if(debug_synchronous) start = std::chrono::high_resolution_clock::now(); auto grid_size = (number_of_blocks + block_size - 1)/block_size; - hipLaunchKernelGGL( - HIP_KERNEL_NAME(init_offset_scan_state_kernel), - dim3(grid_size), dim3(block_size), 0, stream, - offset_scan_state, number_of_blocks, ordered_bid - ); + + hipDeviceProp_t prop; + int deviceId; + hipGetDevice(&deviceId); + hipGetDeviceProperties(&prop, deviceId); + + if (prop.gcnArch == 908) + { + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_offset_scan_state_kernel), + dim3(grid_size), dim3(block_size), 0, stream, + offset_scan_state_with_sleep, number_of_blocks, ordered_bid + ); + } else + { + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_offset_scan_state_kernel), + dim3(grid_size), dim3(block_size), 0, stream, + offset_scan_state, number_of_blocks, ordered_bid + ); + } + + ROCPRIM_DETAIL_HIP_SYNC_AND_RETURN_ON_ERROR("init_offset_scan_state_kernel", size, start) if(debug_synchronous) start = std::chrono::high_resolution_clock::now(); grid_size = number_of_blocks; - hipLaunchKernelGGL( - HIP_KERNEL_NAME(partition_kernel< - SelectMethod, OnlySelected, config, - InputIterator, FlagIterator, OutputIterator, SelectedCountOutputIterator, - UnaryPredicate, decltype(inequality_op), offset_scan_state_type - >), - dim3(grid_size), dim3(block_size), 0, stream, - input, flags, output, selected_count_output, size, predicate, - inequality_op, offset_scan_state, number_of_blocks, ordered_bid - ); + if (prop.gcnArch == 908) + { 
+ hipLaunchKernelGGL( + HIP_KERNEL_NAME(partition_kernel< + SelectMethod, OnlySelected, config, + InputIterator, FlagIterator, OutputIterator, SelectedCountOutputIterator, + UnaryPredicate, decltype(inequality_op), offset_scan_state_with_sleep_type + >), + dim3(grid_size), dim3(block_size), 0, stream, + input, flags, output, selected_count_output, size, predicate, + inequality_op, offset_scan_state_with_sleep, number_of_blocks, ordered_bid + ); + } else + { + hipLaunchKernelGGL( + HIP_KERNEL_NAME(partition_kernel< + SelectMethod, OnlySelected, config, + InputIterator, FlagIterator, OutputIterator, SelectedCountOutputIterator, + UnaryPredicate, decltype(inequality_op), offset_scan_state_type + >), + dim3(grid_size), dim3(block_size), 0, stream, + input, flags, output, selected_count_output, size, predicate, + inequality_op, offset_scan_state, number_of_blocks, ordered_bid + ); + } ROCPRIM_DETAIL_HIP_SYNC_AND_RETURN_ON_ERROR("partition_kernel", size, start) return hipSuccess; diff --git a/rocprim/include/rocprim/device/device_scan.hpp b/rocprim/include/rocprim/device/device_scan.hpp index c7a84bf1d..9bd28803b 100644 --- a/rocprim/include/rocprim/device/device_scan.hpp +++ b/rocprim/include/rocprim/device/device_scan.hpp @@ -325,6 +325,7 @@ auto scan_impl(void * temporary_storage, using config = Config; using scan_state_type = detail::lookback_scan_state; + using scan_state_with_sleep_type = detail::lookback_scan_state; using ordered_block_id_type = detail::ordered_block_id; constexpr unsigned int block_size = config::block_size; @@ -334,6 +335,7 @@ auto scan_impl(void * temporary_storage, // Calculate required temporary storage size_t scan_state_bytes = ::rocprim::detail::align_size( + // This is valid even with scan_state_with_sleep_type scan_state_type::get_storage_size(number_of_blocks) ); size_t ordered_block_id_bytes = ordered_block_id_type::get_storage_size(); @@ -358,34 +360,66 @@ auto scan_impl(void * temporary_storage, { // Create and initialize 
lookback_scan_state obj auto scan_state = scan_state_type::create(temporary_storage, number_of_blocks); + auto scan_state_with_sleep = scan_state_with_sleep_type::create(temporary_storage, number_of_blocks); // Create ad initialize ordered_block_id obj auto ptr = reinterpret_cast(temporary_storage); auto ordered_bid = ordered_block_id_type::create( reinterpret_cast(ptr + scan_state_bytes) ); + hipDeviceProp_t prop; + int deviceId; + hipGetDevice(&deviceId); + hipGetDeviceProperties(&prop, deviceId); + if(debug_synchronous) start = std::chrono::high_resolution_clock::now(); auto grid_size = (number_of_blocks + block_size - 1)/block_size; - hipLaunchKernelGGL( - HIP_KERNEL_NAME(init_lookback_scan_state_kernel), - dim3(grid_size), dim3(block_size), 0, stream, - scan_state, number_of_blocks, ordered_bid - ); + if (prop.gcnArch == 908) + { + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_lookback_scan_state_kernel), + dim3(grid_size), dim3(block_size), 0, stream, + scan_state_with_sleep, number_of_blocks, ordered_bid + ); + } else + { + hipLaunchKernelGGL( + HIP_KERNEL_NAME(init_lookback_scan_state_kernel), + dim3(grid_size), dim3(block_size), 0, stream, + scan_state, number_of_blocks, ordered_bid + ); + } ROCPRIM_DETAIL_HIP_SYNC_AND_RETURN_ON_ERROR("init_lookback_scan_state_kernel", size, start) if(debug_synchronous) start = std::chrono::high_resolution_clock::now(); grid_size = number_of_blocks; - hipLaunchKernelGGL( - HIP_KERNEL_NAME(lookback_scan_kernel< - Exclusive, // flag for exclusive scan operation - config, // kernel configuration (block size, ipt) - InputIterator, OutputIterator, - BinaryFunction, result_type, scan_state_type - >), - dim3(grid_size), dim3(block_size), 0, stream, - input, output, size, static_cast(initial_value), - scan_op, scan_state, number_of_blocks, ordered_bid - ); + if (prop.gcnArch == 908) + { + hipLaunchKernelGGL( + HIP_KERNEL_NAME(lookback_scan_kernel< + Exclusive, // flag for exclusive scan operation + config, // kernel configuration 
(block size, ipt) + InputIterator, OutputIterator, + BinaryFunction, result_type, scan_state_with_sleep_type + >), + dim3(grid_size), dim3(block_size), 0, stream, + input, output, size, static_cast(initial_value), + scan_op, scan_state_with_sleep, number_of_blocks, ordered_bid + ); + } else + { + hipLaunchKernelGGL( + HIP_KERNEL_NAME(lookback_scan_kernel< + Exclusive, // flag for exclusive scan operation + config, // kernel configuration (block size, ipt) + InputIterator, OutputIterator, + BinaryFunction, result_type, scan_state_type + >), + dim3(grid_size), dim3(block_size), 0, stream, + input, output, size, static_cast(initial_value), + scan_op, scan_state, number_of_blocks, ordered_bid + ); + } ROCPRIM_DETAIL_HIP_SYNC_AND_RETURN_ON_ERROR("lookback_scan_kernel", size, start) } else From 5fa0c79fbb199f83c5202d9e218311a78335a1f4 Mon Sep 17 00:00:00 2001 From: Pruthvi Madugundu Date: Wed, 8 Jan 2020 10:20:16 -0800 Subject: [PATCH 12/25] rocPRIM changes to support relocatable ROCM installation (#118) - New mode of building is added "-r,--relocatable" which is used for ROCm stack installed in /opt/rocm-ver. 
- Below CMAKE parameters are set/overwritten in above mode CMAKE_INSTALL_PREFIX CMAKE_MODULE_PATH CMAKE_PREFIX_PATH Signed-off-by: Pruthvi Madugundu --- install | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/install b/install index b99d32826..f8e684774 100755 --- a/install +++ b/install @@ -11,6 +11,7 @@ function display_help() echo " [-h|--help] prints this help message" echo " [-i|--install] install after build" echo " [-p]--package build package" + echo " [-r]--relocatable] create a package to support relocatable ROCm" #Not implemented yet # echo " [-d|--dependencies] install build dependencies" echo " [-c|--clients] build library clients too (combines with -i & -d)" @@ -28,7 +29,9 @@ build_clients=false build_release=true build_hip_clang=false run_tests=false -rocm_path=/opt/rocm/bin +rocm_path=/opt/rocm +build_relocatable=false + # ################################################# # Parameter parsing # ################################################# @@ -36,7 +39,7 @@ rocm_path=/opt/rocm/bin # check if we have a modern version of getopt that can handle whitespace and long parameters getopt -T if [[ $? -eq 4 ]]; then - GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,install,clients,debug,hip-clang,test,package --options hicdtp -- "$@") + GETOPT_PARSE=$(getopt --name "${0}" --longoptions help,install,clients,debug,hip-clang,test,package,relocatable --options hicdtpgr -- "$@") else echo "Need a new version of getopt" exit 1 @@ -62,7 +65,10 @@ while true; do -p|--package) build_package=true shift ;; - -c|--clients) + -r|--relocatable) + build_relocatable=true + shift ;; + -c|--clients) build_clients=true shift ;; -g|--debug) @@ -81,6 +87,11 @@ while true; do esac done +if [[ "${build_relocatable}" == true ]]; then + if ! 
[ -z ${ROCM_PATH+x} ]; then + rocm_path=${ROCM_PATH} + fi +fi # Instal the pre-commit hook bash ./githooks/install @@ -110,10 +121,17 @@ if [[ "${build_hip_clang}" == true ]]; then compiler="hipcc" fi +cmake_executable="cmake" if [ -e /etc/redhat-release ] ; then - CXX=$rocm_path/${compiler} cmake3 -DBUILD_BENCHMARK=ON ../../. # or cmake-gui ../. + cmake_executable="cmake3" +fi + +if [[ "${build_relocatable}" == true ]]; then + CXX=${rocm_path}/bin/${compiler} ${cmake_executable} -DCMAKE_INSTALL_PREFIX=${rocm_path} -DBUILD_BENCHMARK=ON \ + -DCMAKE_PREFIX_PATH="${rocm_path} ${rocm_path}/hcc ${rocm_path}/hip" \ + -DCMAKE_MODULE_PATH="${rocm_path}/hip/cmake" ../../. # or cmake-gui ../. else - CXX=$rocm_path/${compiler} cmake -DBUILD_BENCHMARK=ON ../../. # or cmake-gui ../. + CXX=${rocm_path}/bin/${compiler} ${cmake_executable} -DBUILD_BENCHMARK=ON ../../. # or cmake-gui ../. fi # Build From f1ab775c716973eaff5fbee70fe2c4403d6ab5b6 Mon Sep 17 00:00:00 2001 From: Adel Johar Date: Thu, 22 Aug 2019 10:11:46 +0200 Subject: [PATCH 13/25] Add half-overload for load/store-volatile --- rocprim/include/rocprim/detail/various.hpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/rocprim/include/rocprim/detail/various.hpp b/rocprim/include/rocprim/detail/various.hpp index 65973d37c..c646b062a 100644 --- a/rocprim/include/rocprim/detail/various.hpp +++ b/rocprim/include/rocprim/detail/various.hpp @@ -180,6 +180,12 @@ auto store_volatile(T * output, T value) } } +ROCPRIM_DEVICE inline +void store_volatile(half * output, half value) +{ + *reinterpret_cast(output) = value; +} + template ROCPRIM_DEVICE inline auto load_volatile(T * input) @@ -209,6 +215,13 @@ auto load_volatile(T * input) return retval; } +ROCPRIM_DEVICE inline +half load_volatile(half * input) +{ + half retval = *reinterpret_cast(input); + return retval; +} + // A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions template struct raw_storage From 
9934192c1b89f0e9bfb13d409f24509863aa81e1 Mon Sep 17 00:00:00 2001 From: Istvan Kiss Date: Wed, 15 Jan 2020 17:09:43 +0100 Subject: [PATCH 14/25] Gitlab CI fix --- .gitlab-ci-gputest.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci-gputest.yml b/.gitlab-ci-gputest.yml index 6594d88fd..d73f09d71 100644 --- a/.gitlab-ci-gputest.yml +++ b/.gitlab-ci-gputest.yml @@ -13,6 +13,7 @@ test:s9300: tags: - s9300 - rocm + allow_failure: true test:mi25: extends: .test From 05ec32907f9725ab14e7ec408e64f0e109f98bf0 Mon Sep 17 00:00:00 2001 From: Andres Arpi Date: Tue, 21 Jan 2020 19:25:29 +0100 Subject: [PATCH 15/25] Fix compile warnings --- rocprim/include/rocprim/device/detail/lookback_scan_state.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rocprim/include/rocprim/device/detail/lookback_scan_state.hpp b/rocprim/include/rocprim/device/detail/lookback_scan_state.hpp index f76576ec3..af1124623 100644 --- a/rocprim/include/rocprim/device/detail/lookback_scan_state.hpp +++ b/rocprim/include/rocprim/device/detail/lookback_scan_state.hpp @@ -162,7 +162,7 @@ struct lookback_scan_state { if (UseSleep) { - for (int j = 0; j < times_through; j++) + for (uint j = 0; j < times_through; j++) __builtin_amdgcn_s_sleep(1); if (times_through < SLEEP_MAX) times_through++; @@ -273,7 +273,7 @@ struct lookback_scan_state { if (UseSleep) { - for (int j = 0; j < times_through; j++) + for (uint j = 0; j < times_through; j++) __builtin_amdgcn_s_sleep(1); if (times_through < SLEEP_MAX) times_through++; From 30e90e8598c59ad91b384dabb9e912816a4efea7 Mon Sep 17 00:00:00 2001 From: Andres Arpi Date: Tue, 21 Jan 2020 23:47:11 +0100 Subject: [PATCH 16/25] Use seeds --- test/rocprim/CMakeLists.txt | 4 +- test/rocprim/test_arg_index_iterator.cpp | 192 +-- test/rocprim/test_block_discontinuity.cpp | 447 +++--- test/rocprim/test_block_histogram.cpp | 118 +- test/rocprim/test_block_load_store.cpp | 348 +++-- test/rocprim/test_block_radix_sort.cpp | 292 ++-- 
test/rocprim/test_block_reduce.cpp | 358 +++-- test/rocprim/test_block_scan.cpp | 1322 +++++++++-------- test/rocprim/test_block_sort.cpp | 441 +++--- test/rocprim/test_constant_iterator.cpp | 93 +- test/rocprim/test_counting_iterator.cpp | 91 +- test/rocprim/test_device_binary_search.cpp | 546 +++---- test/rocprim/test_device_histogram.cpp | 966 ++++++------ test/rocprim/test_device_merge.cpp | 527 +++---- test/rocprim/test_device_merge_sort.cpp | 468 +++--- test/rocprim/test_device_partition.cpp | 488 +++--- test/rocprim/test_device_radix_sort.cpp | 882 +++++------ test/rocprim/test_device_reduce.cpp | 519 +++---- test/rocprim/test_device_reduce_by_key.cpp | 309 ++-- .../rocprim/test_device_run_length_encode.cpp | 504 ++++--- test/rocprim/test_device_scan.cpp | 796 +++++----- .../test_device_segmented_radix_sort.cpp | 1030 ++++++------- test/rocprim/test_device_segmented_reduce.cpp | 197 +-- test/rocprim/test_device_segmented_scan.cpp | 896 +++++------ test/rocprim/test_device_select.cpp | 655 ++++---- test/rocprim/test_device_transform.cpp | 258 ++-- test/rocprim/test_discard_iterator.cpp | 50 +- test/rocprim/test_intrinsics.cpp | 646 ++++---- test/rocprim/test_seed.hpp | 28 + test/rocprim/test_texture_cache_iterator.cpp | 122 +- test/rocprim/test_transform_iterator.cpp | 180 +-- test/rocprim/test_utils.hpp | 30 +- test/rocprim/test_warp_reduce.cpp | 905 +++++------ test/rocprim/test_warp_scan.cpp | 1012 +++++++------ test/rocprim/test_warp_sort.cpp | 282 ++-- test/rocprim/test_zip_iterator.cpp | 446 +++--- 36 files changed, 8611 insertions(+), 7837 deletions(-) create mode 100644 test/rocprim/test_seed.hpp diff --git a/test/rocprim/CMakeLists.txt b/test/rocprim/CMakeLists.txt index 7603e3d2e..ce043244a 100644 --- a/test/rocprim/CMakeLists.txt +++ b/test/rocprim/CMakeLists.txt @@ -51,7 +51,7 @@ endfunction() # **************************************************************************** # HIP basic test, which also checks if there are no linkage problems when 
there are multiple sources -add_rocprim_test("rocprim.basic_test" "test_basic.cpp;detail/get_rocprim_version.cpp") +add_rocprim_test("rocprim.basic_test" "test_basic.cpp;detail/get_rocprim_version.cpp") add_rocprim_test("rocprim.arg_index_iterator" test_arg_index_iterator.cpp) add_rocprim_test("rocprim.block_discontinuity" test_block_discontinuity.cpp) @@ -86,4 +86,4 @@ add_rocprim_test("rocprim.intrinsics" test_intrinsics.cpp) add_rocprim_test("rocprim.warp_reduce" test_warp_reduce.cpp) add_rocprim_test("rocprim.warp_scan" test_warp_scan.cpp) add_rocprim_test("rocprim.warp_sort" test_warp_sort.cpp) -add_rocprim_test("rocprim.zip_iterator" test_zip_iterator.cpp) +add_rocprim_test("rocprim.zip_iterator" test_zip_iterator.cpp) \ No newline at end of file diff --git a/test/rocprim/test_arg_index_iterator.cpp b/test/rocprim/test_arg_index_iterator.cpp index 5514618c7..aec08929c 100644 --- a/test/rocprim/test_arg_index_iterator.cpp +++ b/test/rocprim/test_arg_index_iterator.cpp @@ -66,23 +66,29 @@ TYPED_TEST(RocprimArgIndexIteratorTests, Equal) using T = typename TestFixture::input_type; using Iterator = typename rocprim::arg_index_iterator; - std::vector input = test_utils::get_random_data(5, 1, 200); - - Iterator x(input.data()); - Iterator y = x; - for(size_t i = 0; i < 5; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - ASSERT_EQ(x[i].key, i); - ASSERT_EQ(x[i].value, input[i]); + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + std::vector input = test_utils::get_random_data(5, 1, 200, seed_value); + + Iterator x(input.data()); + Iterator y = x; + for(size_t i = 0; i < 5; i++) + { + ASSERT_EQ(x[i].key, i); + ASSERT_EQ(x[i].value, input[i]); + } + ASSERT_EQ(x[2].value, input[2]); + + x += 2; + for(size_t i = 0; i < 2; i++) + { + y++; + } + ASSERT_EQ(x, y); } - ASSERT_EQ(x[2].value, input[2]); - - x += 2; - for(size_t i = 0; i < 2; i++) - { - y++; - } - ASSERT_EQ(x, y); } struct arg_min @@ -112,79 +118,85 @@ TYPED_TEST(RocprimArgIndexIteratorTests, ReduceArgMinimum) hipStream_t stream = 0; // default - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 200); - std::vector output(1); - - T * d_input; - key_value * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(key_value))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - Iterator d_iter(d_input); - - arg_min reduce_op; - const key_value max(std::numeric_limits::max(), std::numeric_limits::max()); - - // Calculate expected results on host - Iterator x(input.data()); - key_value expected = std::accumulate(x, x + size, max, reduce_op); - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::reduce( - d_temp_storage, temp_storage_size_bytes, - d_iter, d_output, max, input.size(), - reduce_op, stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::reduce( - d_temp_storage, temp_storage_size_bytes, - d_iter, d_output, max, 
input.size(), - reduce_op, stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(key_value), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - auto diff = std::max(std::abs(0.01f * expected.value), T(0.01f)); - if(std::is_integral::value) diff = 0; - ASSERT_EQ(output[0].key, expected.key); - ASSERT_NEAR(output[0].value, expected.value, diff); - - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + { + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 200, seed_value); + std::vector output(1); + + T * d_input; + key_value * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(key_value))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + Iterator d_iter(d_input); + + arg_min reduce_op; + const key_value max(std::numeric_limits::max(), std::numeric_limits::max()); + + // Calculate expected results on host + Iterator x(input.data()); + key_value expected = std::accumulate(x, x + size, max, reduce_op); + + // temp storage + size_t temp_storage_size_bytes; + void * d_temp_storage = nullptr; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::reduce( + d_temp_storage, temp_storage_size_bytes, + d_iter, d_output, max, input.size(), + reduce_op, stream, debug_synchronous + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + HIP_CHECK(hipMalloc(&d_temp_storage, 
temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::reduce( + d_temp_storage, temp_storage_size_bytes, + d_iter, d_output, max, input.size(), + reduce_op, stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(key_value), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + auto diff = std::max(std::abs(0.01f * expected.value), T(0.01f)); + if(std::is_integral::value) diff = 0; + ASSERT_EQ(output[0].key, expected.key); + ASSERT_NEAR(output[0].value, expected.value, diff); + + hipFree(d_input); + hipFree(d_output); + hipFree(d_temp_storage); + } } diff --git a/test/rocprim/test_block_discontinuity.cpp b/test/rocprim/test_block_discontinuity.cpp index 7b133ac8a..36ad8f8c3 100644 --- a/test/rocprim/test_block_discontinuity.cpp +++ b/test/rocprim/test_block_discontinuity.cpp @@ -254,76 +254,83 @@ auto test_block_discontinuity() return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 0, 10); - std::vector heads(size); - - // Calculate expected results on host - std::vector expected_heads(size); - flag_op_type flag_op; - for(size_t bi = 0; bi < size / items_per_block; bi++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - for(size_t ii = 0; ii < items_per_block; ii++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 0, 10, seed_value); + std::vector heads(size); + + // Calculate expected results on host + std::vector expected_heads(size); + flag_op_type flag_op; + for(size_t bi = 0; bi < size / items_per_block; bi++) { - const size_t i = bi * items_per_block + ii; - if(ii == 0) + for(size_t ii = 0; ii < items_per_block; ii++) { - expected_heads[i] = bi % 2 == 1 - ? apply(flag_op, input[i - 1], input[i], ii) - : flag_type(true); - } - else - { - expected_heads[i] = apply(flag_op, input[i - 1], input[i], ii); + const size_t i = bi * items_per_block + ii; + if(ii == 0) + { + expected_heads[i] = bi % 2 == 1 + ? apply(flag_op, input[i - 1], input[i], ii) + : flag_type(true); + } + else + { + expected_heads[i] = apply(flag_op, input[i - 1], input[i], ii); + } } } - } - // Preparing Device - type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - long long* device_heads; - HIP_CHECK(hipMalloc(&device_heads, heads.size() * sizeof(typename decltype(heads)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - flag_heads_kernel< - type, flag_type, flag_op_type, - block_size, items_per_thread - > - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_heads - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Reading results - HIP_CHECK( - hipMemcpy( - heads.data(), device_heads, - heads.size() * sizeof(typename decltype(heads)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(heads[i], expected_heads[i]); - } + // Preparing Device + type* device_input; + HIP_CHECK(hipMalloc(&device_input, 
input.size() * sizeof(typename decltype(input)::value_type))); + long long* device_heads; + HIP_CHECK(hipMalloc(&device_heads, heads.size() * sizeof(typename decltype(heads)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(type), + hipMemcpyHostToDevice + ) + ); + + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + flag_heads_kernel< + type, flag_type, flag_op_type, + block_size, items_per_thread + > + ), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, device_heads + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Reading results + HIP_CHECK( + hipMemcpy( + heads.data(), device_heads, + heads.size() * sizeof(typename decltype(heads)::value_type), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + for(size_t i = 0; i < size; i++) + { + ASSERT_EQ(heads[i], expected_heads[i]); + } - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_heads)); + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_heads)); + } + } template< @@ -359,76 +366,83 @@ auto test_block_discontinuity() return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 0, 10); - std::vector tails(size); - - // Calculate expected results on host - std::vector expected_tails(size); - flag_op_type flag_op; - for(size_t bi = 0; bi < size / items_per_block; bi++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - for(size_t ii = 0; ii < items_per_block; ii++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 0, 10, seed_value); + std::vector tails(size); + + // Calculate expected results on host + std::vector expected_tails(size); + flag_op_type flag_op; + for(size_t bi = 0; bi < size / items_per_block; bi++) { - const size_t i = bi * items_per_block + ii; - if(ii == items_per_block - 1) + for(size_t ii = 0; ii < items_per_block; ii++) { - expected_tails[i] = bi % 2 == 0 - ? apply(flag_op, input[i], input[i + 1], ii + 1) - : flag_type(true); - } - else - { - expected_tails[i] = apply(flag_op, input[i], input[i + 1], ii + 1); + const size_t i = bi * items_per_block + ii; + if(ii == items_per_block - 1) + { + expected_tails[i] = bi % 2 == 0 + ? apply(flag_op, input[i], input[i + 1], ii + 1) + : flag_type(true); + } + else + { + expected_tails[i] = apply(flag_op, input[i], input[i + 1], ii + 1); + } } } - } - // Preparing Device - type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - long long* device_tails; - HIP_CHECK(hipMalloc(&device_tails, tails.size() * sizeof(typename decltype(tails)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - flag_tails_kernel< - type, flag_type, flag_op_type, - block_size, items_per_thread - > - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_tails - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Reading results - HIP_CHECK( - hipMemcpy( - tails.data(), device_tails, - tails.size() * sizeof(typename decltype(tails)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(tails[i], expected_tails[i]); - } + // Preparing Device + type* 
device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + long long* device_tails; + HIP_CHECK(hipMalloc(&device_tails, tails.size() * sizeof(typename decltype(tails)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(type), + hipMemcpyHostToDevice + ) + ); + + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + flag_tails_kernel< + type, flag_type, flag_op_type, + block_size, items_per_thread + > + ), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, device_tails + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Reading results + HIP_CHECK( + hipMemcpy( + tails.data(), device_tails, + tails.size() * sizeof(typename decltype(tails)::value_type), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + for(size_t i = 0; i < size; i++) + { + ASSERT_EQ(tails[i], expected_tails[i]); + } - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_tails)); + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_tails)); + } + } template< @@ -464,100 +478,107 @@ auto test_block_discontinuity() return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 0, 10); - std::vector heads(size); - std::vector tails(size); - - // Calculate expected results on host - std::vector expected_heads(size); - std::vector expected_tails(size); - flag_op_type flag_op; - for(size_t bi = 0; bi < size / items_per_block; bi++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - for(size_t ii = 0; ii < items_per_block; ii++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 0, 10, seed_value); + std::vector heads(size); + std::vector tails(size); + + // Calculate expected results on host + std::vector expected_heads(size); + std::vector expected_tails(size); + flag_op_type flag_op; + for(size_t bi = 0; bi < size / items_per_block; bi++) { - const size_t i = bi * items_per_block + ii; - if(ii == 0) - { - expected_heads[i] = (bi % 4 == 1 || bi % 4 == 2) - ? apply(flag_op, input[i - 1], input[i], ii) - : flag_type(true); - } - else - { - expected_heads[i] = apply(flag_op, input[i - 1], input[i], ii); - } - if(ii == items_per_block - 1) + for(size_t ii = 0; ii < items_per_block; ii++) { - expected_tails[i] = (bi % 4 == 0 || bi % 4 == 1) - ? apply(flag_op, input[i], input[i + 1], ii + 1) - : flag_type(true); - } - else - { - expected_tails[i] = apply(flag_op, input[i], input[i + 1], ii + 1); + const size_t i = bi * items_per_block + ii; + if(ii == 0) + { + expected_heads[i] = (bi % 4 == 1 || bi % 4 == 2) + ? apply(flag_op, input[i - 1], input[i], ii) + : flag_type(true); + } + else + { + expected_heads[i] = apply(flag_op, input[i - 1], input[i], ii); + } + if(ii == items_per_block - 1) + { + expected_tails[i] = (bi % 4 == 0 || bi % 4 == 1) + ? 
apply(flag_op, input[i], input[i + 1], ii + 1) + : flag_type(true); + } + else + { + expected_tails[i] = apply(flag_op, input[i], input[i + 1], ii + 1); + } } } - } - // Preparing Device - type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - long long* device_heads; - HIP_CHECK(hipMalloc(&device_heads, tails.size() * sizeof(typename decltype(heads)::value_type))); - long long* device_tails; - HIP_CHECK(hipMalloc(&device_tails, tails.size() * sizeof(typename decltype(tails)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - flag_heads_and_tails_kernel< - type, flag_type, flag_op_type, - block_size, items_per_thread - > - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_heads, device_tails - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Reading results - HIP_CHECK( - hipMemcpy( - heads.data(), device_heads, - heads.size() * sizeof(typename decltype(heads)::value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - tails.data(), device_tails, - tails.size() * sizeof(typename decltype(tails)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - for(size_t i = 0; i < size; i++) - { - ASSERT_EQ(heads[i], expected_heads[i]); - ASSERT_EQ(tails[i], expected_tails[i]); - } + // Preparing Device + type* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + long long* device_heads; + HIP_CHECK(hipMalloc(&device_heads, tails.size() * sizeof(typename decltype(heads)::value_type))); + long long* device_tails; + HIP_CHECK(hipMalloc(&device_tails, tails.size() * sizeof(typename decltype(tails)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(type), + 
hipMemcpyHostToDevice + ) + ); + + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + flag_heads_and_tails_kernel< + type, flag_type, flag_op_type, + block_size, items_per_thread + > + ), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, device_heads, device_tails + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Reading results + HIP_CHECK( + hipMemcpy( + heads.data(), device_heads, + heads.size() * sizeof(typename decltype(heads)::value_type), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK( + hipMemcpy( + tails.data(), device_tails, + tails.size() * sizeof(typename decltype(tails)::value_type), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + for(size_t i = 0; i < size; i++) + { + ASSERT_EQ(heads[i], expected_heads[i]); + ASSERT_EQ(tails[i], expected_tails[i]); + } - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_heads)); - HIP_CHECK(hipFree(device_tails)); + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_heads)); + HIP_CHECK(hipFree(device_tails)); + } + } // Static for-loop diff --git a/test/rocprim/test_block_histogram.cpp b/test/rocprim/test_block_histogram.cpp index 69c382c07..d30a68d51 100644 --- a/test/rocprim/test_block_histogram.cpp +++ b/test/rocprim/test_block_histogram.cpp @@ -137,66 +137,74 @@ void test_block_histogram_input_arrays() const size_t size = items_per_block * 37; const size_t bin_sizes = bin * 37; const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 0, bin - 1); - // Output histogram results - std::vector output_bin(bin_sizes, 0); - - // Calculate expected results on host - std::vector expected_bin(output_bin.size(), 0); - for(size_t i = 0; i < output.size() / items_per_block; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - for(size_t j = 0; j < items_per_block; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 0, bin - 1, seed_value); + + // Output histogram results + std::vector output_bin(bin_sizes, 0); + + // Calculate expected results on host + std::vector expected_bin(output_bin.size(), 0); + for(size_t i = 0; i < output.size() / items_per_block; i++) { - auto bin_idx = i * bin; - auto idx = i * items_per_block + j; - expected_bin[bin_idx + static_cast(output[idx])]++; + for(size_t j = 0; j < items_per_block; j++) + { + auto bin_idx = i * bin; + auto idx = i * items_per_block + j; + expected_bin[bin_idx + static_cast(output[idx])]++; + } } - } - // Preparing device - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); - T* device_output_bin; - HIP_CHECK(hipMalloc(&device_output_bin, output_bin.size() * sizeof(T))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output_bin, output_bin.data(), - output_bin.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(histogram_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_bin - ); - - // Reading results back - HIP_CHECK( - hipMemcpy( - output_bin.data(), device_output_bin, - output_bin.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - test_utils::assert_eq(output_bin, expected_bin); - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_bin)); + // Preparing device + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); + T* device_output_bin; + HIP_CHECK(hipMalloc(&device_output_bin, output_bin.size() * sizeof(T))); + + HIP_CHECK( + hipMemcpy( + device_output, output.data(), + output.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + HIP_CHECK( + 
hipMemcpy( + device_output_bin, output_bin.data(), + output_bin.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(histogram_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_output, device_output_bin + ); + + // Reading results back + HIP_CHECK( + hipMemcpy( + output_bin.data(), device_output_bin, + output_bin.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + test_utils::assert_eq(output_bin, expected_bin); + + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_bin)); + } + } // Static for-loop diff --git a/test/rocprim/test_block_load_store.cpp b/test/rocprim/test_block_load_store.cpp index 4a16d1171..5eb7fc19d 100644 --- a/test/rocprim/test_block_load_store.cpp +++ b/test/rocprim/test_block_load_store.cpp @@ -280,61 +280,68 @@ TYPED_TEST(RocprimBlockLoadStoreClassTests, LoadStoreClass) return; } - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output(input.size(), 0); - - // Calculate expected results on host - std::vector expected(input.size(), 0); - for (size_t i = 0; i < 113; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - size_t block_offset = i * items_per_block; - for (size_t j = 0; j < items_per_block; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, -100, 100, seed_value); + std::vector output(input.size(), 0); + + // Calculate expected results on host + std::vector expected(input.size(), 0); + for (size_t i = 0; i < 113; i++) { - expected[j + block_offset] = input[j + block_offset]; + size_t block_offset = i * items_per_block; + for (size_t j = 0; j < items_per_block; j++) + { + expected[j + block_offset] = input[j + block_offset]; + } } + + // Preparing device + Type* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + Type* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(typename decltype(input)::value_type), + hipMemcpyHostToDevice + ) + ); + + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + load_store_kernel< + Type, load_method, store_method, + block_size, items_per_thread + > + ), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, device_output + ); + + // Reading results from device + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(typename decltype(output)::value_type), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); } - // Preparing device - Type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - Type* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(typename decltype(input)::value_type), - 
hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - load_store_kernel< - Type, load_method, store_method, - block_size, items_per_thread - > - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output - ); - - // Reading results from device - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); } template< @@ -372,73 +379,81 @@ TYPED_TEST(RocprimBlockLoadStoreClassTests, LoadStoreClassValid) } const size_t valid = items_per_block - 32; - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output(input.size(), 0); - - // Calculate expected results on host - std::vector expected(input.size(), 0); - for (size_t i = 0; i < 113; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - size_t block_offset = i * items_per_block; - for (size_t j = 0; j < items_per_block; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, -100, 100, seed_value); + std::vector output(input.size(), 0); + + // Calculate expected results on host + std::vector expected(input.size(), 0); + for (size_t i = 0; i < 113; i++) { - if (j < valid) + size_t block_offset = i * items_per_block; + for (size_t j = 0; j < items_per_block; j++) { - expected[j + block_offset] = input[j + block_offset]; + if (j < valid) + { + expected[j + block_offset] = input[j + block_offset]; + } } } - } - // Preparing device - Type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - Type* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(typename decltype(input)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Have to initialize output for unvalid data to make sure they are not changed - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - load_store_valid_kernel< - Type, load_method, store_method, - block_size, items_per_thread - > - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, valid - ); - - // Reading results from device - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); + // Preparing device + Type* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename 
decltype(input)::value_type))); + Type* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(typename decltype(input)::value_type), + hipMemcpyHostToDevice + ) + ); + + // Have to initialize output for unvalid data to make sure they are not changed + HIP_CHECK( + hipMemcpy( + device_output, output.data(), + output.size() * sizeof(typename decltype(output)::value_type), + hipMemcpyHostToDevice + ) + ); + + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + load_store_valid_kernel< + Type, load_method, store_method, + block_size, items_per_thread + > + ), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, device_output, valid + ); + + // Reading results from device + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(typename decltype(output)::value_type), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); + } + } template< @@ -477,64 +492,71 @@ TYPED_TEST(RocprimBlockLoadStoreClassTests, LoadStoreClassDefault) const size_t valid = items_per_thread + 1; int _default = -1; - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); - std::vector output(input.size(), 0); - - // Calculate expected results on host - std::vector expected(input.size(), _default); - for (size_t i = 0; i < 113; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - size_t block_offset = i * items_per_block; - for (size_t j = 0; j < items_per_block; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, -100, 100, seed_value); + std::vector output(input.size(), 0); + + // Calculate expected results on host + std::vector expected(input.size(), _default); + for (size_t i = 0; i < 113; i++) { - if (j < valid) + size_t block_offset = i * items_per_block; + for (size_t j = 0; j < items_per_block; j++) { - expected[j + block_offset] = input[j + block_offset]; + if (j < valid) + { + expected[j + block_offset] = input[j + block_offset]; + } } } - } - // Preparing device - Type* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - Type* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(typename decltype(input)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - load_store_valid_default_kernel< - Type, load_method, store_method, - block_size, items_per_thread - > - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, valid, _default - ); - - // Reading results from device - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); + // Preparing device + Type* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + Type* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + 
input.size() * sizeof(typename decltype(input)::value_type), + hipMemcpyHostToDevice + ) + ); + + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + load_store_valid_default_kernel< + Type, load_method, store_method, + block_size, items_per_thread + > + ), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, device_output, valid, _default + ); + + // Reading results from device + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(typename decltype(output)::value_type), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); + } } TYPED_TEST(RocprimVectorizationTests, IsVectorizable) diff --git a/test/rocprim/test_block_radix_sort.cpp b/test/rocprim/test_block_radix_sort.cpp index 3c847b787..7f69420fb 100644 --- a/test/rocprim/test_block_radix_sort.cpp +++ b/test/rocprim/test_block_radix_sort.cpp @@ -227,64 +227,73 @@ auto test_block_radix_sort() const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - // Generate data - std::vector keys_output; - if(rp::is_floating_point::value) - { - keys_output = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - keys_output = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector keys_output; + if(rp::is_floating_point::value) + { + keys_output = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000, seed_value); + } + else + { + keys_output = test_utils::get_random_data( + size, + std::numeric_limits::min(), + std::numeric_limits::max(), + seed_value + ); + } + + // Calculate expected results on host + std::vector expected(keys_output); + for(size_t i = 0; i < size / items_per_block; i++) + { + std::stable_sort( + expected.begin() + (i * items_per_block), + expected.begin() + ((i + 1) * items_per_block), + key_comparator() + ); + } + + // Preparing device + key_type* device_keys_output; + HIP_CHECK(hipMalloc(&device_keys_output, keys_output.size() * sizeof(key_type))); + + HIP_CHECK( + hipMemcpy( + device_keys_output, keys_output.data(), + keys_output.size() * sizeof(typename decltype(keys_output)::value_type), + hipMemcpyHostToDevice + ) ); - } - // Calculate expected results on host - std::vector expected(keys_output); - for(size_t i = 0; i < size / items_per_block; i++) - { - std::stable_sort( - expected.begin() + (i * items_per_block), - expected.begin() + ((i + 1) * items_per_block), - key_comparator() + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(sort_key_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_keys_output, to_striped, descending, start_bit, end_bit ); - } - // Preparing device - key_type* device_keys_output; - HIP_CHECK(hipMalloc(&device_keys_output, keys_output.size() * sizeof(key_type))); - - HIP_CHECK( - hipMemcpy( - device_keys_output, keys_output.data(), - keys_output.size() * sizeof(typename decltype(keys_output)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(sort_key_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_keys_output, to_striped, descending, start_bit, end_bit - ); - - 
// Getting results to host - HIP_CHECK( - hipMemcpy( - keys_output.data(), device_keys_output, - keys_output.size() * sizeof(typename decltype(keys_output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - // Verifying results - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected)); - - HIP_CHECK(hipFree(device_keys_output)); + // Getting results to host + HIP_CHECK( + hipMemcpy( + keys_output.data(), device_keys_output, + keys_output.size() * sizeof(typename decltype(keys_output)::value_type), + hipMemcpyDeviceToHost + ) + ); + + // Verifying results + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected)); + + HIP_CHECK(hipFree(device_keys_output)); + } + } template< @@ -319,99 +328,108 @@ auto test_block_radix_sort() const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - // Generate data - std::vector keys_output; - if(rp::is_floating_point::value) - { - keys_output = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - keys_output = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector keys_output; + if(rp::is_floating_point::value) + { + keys_output = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000, seed_value); + } + else + { + keys_output = test_utils::get_random_data( + size, + std::numeric_limits::min(), + std::numeric_limits::max(), + seed_value + ); + } + + std::vector values_output = test_utils::get_random_data(size, 0, 100, seed_value); + + using key_value = std::pair; + + // Calculate expected results on host + std::vector expected(size); + for(size_t i = 0; i < size; i++) + { + expected[i] = key_value(keys_output[i], values_output[i]); + } + + for(size_t i = 0; i < size / items_per_block; i++) + { + std::stable_sort( + expected.begin() + (i * items_per_block), + expected.begin() + ((i + 1) * items_per_block), + key_value_comparator() + ); + } + + std::vector keys_expected(size); + std::vector values_expected(size); + for(size_t i = 0; i < size; i++) + { + keys_expected[i] = expected[i].first; + values_expected[i] = expected[i].second; + } + + key_type* device_keys_output; + HIP_CHECK(hipMalloc(&device_keys_output, keys_output.size() * sizeof(key_type))); + value_type* device_values_output; + HIP_CHECK(hipMalloc(&device_values_output, values_output.size() * sizeof(value_type))); + + HIP_CHECK( + hipMemcpy( + device_keys_output, keys_output.data(), + keys_output.size() * sizeof(typename decltype(keys_output)::value_type), + hipMemcpyHostToDevice + ) ); - } - std::vector values_output = test_utils::get_random_data(size, 0, 100); + HIP_CHECK( + hipMemcpy( + device_values_output, values_output.data(), + values_output.size() * sizeof(typename decltype(values_output)::value_type), + hipMemcpyHostToDevice + ) + ); - using key_value = std::pair; + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(sort_key_value_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_keys_output,
device_values_output, to_striped, descending, start_bit, end_bit + ); - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < size; i++) - { - expected[i] = key_value(keys_output[i], values_output[i]); - } + // Getting results to host + HIP_CHECK( + hipMemcpy( + keys_output.data(), device_keys_output, + keys_output.size() * sizeof(typename decltype(keys_output)::value_type), + hipMemcpyDeviceToHost + ) + ); - for(size_t i = 0; i < size / items_per_block; i++) - { - std::stable_sort( - expected.begin() + (i * items_per_block), - expected.begin() + ((i + 1) * items_per_block), - key_value_comparator() + HIP_CHECK( + hipMemcpy( + values_output.data(), device_values_output, + values_output.size() * sizeof(typename decltype(values_output)::value_type), + hipMemcpyDeviceToHost + ) ); - } - std::vector keys_expected(size); - std::vector values_expected(size); - for(size_t i = 0; i < size; i++) - { - keys_expected[i] = expected[i].first; - values_expected[i] = expected[i].second; - } + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, keys_expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, values_expected)); - key_type* device_keys_output; - HIP_CHECK(hipMalloc(&device_keys_output, keys_output.size() * sizeof(key_type))); - value_type* device_values_output; - HIP_CHECK(hipMalloc(&device_values_output, values_output.size() * sizeof(value_type))); - - HIP_CHECK( - hipMemcpy( - device_keys_output, keys_output.data(), - keys_output.size() * sizeof(typename decltype(keys_output)::value_type), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_values_output, values_output.data(), - values_output.size() * sizeof(typename decltype(values_output)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(sort_key_value_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_keys_output, device_values_output, to_striped, descending, 
start_bit, end_bit - ); - - // Getting results to host - HIP_CHECK( - hipMemcpy( - keys_output.data(), device_keys_output, - keys_output.size() * sizeof(typename decltype(keys_output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - values_output.data(), device_values_output, - values_output.size() * sizeof(typename decltype(values_output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, keys_expected)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, values_expected)); - - HIP_CHECK(hipFree(device_keys_output)); - HIP_CHECK(hipFree(device_values_output)); + HIP_CHECK(hipFree(device_keys_output)); + HIP_CHECK(hipFree(device_values_output)); + } + } // Static for-loop diff --git a/test/rocprim/test_block_reduce.cpp b/test/rocprim/test_block_reduce.cpp index 3377db90e..cde4cc03e 100644 --- a/test/rocprim/test_block_reduce.cpp +++ b/test/rocprim/test_block_reduce.cpp @@ -133,41 +133,49 @@ TYPED_TEST(RocprimBlockReduceSingleValueTests, Reduce) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 50); - std::vector output_reductions(size / block_size); - - // Calculate expected results on host - std::vector expected_reductions(output_reductions.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / block_size; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - T value = 0; - for(size_t j = 0; j < block_size; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output_reductions(size / block_size); + + // Calculate expected results on host + std::vector expected_reductions(output_reductions.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / block_size; i++) { - auto idx = i * block_size + j; - value = apply(binary_op, value, output[idx]); + T value = 0; + for(size_t j = 0; j < block_size; j++) + { + auto idx = i * block_size + j; + value = apply(binary_op, value, output[idx]); + } + expected_reductions[i] = value; } - expected_reductions[i] = value; - } - // Preparing device - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); - T* device_output_reductions; - HIP_CHECK(hipMalloc(&device_output_reductions, output_reductions.size() * sizeof(T))); - - static_run_algo::run( - output, output_reductions, expected_reductions, - device_output, device_output_reductions, grid_size, false - ); - static_run_algo::run( - output, output_reductions, expected_reductions, - device_output, device_output_reductions, grid_size, false - ); - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); + // Preparing device + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); + T* device_output_reductions; + HIP_CHECK(hipMalloc(&device_output_reductions, output_reductions.size() * sizeof(T))); + + static_run_algo::run( + output, output_reductions, expected_reductions, + device_output, device_output_reductions, grid_size, false + ); + static_run_algo::run( + output, output_reductions, expected_reductions, + device_output, device_output_reductions, grid_size, false + ); + + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_reductions)); + } + } TYPED_TEST(RocprimBlockReduceSingleValueTests, 
ReduceMultiplies) @@ -184,46 +192,54 @@ TYPED_TEST(RocprimBlockReduceSingleValueTests, ReduceMultiplies) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - // Generate data - std::vector output(size, 1); - auto two_places = test_utils::get_random_data(size/32, 0, size-1); - for(auto i : two_places) - { - output[i] = T(2); - } - std::vector output_reductions(size / block_size); - // Calculate expected results on host - std::vector expected_reductions(output_reductions.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / block_size; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - T value = 1; - for(size_t j = 0; j < block_size; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output(size, 1); + auto two_places = test_utils::get_random_data(size/32, 0, size-1, seed_value); + for(auto i : two_places) { - auto idx = i * block_size + j; - value = apply(binary_op, value, output[idx]); + output[i] = T(2); } - expected_reductions[i] = value; - } + std::vector output_reductions(size / block_size); + + // Calculate expected results on host + std::vector expected_reductions(output_reductions.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / block_size; i++) + { + T value = 1; + for(size_t j = 0; j < block_size; j++) + { + auto idx = i * block_size + j; + value = apply(binary_op, value, output[idx]); + } + expected_reductions[i] = value; + } + + // Preparing device + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); + T* device_output_reductions; + HIP_CHECK(hipMalloc(&device_output_reductions, output_reductions.size() * sizeof(T))); + + static_run_algo::run( + output, output_reductions, expected_reductions, + device_output, device_output_reductions, grid_size, true + ); + static_run_algo::run( + output, 
output_reductions, expected_reductions, + device_output, device_output_reductions, grid_size, true + ); - // Preparing device - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); - T* device_output_reductions; - HIP_CHECK(hipMalloc(&device_output_reductions, output_reductions.size() * sizeof(T))); - - static_run_algo::run( - output, output_reductions, expected_reductions, - device_output, device_output_reductions, grid_size, true - ); - static_run_algo::run( - output, output_reductions, expected_reductions, - device_output, device_output_reductions, grid_size, true - ); - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_reductions)); + } + } template< @@ -295,51 +311,59 @@ TYPED_TEST(RocprimBlockReduceSingleValueTests, ReduceValid) using T = typename TestFixture::input_type; using binary_op_type = typename std::conditional::value, test_utils::half_plus, rp::plus>::type; constexpr size_t block_size = TestFixture::block_size; - const unsigned int valid_items = test_utils::get_random_value(block_size - 10, block_size); - // Given block size not supported - if(block_size > test_utils::get_max_block_size()) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - return; - } + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - const size_t size = block_size * 58; - const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 50); - std::vector output_reductions(size / block_size); - - // Calculate expected results on host - std::vector expected_reductions(output_reductions.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / block_size; i++) - { - T value = 0; - for(size_t j = 0; j < valid_items; j++) + const unsigned int valid_items = test_utils::get_random_value(block_size - 10, block_size, seed_value); + + // Given block size not supported + if(block_size > test_utils::get_max_block_size()) { - auto idx = i * block_size + j; - value = apply(binary_op, value, output[idx]); + return; } - expected_reductions[i] = value; - } - // Preparing device - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); - T* device_output_reductions; - HIP_CHECK(hipMalloc(&device_output_reductions, output_reductions.size() * sizeof(T))); - - static_run_valid::run( - output, output_reductions, expected_reductions, - device_output, device_output_reductions, valid_items, grid_size - ); - static_run_valid::run( - output, output_reductions, expected_reductions, - device_output, device_output_reductions, valid_items, grid_size - ); - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); + const size_t size = block_size * 58; + const size_t grid_size = size / block_size; + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output_reductions(size / block_size); + + // Calculate expected results on host + std::vector expected_reductions(output_reductions.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / block_size; i++) + { + T value = 0; + for(size_t j = 0; j < valid_items; j++) + { + 
auto idx = i * block_size + j; + value = apply(binary_op, value, output[idx]); + } + expected_reductions[i] = value; + } + + // Preparing device + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); + T* device_output_reductions; + HIP_CHECK(hipMalloc(&device_output_reductions, output_reductions.size() * sizeof(T))); + + static_run_valid::run( + output, output_reductions, expected_reductions, + device_output, device_output_reductions, valid_items, grid_size + ); + static_run_valid::run( + output, output_reductions, expected_reductions, + device_output, device_output_reductions, valid_items, grid_size + ); + + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_reductions)); + } + } @@ -404,69 +428,77 @@ void test_block_reduce_input_arrays() const size_t items_per_block = block_size * items_per_thread; const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 0, 100); - // Output reduce results - std::vector output_reductions(size / block_size, 0); - - // Calculate expected results on host - std::vector expected_reductions(output_reductions.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / items_per_block; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - T value = 0; - for(size_t j = 0; j < items_per_block; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 0, 100, seed_value); + + // Output reduce results + std::vector output_reductions(size / block_size, 0); + + // Calculate expected results on host + std::vector expected_reductions(output_reductions.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / items_per_block; i++) { - auto idx = i * items_per_block + j; - value = apply(binary_op, value, output[idx]); + T value = 0; + for(size_t j = 0; j < items_per_block; j++) + { + auto idx = i * items_per_block + j; + value = apply(binary_op, value, output[idx]); + } + expected_reductions[i] = value; } - expected_reductions[i] = value; - } - // Preparing device - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); - T* device_output_reductions; - HIP_CHECK(hipMalloc(&device_output_reductions, output_reductions.size() * sizeof(T))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output_reductions, output_reductions.data(), - output_reductions.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(reduce_array_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_reductions - ); - - // Reading results back - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Verifying results - test_utils::assert_near(output_reductions, expected_reductions, 0.05); - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); + // Preparing device + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(T))); + T* device_output_reductions; + 
HIP_CHECK(hipMalloc(&device_output_reductions, output_reductions.size() * sizeof(T))); + + HIP_CHECK( + hipMemcpy( + device_output, output.data(), + output.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + HIP_CHECK( + hipMemcpy( + device_output_reductions, output_reductions.data(), + output_reductions.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(reduce_array_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_output, device_output_reductions + ); + + // Reading results back + HIP_CHECK( + hipMemcpy( + output_reductions.data(), device_output_reductions, + output_reductions.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Verifying results + test_utils::assert_near(output_reductions, expected_reductions, 0.05); + + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_reductions)); + } + } // Static for-loop diff --git a/test/rocprim/test_block_scan.cpp b/test/rocprim/test_block_scan.cpp index e9d6ba6fe..622d2b48b 100644 --- a/test/rocprim/test_block_scan.cpp +++ b/test/rocprim/test_block_scan.cpp @@ -270,37 +270,45 @@ TYPED_TEST(RocprimBlockScanSingleValueTests, InclusiveScan) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 50); - std::vector output2 = output; - - // Calculate expected results on host - std::vector expected(output.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / block_size; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - for(size_t j = 0; j < block_size; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output2 = output; + + // Calculate expected results on host + std::vector expected(output.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / block_size; i++) { - auto idx = i * block_size + j; - expected[idx] = apply(binary_op, output[idx], expected[j > 0 ? idx-1 : idx]); + for(size_t j = 0; j < block_size; j++) + { + auto idx = i * block_size + j; + expected[idx] = apply(binary_op, output[idx], expected[j > 0 ? idx-1 : idx]); + } } - } - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + // Writing to device memory + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - static_run_algo::run( - output, output, expected, expected, - device_output, NULL, T(0), grid_size - ); + static_run_algo::run( + output, output, expected, expected, + device_output, NULL, T(0), grid_size + ); - static_run_algo::run( - output2, output2, expected, expected, - device_output, NULL, T(0), grid_size - ); + static_run_algo::run( + output2, output2, expected, expected, + device_output, NULL, T(0), grid_size + ); - HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output)); + } + } TYPED_TEST(RocprimBlockScanSingleValueTests, InclusiveScanReduce) @@ -317,48 +325,56 @@ TYPED_TEST(RocprimBlockScanSingleValueTests, InclusiveScanReduce) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 50); - std::vector output2 = output; - std::vector output_reductions(size / block_size); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector 
expected_reductions(output_reductions.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / block_size; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - for(size_t j = 0; j < block_size; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output2 = output; + std::vector output_reductions(size / block_size); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + std::vector expected_reductions(output_reductions.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / block_size; i++) { - auto idx = i * block_size + j; - expected[idx] = apply(binary_op, output[idx], expected[j > 0 ? idx-1 : idx]); + for(size_t j = 0; j < block_size; j++) + { + auto idx = i * block_size + j; + expected[idx] = apply(binary_op, output[idx], expected[j > 0 ? 
idx-1 : idx]); + } + expected_reductions[i] = expected[(i+1) * block_size - 1]; } - expected_reductions[i] = expected[(i+1) * block_size - 1]; - } - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - static_run_algo::run( - output, output_reductions, expected, expected_reductions, - device_output, device_output_reductions, T(0), grid_size - ); - - static_run_algo::run( - output2, output_reductions, expected, expected_reductions, - device_output, device_output_reductions, T(0), grid_size - ); - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); + // Writing to device memory + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + T* device_output_reductions; + HIP_CHECK( + hipMalloc( + &device_output_reductions, + output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) + ) + ); + + static_run_algo::run( + output, output_reductions, expected, expected_reductions, + device_output, device_output_reductions, T(0), grid_size + ); + + static_run_algo::run( + output2, output_reductions, expected, expected_reductions, + device_output, device_output_reductions, T(0), grid_size + ); + + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_reductions)); + } + } TYPED_TEST(RocprimBlockScanSingleValueTests, InclusiveScanPrefixCallback) @@ -375,50 +391,58 @@ TYPED_TEST(RocprimBlockScanSingleValueTests, InclusiveScanPrefixCallback) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 50); - std::vector output2 = output; - std::vector 
output_block_prefixes(size / block_size); - T block_prefix = test_utils::get_random_value(0, 5); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_block_prefixes(output_block_prefixes.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / block_size; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected[i * block_size] = block_prefix; - for(size_t j = 0; j < block_size; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output2 = output; + std::vector output_block_prefixes(size / block_size); + T block_prefix = test_utils::get_random_value(0, 5, seed_value); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + std::vector expected_block_prefixes(output_block_prefixes.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / block_size; i++) { - auto idx = i * block_size + j; - expected[idx] = apply(binary_op, output[idx], expected[j > 0 ? idx-1 : idx]); + expected[i * block_size] = block_prefix; + for(size_t j = 0; j < block_size; j++) + { + auto idx = i * block_size + j; + expected[idx] = apply(binary_op, output[idx], expected[j > 0 ? 
idx-1 : idx]); + } + expected_block_prefixes[i] = expected[(i+1) * block_size - 1]; } - expected_block_prefixes[i] = expected[(i+1) * block_size - 1]; - } - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_bp; - HIP_CHECK( - hipMalloc( - &device_output_bp, - output_block_prefixes.size() * sizeof(typename decltype(output_block_prefixes)::value_type) - ) - ); - - static_run_algo::run( - output, output_block_prefixes, expected, expected_block_prefixes, - device_output, device_output_bp, block_prefix, grid_size - ); - - static_run_algo::run( - output2, output_block_prefixes, expected, expected_block_prefixes, - device_output, device_output_bp, block_prefix, grid_size - ); - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_bp)); + // Writing to device memory + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + T* device_output_bp; + HIP_CHECK( + hipMalloc( + &device_output_bp, + output_block_prefixes.size() * sizeof(typename decltype(output_block_prefixes)::value_type) + ) + ); + + static_run_algo::run( + output, output_block_prefixes, expected, expected_block_prefixes, + device_output, device_output_bp, block_prefix, grid_size + ); + + static_run_algo::run( + output2, output_block_prefixes, expected, expected_block_prefixes, + device_output, device_output_bp, block_prefix, grid_size + ); + + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_bp)); + } + } TYPED_TEST(RocprimBlockScanSingleValueTests, ExclusiveScan) @@ -435,39 +459,47 @@ TYPED_TEST(RocprimBlockScanSingleValueTests, ExclusiveScan) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 50); - std::vector output2 = output; - const T init = test_utils::get_random_value(0, 5); 
- - // Calculate expected results on host - std::vector expected(output.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / block_size; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected[i * block_size] = init; - for(size_t j = 1; j < block_size; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output2 = output; + const T init = test_utils::get_random_value(0, 5, seed_value); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / block_size; i++) { - auto idx = i * block_size + j; - expected[idx] = apply(binary_op, output[idx-1], expected[idx-1]); + expected[i * block_size] = init; + for(size_t j = 1; j < block_size; j++) + { + auto idx = i * block_size + j; + expected[idx] = apply(binary_op, output[idx-1], expected[idx-1]); + } } - } - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + // Writing to device memory + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - static_run_algo::run( - output, output, expected, expected, - device_output, NULL, init, grid_size - ); + static_run_algo::run( + output, output, expected, expected, + device_output, NULL, init, grid_size + ); - static_run_algo::run( - output2, output2, expected, expected, - device_output, NULL, init, grid_size - ); + static_run_algo::run( + output2, output2, expected, expected, + device_output, NULL, init, grid_size + ); - HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output)); + } + } TYPED_TEST(RocprimBlockScanSingleValueTests, ExclusiveScanReduce) @@ 
-483,58 +515,66 @@ TYPED_TEST(RocprimBlockScanSingleValueTests, ExclusiveScanReduce) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 50); - std::vector output2 = output; - const T init = test_utils::get_random_value(0, 5); - - // Output reduce results - std::vector output_reductions(size / block_size); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / block_size; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected[i * block_size] = init; - for(size_t j = 1; j < block_size; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output2 = output; + const T init = test_utils::get_random_value(0, 5, seed_value); + + // Output reduce results + std::vector output_reductions(size / block_size); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + std::vector expected_reductions(output_reductions.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / block_size; i++) { - auto idx = i * block_size + j; - expected[idx] = apply(binary_op, output[idx-1], expected[idx-1]); + expected[i * block_size] = init; + for(size_t j = 1; j < block_size; j++) + { + auto idx = i * block_size + j; + expected[idx] = apply(binary_op, output[idx-1], expected[idx-1]); + } + + expected_reductions[i] = 0; + for(size_t j = 0; j < block_size; j++) + { + auto idx = i * block_size + j; + expected_reductions[i] = apply(binary_op, expected_reductions[i], output[idx]); + } } - expected_reductions[i] = 0; - for(size_t j = 0; j < block_size; j++) - 
{ - auto idx = i * block_size + j; - expected_reductions[i] = apply(binary_op, expected_reductions[i], output[idx]); - } - } + // Writing to device memory + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + T* device_output_reductions; + HIP_CHECK( + hipMalloc( + &device_output_reductions, + output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) + ) + ); + + static_run_algo::run( + output, output_reductions, expected, expected_reductions, + device_output, device_output_reductions, init, grid_size + ); - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - static_run_algo::run( - output, output_reductions, expected, expected_reductions, - device_output, device_output_reductions, init, grid_size - ); - - static_run_algo::run( - output2, output_reductions, expected, expected_reductions, - device_output, device_output_reductions, init, grid_size - ); - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); + static_run_algo::run( + output2, output_reductions, expected, expected_reductions, + device_output, device_output_reductions, init, grid_size + ); + + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_reductions)); + } + } TYPED_TEST(RocprimBlockScanSingleValueTests, ExclusiveScanPrefixCallback) @@ -551,56 +591,64 @@ TYPED_TEST(RocprimBlockScanSingleValueTests, ExclusiveScanPrefixCallback) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 50); - std::vector output2 = output; - std::vector output_block_prefixes(size / block_size); - 
T block_prefix = test_utils::get_random_value(0, 5); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_block_prefixes(output_block_prefixes.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / block_size; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected[i * block_size] = block_prefix; - for(size_t j = 1; j < block_size; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output2 = output; + std::vector output_block_prefixes(size / block_size); + T block_prefix = test_utils::get_random_value(0, 5, seed_value); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + std::vector expected_block_prefixes(output_block_prefixes.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / block_size; i++) { - auto idx = i * block_size + j; - expected[idx] = apply(binary_op, output[idx-1], expected[idx-1]); + expected[i * block_size] = block_prefix; + for(size_t j = 1; j < block_size; j++) + { + auto idx = i * block_size + j; + expected[idx] = apply(binary_op, output[idx-1], expected[idx-1]); + } + + expected_block_prefixes[i] = block_prefix; + for(size_t j = 0; j < block_size; j++) + { + auto idx = i * block_size + j; + expected_block_prefixes[i] = apply(binary_op, expected_block_prefixes[i], output[idx]); + } } - expected_block_prefixes[i] = block_prefix; - for(size_t j = 0; j < block_size; j++) - { - auto idx = i * block_size + j; - expected_block_prefixes[i] = apply(binary_op, expected_block_prefixes[i], output[idx]); - } - } + // Writing to device memory + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + T* device_output_bp; + 
HIP_CHECK( + hipMalloc( + &device_output_bp, + output_block_prefixes.size() * sizeof(typename decltype(output_block_prefixes)::value_type) + ) + ); + + static_run_algo::run( + output, output_block_prefixes, expected, expected_block_prefixes, + device_output, device_output_bp, block_prefix, grid_size + ); - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_bp; - HIP_CHECK( - hipMalloc( - &device_output_bp, - output_block_prefixes.size() * sizeof(typename decltype(output_block_prefixes)::value_type) - ) - ); - - static_run_algo::run( - output, output_block_prefixes, expected, expected_block_prefixes, - device_output, device_output_bp, block_prefix, grid_size - ); - - static_run_algo::run( - output2, output_block_prefixes, expected, expected_block_prefixes, - device_output, device_output_bp, block_prefix, grid_size - ); - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_bp)); + static_run_algo::run( + output2, output_block_prefixes, expected, expected_block_prefixes, + device_output, device_output_bp, block_prefix, grid_size + ); + + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_bp)); + } + } // --------------------------------------------------------- @@ -856,55 +904,63 @@ auto test_block_scan_input_arrays() const size_t items_per_block = block_size * items_per_thread; const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 100); - // Calculate expected results on host - std::vector expected(output.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / items_per_block; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - for(size_t j = 0; j < items_per_block; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 100, seed_value); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / items_per_block; i++) { - auto idx = i * items_per_block + j; - expected[idx] = apply(binary_op, output[idx], expected[j > 0 ? idx-1 : idx]); + for(size_t j = 0; j < items_per_block; j++) + { + auto idx = i * items_per_block + j; + expected[idx] = apply(binary_op, output[idx], expected[j > 0 ? idx-1 : idx]); + } } - } - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(inclusive_scan_array_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); - HIP_CHECK(hipFree(device_output)); + // Writing to device memory + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_output, output.data(), + output.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(inclusive_scan_array_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_output + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + 
HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); + HIP_CHECK(hipFree(device_output)); + } + } template< @@ -931,86 +987,94 @@ auto test_block_scan_input_arrays() const size_t items_per_block = block_size * items_per_thread; const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 100); - - // Output reduce results - std::vector output_reductions(size / block_size, 0); - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / items_per_block; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - for(size_t j = 0; j < items_per_block; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 100, seed_value); + + // Output reduce results + std::vector output_reductions(size / block_size, 0); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + std::vector expected_reductions(output_reductions.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / items_per_block; i++) { - auto idx = i * items_per_block + j; - expected[idx] = apply(binary_op, output[idx], expected[j > 0 ? idx-1 : idx]); + for(size_t j = 0; j < items_per_block; j++) + { + auto idx = i * items_per_block + j; + expected[idx] = apply(binary_op, output[idx], expected[j > 0 ? 
idx-1 : idx]); + } + expected_reductions[i] = expected[(i+1) * items_per_block - 1]; } - expected_reductions[i] = expected[(i+1) * items_per_block - 1]; - } - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output_reductions, output_reductions.data(), - output_reductions.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(inclusive_scan_reduce_array_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_reductions - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output_reductions, expected_reductions)); - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); + // Writing to device memory + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + T* device_output_reductions; + HIP_CHECK( + hipMalloc( + &device_output_reductions, + output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) + ) + ); + + HIP_CHECK( + hipMemcpy( + device_output, 
output.data(), + output.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + HIP_CHECK( + hipMemcpy( + device_output_reductions, output_reductions.data(), + output_reductions.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(inclusive_scan_reduce_array_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_output, device_output_reductions + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK( + hipMemcpy( + output_reductions.data(), device_output_reductions, + output_reductions.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output_reductions, expected_reductions)); + + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_reductions)); + } + } template< @@ -1037,88 +1101,96 @@ auto test_block_scan_input_arrays() const size_t items_per_block = block_size * items_per_thread; const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 100); - std::vector output_block_prefixes(size / items_per_block, 0); - T block_prefix = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_block_prefixes(output_block_prefixes.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / items_per_block; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected[i * items_per_block] = block_prefix; - for(size_t j = 0; j < items_per_block; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 100, seed_value); + std::vector output_block_prefixes(size / items_per_block, 0); + T block_prefix = test_utils::get_random_value(0, 100, seed_value); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + std::vector expected_block_prefixes(output_block_prefixes.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / items_per_block; i++) { - auto idx = i * items_per_block + j; - expected[idx] = apply(binary_op, output[idx], expected[j > 0 ? idx-1 : idx]); + expected[i * items_per_block] = block_prefix; + for(size_t j = 0; j < items_per_block; j++) + { + auto idx = i * items_per_block + j; + expected[idx] = apply(binary_op, output[idx], expected[j > 0 ? idx-1 : idx]); + } + expected_block_prefixes[i] = expected[(i+1) * items_per_block - 1]; } - expected_block_prefixes[i] = expected[(i+1) * items_per_block - 1]; - } - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_bp; - HIP_CHECK( - hipMalloc( - &device_output_bp, - output_block_prefixes.size() * sizeof(typename decltype(output_block_prefixes)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output_bp, output_block_prefixes.data(), - output_block_prefixes.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - inclusive_scan_array_prefix_callback_kernel - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_bp, block_prefix - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - 
hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_block_prefixes.data(), device_output_bp, - output_block_prefixes.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output_block_prefixes, expected_block_prefixes)); - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_bp)); + // Writing to device memory + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + T* device_output_bp; + HIP_CHECK( + hipMalloc( + &device_output_bp, + output_block_prefixes.size() * sizeof(typename decltype(output_block_prefixes)::value_type) + ) + ); + + HIP_CHECK( + hipMemcpy( + device_output, output.data(), + output.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + HIP_CHECK( + hipMemcpy( + device_output_bp, output_block_prefixes.data(), + output_block_prefixes.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + inclusive_scan_array_prefix_callback_kernel + ), + dim3(grid_size), dim3(block_size), 0, 0, + device_output, device_output_bp, block_prefix + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK( + hipMemcpy( + output_block_prefixes.data(), device_output_bp, + output_block_prefixes.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output_block_prefixes, expected_block_prefixes)); + + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_bp)); + 
} + } template< @@ -1145,58 +1217,66 @@ auto test_block_scan_input_arrays() const size_t items_per_block = block_size * items_per_thread; const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 100); - const T init = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / items_per_block; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected[i * items_per_block] = init; - for(size_t j = 1; j < items_per_block; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 100, seed_value); + const T init = test_utils::get_random_value(0, 100, seed_value); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / items_per_block; i++) { - auto idx = i * items_per_block + j; - expected[idx] = apply(binary_op, output[idx-1], expected[idx-1]); + expected[i * items_per_block] = init; + for(size_t j = 1; j < items_per_block; j++) + { + auto idx = i * items_per_block + j; + expected[idx] = apply(binary_op, output[idx-1], expected[idx-1]); + } } - } - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(exclusive_scan_array_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, init - ); - - HIP_CHECK(hipPeekAtLastError()); - 
HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); - - HIP_CHECK(hipFree(device_output)); + // Writing to device memory + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_output, output.data(), + output.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(exclusive_scan_array_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_output, init + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); + + HIP_CHECK(hipFree(device_output)); + } + } template< @@ -1223,83 +1303,91 @@ auto test_block_scan_input_arrays() const size_t items_per_block = block_size * items_per_thread; const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = test_utils::get_random_data(size, 2, 100); - - // Output reduce results - std::vector output_reductions(size / items_per_block); - const T init = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / items_per_block; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected[i * items_per_block] = init; - for(size_t j = 1; j < items_per_block; j++) - { - auto idx = i * 
items_per_block + j; - expected[idx] = apply(binary_op, output[idx-1], expected[idx-1]); - } - for(size_t j = 0; j < items_per_block; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 100, seed_value); + + // Output reduce results + std::vector output_reductions(size / items_per_block); + const T init = test_utils::get_random_value(0, 100, seed_value); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + std::vector expected_reductions(output_reductions.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / items_per_block; i++) { - auto idx = i * items_per_block + j; - expected_reductions[i] = apply(binary_op, expected_reductions[i], output[idx]); + expected[i * items_per_block] = init; + for(size_t j = 1; j < items_per_block; j++) + { + auto idx = i * items_per_block + j; + expected[idx] = apply(binary_op, output[idx-1], expected[idx-1]); + } + for(size_t j = 0; j < items_per_block; j++) + { + auto idx = i * items_per_block + j; + expected_reductions[i] = apply(binary_op, expected_reductions[i], output[idx]); + } } - } - // Writing to device memory - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - exclusive_scan_reduce_array_kernel - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_reductions, init - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - 
- // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output_reductions, expected_reductions)); + // Writing to device memory + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + T* device_output_reductions; + HIP_CHECK( + hipMalloc( + &device_output_reductions, + output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) + ) + ); + + HIP_CHECK( + hipMemcpy( + device_output, output.data(), + output.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + exclusive_scan_reduce_array_kernel + ), + dim3(grid_size), dim3(block_size), 0, 0, + device_output, device_output_reductions, init + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK( + hipMemcpy( + output_reductions.data(), device_output_reductions, + output_reductions.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output_reductions, expected_reductions)); + } + } template< @@ -1326,85 +1414,93 @@ auto test_block_scan_input_arrays() const size_t items_per_block = block_size * items_per_thread; const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - // Generate data - std::vector output = 
test_utils::get_random_data(size, 2, 100); - std::vector output_block_prefixes(size / items_per_block); - T block_prefix = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - std::vector expected_block_prefixes(output_block_prefixes.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / items_per_block; i++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected[i * items_per_block] = block_prefix; - for(size_t j = 1; j < items_per_block; j++) - { - auto idx = i * items_per_block + j; - expected[idx] = apply(binary_op, output[idx-1], expected[idx-1]); - } - expected_block_prefixes[i] = block_prefix; - for(size_t j = 0; j < items_per_block; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 2, 100, seed_value); + std::vector output_block_prefixes(size / items_per_block); + T block_prefix = test_utils::get_random_value(0, 100, seed_value); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + std::vector expected_block_prefixes(output_block_prefixes.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / items_per_block; i++) { - auto idx = i * items_per_block + j; - expected_block_prefixes[i] = apply(binary_op, expected_block_prefixes[i], output[idx]); + expected[i * items_per_block] = block_prefix; + for(size_t j = 1; j < items_per_block; j++) + { + auto idx = i * items_per_block + j; + expected[idx] = apply(binary_op, output[idx-1], expected[idx-1]); + } + expected_block_prefixes[i] = block_prefix; + for(size_t j = 0; j < items_per_block; j++) + { + auto idx = i * items_per_block + j; + expected_block_prefixes[i] = apply(binary_op, expected_block_prefixes[i], output[idx]); + } } - } - // Writing to device memory - T* 
device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_bp; - HIP_CHECK( - hipMalloc( - &device_output_bp, - output_block_prefixes.size() * sizeof(typename decltype(output_block_prefixes)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_output, output.data(), - output.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME( - exclusive_scan_prefix_callback_array_kernel - ), - dim3(grid_size), dim3(block_size), 0, 0, - device_output, device_output_bp, block_prefix - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_block_prefixes.data(), device_output_bp, - output_block_prefixes.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output_block_prefixes, expected_block_prefixes)); - - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_bp)); + // Writing to device memory + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + T* device_output_bp; + HIP_CHECK( + hipMalloc( + &device_output_bp, + output_block_prefixes.size() * sizeof(typename decltype(output_block_prefixes)::value_type) + ) + ); + + HIP_CHECK( + hipMemcpy( + device_output, output.data(), + output.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME( + exclusive_scan_prefix_callback_array_kernel + ), + dim3(grid_size), dim3(block_size), 0, 0, + device_output, device_output_bp, block_prefix + ); + + HIP_CHECK(hipPeekAtLastError()); + 
HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK( + hipMemcpy( + output_block_prefixes.data(), device_output_bp, + output_block_prefixes.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output_block_prefixes, expected_block_prefixes)); + + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_bp)); + } + } // Static for-loop diff --git a/test/rocprim/test_block_sort.cpp b/test/rocprim/test_block_sort.cpp index 53e9113ee..f7f54003a 100644 --- a/test/rocprim/test_block_sort.cpp +++ b/test/rocprim/test_block_sort.cpp @@ -72,52 +72,59 @@ TYPED_TEST(RocprimBlockSortTests, SortKey) const size_t size = block_size * 1134; const size_t grid_size = size / block_size; - // Generate data - std::vector output = test_utils::get_random_data(size, -100, 100); - - // Calculate expected results on host - std::vector expected(output); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / block_size; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - std::sort( - expected.begin() + (i * block_size), - expected.begin() + ((i + 1) * block_size), - binary_op + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, -100, 100, seed_value); + + // Calculate expected results on host + std::vector expected(output); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / block_size; i++) + { + std::sort( + expected.begin() + (i * block_size), + expected.begin() + ((i + 1) * block_size), + binary_op + ); + } + + // Preparing device + key_type * device_key_output; + HIP_CHECK(hipMalloc(&device_key_output, output.size() * sizeof(key_type))); + + HIP_CHECK( + hipMemcpy( + device_key_output, output.data(), + output.size() * sizeof(key_type), + hipMemcpyHostToDevice + ) ); - } - // Preparing device - key_type * device_key_output; - HIP_CHECK(hipMalloc(&device_key_output, output.size() * sizeof(key_type))); - - HIP_CHECK( - hipMemcpy( - device_key_output, output.data(), - output.size() * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(sort_key_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_key_output - ); - - // Reading results back - HIP_CHECK( - hipMemcpy( - output.data(), device_key_output, - output.size() * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - test_utils::assert_eq(output, expected); - - HIP_CHECK(hipFree(device_key_output)); + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(sort_key_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_key_output + ); + + // Reading results back + HIP_CHECK( + hipMemcpy( + output.data(), device_key_output, + output.size() * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); + + test_utils::assert_eq(output, expected); + + HIP_CHECK(hipFree(device_key_output)); + } + } template @@ -160,96 +167,103 @@ TYPED_TEST(RocprimBlockSortTests, SortKeyValue) const size_t size = block_size * 1134; const size_t grid_size = size / block_size; - // Generate data - 
std::vector output_key = test_utils::get_random_data(size, 0, 100); - std::vector output_value = test_utils::get_random_data(size, -100, 100); + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + { + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output_key = test_utils::get_random_data(size, 0, 100, seed_value); + std::vector output_value = test_utils::get_random_data(size, -100, 100, seed_value); + + // Combine vectors to form pairs with key and value + std::vector> target(size); + for (unsigned i = 0; i < target.size(); i++) + target[i] = std::make_pair(output_key[i], output_value[i]); + + // Calculate expected results on host + using key_value = std::pair; + std::vector expected(target); + for(size_t i = 0; i < expected.size() / block_size; i++) + { + std::sort( + expected.begin() + (i * block_size), + expected.begin() + ((i + 1) * block_size), + pair_comparator() + ); + } + + // Preparing device + key_type * device_key_output; + HIP_CHECK(hipMalloc(&device_key_output, output_key.size() * sizeof(key_type))); + value_type * device_value_output; + HIP_CHECK(hipMalloc(&device_value_output, output_value.size() * sizeof(value_type))); + + HIP_CHECK( + hipMemcpy( + device_key_output, output_key.data(), + output_key.size() * sizeof(key_type), + hipMemcpyHostToDevice + ) + ); - // Combine vectors to form pairs with key and value - std::vector> target(size); - for (unsigned i = 0; i < target.size(); i++) - target[i] = std::make_pair(output_key[i], output_value[i]); + HIP_CHECK( + hipMemcpy( + device_value_output, output_value.data(), + output_value.size() * sizeof(value_type), + hipMemcpyHostToDevice + ) + ); - // Calculate expected results on host - using key_value = std::pair; - std::vector expected(target); - for(size_t i = 0; i < expected.size() / block_size; i++) - { - std::sort( - expected.begin() + (i * block_size), - 
expected.begin() + ((i + 1) * block_size), - pair_comparator() + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(sort_key_value_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_key_output, device_value_output ); - } - // Preparing device - key_type * device_key_output; - HIP_CHECK(hipMalloc(&device_key_output, output_key.size() * sizeof(key_type))); - value_type * device_value_output; - HIP_CHECK(hipMalloc(&device_value_output, output_value.size() * sizeof(value_type))); - - HIP_CHECK( - hipMemcpy( - device_key_output, output_key.data(), - output_key.size() * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_value_output, output_value.data(), - output_value.size() * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(sort_key_value_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_key_output, device_value_output - ); - - // Reading results back - HIP_CHECK( - hipMemcpy( - output_key.data(), device_key_output, - output_key.size() * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_value.data(), device_value_output, - output_value.size() * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - - std::vector expected_key(expected.size()); - std::vector expected_value(expected.size()); - for(size_t i = 0; i < expected.size(); i++) - { - expected_key[i] = expected[i].first; - expected_value[i] = expected[i].second; - } + // Reading results back + HIP_CHECK( + hipMemcpy( + output_key.data(), device_key_output, + output_key.size() * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); - // Keys are sorted, Values order not guaranteed - // Sort subsets where key was the same to make sure all values are still present - value_op_type value_op; - eq_op_type eq_op; - for (size_t i = 0; i < output_key.size();) - { - auto j = i; - for (; j < output_key.size() && eq_op(output_key[j], output_key[i]); ++j) { } - 
std::sort(output_value.begin() + i, output_value.begin() + j, value_op); - std::sort(expected_value.begin() + i, expected_value.begin() + j, value_op); - i = j; + HIP_CHECK( + hipMemcpy( + output_value.data(), device_value_output, + output_value.size() * sizeof(value_type), + hipMemcpyDeviceToHost + ) + ); + + std::vector expected_key(expected.size()); + std::vector expected_value(expected.size()); + for(size_t i = 0; i < expected.size(); i++) + { + expected_key[i] = expected[i].first; + expected_value[i] = expected[i].second; + } + + // Keys are sorted, Values order not guaranteed + // Sort subsets where key was the same to make sure all values are still present + value_op_type value_op; + eq_op_type eq_op; + for (size_t i = 0; i < output_key.size();) + { + auto j = i; + for (; j < output_key.size() && eq_op(output_key[j], output_key[i]); ++j) { } + std::sort(output_value.begin() + i, output_value.begin() + j, value_op); + std::sort(expected_value.begin() + i, expected_value.begin() + j, value_op); + i = j; + } + + test_utils::assert_eq(output_key, expected_key); + test_utils::assert_eq(output_value, expected_value); } - test_utils::assert_eq(output_key, expected_key); - test_utils::assert_eq(output_value, expected_value); } template @@ -289,94 +303,101 @@ TYPED_TEST(RocprimBlockSortTests, CustomSortKeyValue) const size_t size = block_size * 1134; const size_t grid_size = size / block_size; - // Generate data - std::vector output_key = test_utils::get_random_data(size, 0, 100); - std::vector output_value = test_utils::get_random_data(size, -100, 100); + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + { + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output_key = test_utils::get_random_data(size, 0, 100, seed_value); + std::vector output_value = test_utils::get_random_data(size, -100, 100, seed_value); + + // Combine vectors to form pairs with key and value + std::vector> target(size); + for (unsigned i = 0; i < target.size(); i++) + target[i] = std::make_pair(output_key[i], output_value[i]); + + // Calculate expected results on host + using key_value = std::pair; + std::vector expected(target); + for(size_t i = 0; i < expected.size() / block_size; i++) + { + std::sort( + expected.begin() + (i * block_size), + expected.begin() + ((i + 1) * block_size), + key_value_comparator() + ); + } + + // Preparing device + key_type * device_key_output; + HIP_CHECK(hipMalloc(&device_key_output, output_key.size() * sizeof(key_type))); + value_type * device_value_output; + HIP_CHECK(hipMalloc(&device_value_output, output_value.size() * sizeof(value_type))); + + HIP_CHECK( + hipMemcpy( + device_key_output, output_key.data(), + output_key.size() * sizeof(key_type), + hipMemcpyHostToDevice + ) + ); - // Combine vectors to form pairs with key and value - std::vector> target(size); - for (unsigned i = 0; i < target.size(); i++) - target[i] = std::make_pair(output_key[i], output_value[i]); + HIP_CHECK( + hipMemcpy( + device_value_output, output_value.data(), + output_value.size() * sizeof(value_type), + hipMemcpyHostToDevice + ) + ); - // Calculate expected results on host - using key_value = std::pair; - std::vector expected(target); - for(size_t i = 0; i < expected.size() / block_size; i++) - { - std::sort( - expected.begin() + (i * block_size), - expected.begin() + ((i + 1) * block_size), - key_value_comparator() + // Running kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(custom_sort_key_value_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_key_output, device_value_output ); - } - // 
Preparing device - key_type * device_key_output; - HIP_CHECK(hipMalloc(&device_key_output, output_key.size() * sizeof(key_type))); - value_type * device_value_output; - HIP_CHECK(hipMalloc(&device_value_output, output_value.size() * sizeof(value_type))); - - HIP_CHECK( - hipMemcpy( - device_key_output, output_key.data(), - output_key.size() * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - device_value_output, output_value.data(), - output_value.size() * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - // Running kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(custom_sort_key_value_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_key_output, device_value_output - ); - - // Reading results back - HIP_CHECK( - hipMemcpy( - output_key.data(), device_key_output, - output_key.size() * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_value.data(), device_value_output, - output_value.size() * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - - std::vector expected_key(expected.size()); - std::vector expected_value(expected.size()); - for(size_t i = 0; i < expected.size(); i++) - { - expected_key[i] = expected[i].first; - expected_value[i] = expected[i].second; - } + // Reading results back + HIP_CHECK( + hipMemcpy( + output_key.data(), device_key_output, + output_key.size() * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); - // Keys are sorted, Values order not guaranteed - // Sort subsets where key was the same to make sure all values are still present - value_op_type value_op; - eq_op_type eq_op; - for (size_t i = 0; i < output_key.size();) - { - auto j = i; - for (; j < output_key.size() && eq_op(output_key[j], output_key[i]); ++j) { } - std::sort(output_value.begin() + i, output_value.begin() + j, value_op); - std::sort(expected_value.begin() + i, expected_value.begin() + j, value_op); - i = j; - } + HIP_CHECK( + hipMemcpy( + output_value.data(), device_value_output, + 
output_value.size() * sizeof(value_type), + hipMemcpyDeviceToHost + ) + ); - test_utils::assert_eq(output_key, expected_key); - test_utils::assert_eq(output_value, expected_value); + std::vector expected_key(expected.size()); + std::vector expected_value(expected.size()); + for(size_t i = 0; i < expected.size(); i++) + { + expected_key[i] = expected[i].first; + expected_value[i] = expected[i].second; + } + + // Keys are sorted, Values order not guaranteed + // Sort subsets where key was the same to make sure all values are still present + value_op_type value_op; + eq_op_type eq_op; + for (size_t i = 0; i < output_key.size();) + { + auto j = i; + for (; j < output_key.size() && eq_op(output_key[j], output_key[i]); ++j) { } + std::sort(output_value.begin() + i, output_value.begin() + j, value_op); + std::sort(expected_value.begin() + i, expected_value.begin() + j, value_op); + i = j; + } + + test_utils::assert_eq(output_key, expected_key); + test_utils::assert_eq(output_value, expected_value); + } + } diff --git a/test/rocprim/test_constant_iterator.cpp b/test/rocprim/test_constant_iterator.cpp index 9fc93dba5..21e4ad732 100644 --- a/test/rocprim/test_constant_iterator.cpp +++ b/test/rocprim/test_constant_iterator.cpp @@ -83,51 +83,58 @@ TYPED_TEST(RocprimConstantIteratorTests, Transform) hipStream_t stream = 0; // default - // Create constant_iterator with random starting point - const auto value = test_utils::get_random_value(0, 200); - Iterator input_begin(value); - - std::vector output(size); - T * d_output; - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(T))); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected(size, transform()(value)); - - // Run - HIP_CHECK( - rocprim::transform( - input_begin, d_output, size, - transform(), stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), 
d_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Validating results - for(size_t i = 0; i < output.size(); i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - if(std::is_integral::value) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Create constant_iterator with random starting point + const auto value = test_utils::get_random_value(0, 200, seed_value); + Iterator input_begin(value); + + std::vector output(size); + T * d_output; + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(T))); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected results on host + std::vector expected(size, transform()(value)); + + // Run + HIP_CHECK( + rocprim::transform( + input_begin, d_output, size, + transform(), stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Validating results + for(size_t i = 0; i < output.size(); i++) { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; + if(std::is_integral::value) + { + ASSERT_EQ(output[i], expected[i]) << "where index = " << i; + } + else if(std::is_floating_point::value) + { + auto tolerance = std::max(std::abs(0.1f * expected[i]), T(0.01f)); + ASSERT_NEAR(output[i], expected[i], tolerance) << "where index = " << i; + } } - else if(std::is_floating_point::value) - { - auto tolerance = std::max(std::abs(0.1f * expected[i]), T(0.01f)); - ASSERT_NEAR(output[i], expected[i], tolerance) << "where index = " << i; - } - } - hipFree(d_output); + hipFree(d_output); + } + } diff --git a/test/rocprim/test_counting_iterator.cpp b/test/rocprim/test_counting_iterator.cpp index bb1350362..cd108ef79 100644 --- 
a/test/rocprim/test_counting_iterator.cpp +++ b/test/rocprim/test_counting_iterator.cpp @@ -83,48 +83,55 @@ TYPED_TEST(RocprimCountingIteratorTests, Transform) hipStream_t stream = 0; // default - // Create counting_iterator with random starting point - Iterator input_begin(test_utils::get_random_value(0, 200)); - - std::vector output(size); - T * d_output; - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(T))); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected(size); - std::transform( - input_begin, - input_begin + size, - expected.begin(), - transform() - ); - - // Run - HIP_CHECK( - rocprim::transform( - input_begin, d_output, size, - transform(), stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Validating results - for(size_t i = 0; i < output.size(); i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Create counting_iterator with random starting point + Iterator input_begin(test_utils::get_random_value(0, 200, seed_value)); + + std::vector output(size); + T * d_output; + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(T))); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected results on host + std::vector expected(size); + std::transform( + input_begin, + input_begin + size, + expected.begin(), + transform() + ); + + // Run + HIP_CHECK( + rocprim::transform( + input_begin, d_output, size, + transform(), stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Validating results + for(size_t i = 0; i < output.size(); i++) + { + ASSERT_EQ(output[i], expected[i]) << "where index = " << i; + } + + hipFree(d_output); } - - hipFree(d_output); + } diff --git a/test/rocprim/test_device_binary_search.cpp b/test/rocprim/test_device_binary_search.cpp index cc7c25ec7..763a8b407 100644 --- a/test/rocprim/test_device_binary_search.cpp +++ b/test/rocprim/test_device_binary_search.cpp @@ -76,10 +76,10 @@ typedef ::testing::Types< TYPED_TEST_CASE(RocprimDeviceBinarySearch, Params); -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 1, 10, 53, 211, 1024, 2345, 4096, 34567, (1 << 16) - 1220, (1 << 22) - 76543 }; - const std::vector random_sizes = test_utils::get_random_data(5, 1, 100000); + const std::vector random_sizes = test_utils::get_random_data(5, 1, 100000, seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); return sizes; } @@ -97,97 +97,105 @@ TYPED_TEST(RocprimDeviceBinarySearch, LowerBound) compare_op_type compare_op; - for(size_t size : get_sizes()) + for 
(size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - const size_t haystack_size = size; - const size_t needles_size = std::sqrt(size); - const size_t d = haystack_size / 100; - - // Generate data - std::vector haystack = test_utils::get_random_data( - haystack_size, 0, haystack_size + 2 * d - ); - std::sort(haystack.begin(), haystack.end(), compare_op); - - // Use a narrower range for needles for checking out-of-haystack cases - std::vector needles = test_utils::get_random_data( - needles_size, d, haystack_size + d - ); - - haystack_type * d_haystack; - needle_type * d_needles; - output_type * d_output; - HIP_CHECK(hipMalloc(&d_haystack, haystack_size * sizeof(haystack_type))); - HIP_CHECK(hipMalloc(&d_needles, needles_size * sizeof(needle_type))); - HIP_CHECK(hipMalloc(&d_output, needles_size * sizeof(output_type))); - HIP_CHECK( - hipMemcpy( - d_haystack, haystack.data(), - haystack_size * sizeof(haystack_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_needles, needles.data(), - needles_size * sizeof(needle_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector expected(needles_size); - for(size_t i = 0; i < needles_size; i++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + for(size_t size : get_sizes(seed_value)) { - expected[i] = - std::lower_bound(haystack.begin(), haystack.end(), needles[i], compare_op) - - haystack.begin(); + SCOPED_TRACE(testing::Message() << "with size = " << size); + + const size_t haystack_size = size; + const size_t needles_size = std::sqrt(size); + const size_t d = haystack_size / 100; + + // Generate data + std::vector haystack = test_utils::get_random_data( + haystack_size, 0, haystack_size + 2 * d, seed_value + ); + std::sort(haystack.begin(), haystack.end(), compare_op); + + // Use a narrower range for needles for checking out-of-haystack cases + std::vector needles = test_utils::get_random_data( + needles_size, d, haystack_size + d, seed_value + ); + + haystack_type * d_haystack; + needle_type * d_needles; + output_type * d_output; + HIP_CHECK(hipMalloc(&d_haystack, haystack_size * sizeof(haystack_type))); + HIP_CHECK(hipMalloc(&d_needles, needles_size * sizeof(needle_type))); + HIP_CHECK(hipMalloc(&d_output, needles_size * sizeof(output_type))); + HIP_CHECK( + hipMemcpy( + d_haystack, haystack.data(), + haystack_size * sizeof(haystack_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_needles, needles.data(), + needles_size * sizeof(needle_type), + hipMemcpyHostToDevice + ) + ); + + // Calculate expected results on host + std::vector expected(needles_size); + for(size_t i = 0; i < needles_size; i++) + { + expected[i] = + std::lower_bound(haystack.begin(), haystack.end(), needles[i], compare_op) - + haystack.begin(); + } + + void * d_temporary_storage = nullptr; + size_t temporary_storage_bytes; + HIP_CHECK( + rocprim::lower_bound( + d_temporary_storage, temporary_storage_bytes, + d_haystack, d_needles, d_output, + haystack_size, needles_size, + compare_op, + stream, debug_synchronous + ) + ); + + ASSERT_GT(temporary_storage_bytes, 0); + + HIP_CHECK(hipMalloc(&d_temporary_storage, 
temporary_storage_bytes)); + + HIP_CHECK( + rocprim::lower_bound( + d_temporary_storage, temporary_storage_bytes, + d_haystack, d_needles, d_output, + haystack_size, needles_size, + compare_op, + stream, debug_synchronous + ) + ); + + std::vector output(needles_size); + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + needles_size * sizeof(output_type), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_haystack)); + HIP_CHECK(hipFree(d_needles)); + HIP_CHECK(hipFree(d_output)); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); } - - void * d_temporary_storage = nullptr; - size_t temporary_storage_bytes; - HIP_CHECK( - rocprim::lower_bound( - d_temporary_storage, temporary_storage_bytes, - d_haystack, d_needles, d_output, - haystack_size, needles_size, - compare_op, - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0); - - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - rocprim::lower_bound( - d_temporary_storage, temporary_storage_bytes, - d_haystack, d_needles, d_output, - haystack_size, needles_size, - compare_op, - stream, debug_synchronous - ) - ); - - std::vector output(needles_size); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - needles_size * sizeof(output_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_haystack)); - HIP_CHECK(hipFree(d_needles)); - HIP_CHECK(hipFree(d_output)); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); } + + } TYPED_TEST(RocprimDeviceBinarySearch, UpperBound) @@ -203,97 +211,105 @@ TYPED_TEST(RocprimDeviceBinarySearch, UpperBound) compare_op_type compare_op; - for(size_t size : get_sizes()) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - const size_t haystack_size = size; - const size_t needles_size = std::sqrt(size); - 
const size_t d = haystack_size / 100; - - // Generate data - std::vector haystack = test_utils::get_random_data( - haystack_size, 0, haystack_size + 2 * d - ); - std::sort(haystack.begin(), haystack.end(), compare_op); - - // Use a narrower range for needles for checking out-of-haystack cases - std::vector needles = test_utils::get_random_data( - needles_size, d, haystack_size + d - ); - - haystack_type * d_haystack; - needle_type * d_needles; - output_type * d_output; - HIP_CHECK(hipMalloc(&d_haystack, haystack_size * sizeof(haystack_type))); - HIP_CHECK(hipMalloc(&d_needles, needles_size * sizeof(needle_type))); - HIP_CHECK(hipMalloc(&d_output, needles_size * sizeof(output_type))); - HIP_CHECK( - hipMemcpy( - d_haystack, haystack.data(), - haystack_size * sizeof(haystack_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_needles, needles.data(), - needles_size * sizeof(needle_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector expected(needles_size); - for(size_t i = 0; i < needles_size; i++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + for(size_t size : get_sizes(seed_value)) { - expected[i] = - std::upper_bound(haystack.begin(), haystack.end(), needles[i], compare_op) - - haystack.begin(); + SCOPED_TRACE(testing::Message() << "with size = " << size); + + const size_t haystack_size = size; + const size_t needles_size = std::sqrt(size); + const size_t d = haystack_size / 100; + + // Generate data + std::vector haystack = test_utils::get_random_data( + haystack_size, 0, haystack_size + 2 * d, seed_value + ); + std::sort(haystack.begin(), haystack.end(), compare_op); + + // Use a narrower range for needles for checking out-of-haystack cases + std::vector needles = test_utils::get_random_data( + needles_size, d, haystack_size + d, seed_value + ); + + haystack_type * d_haystack; + needle_type * d_needles; + output_type * d_output; + HIP_CHECK(hipMalloc(&d_haystack, haystack_size * sizeof(haystack_type))); + HIP_CHECK(hipMalloc(&d_needles, needles_size * sizeof(needle_type))); + HIP_CHECK(hipMalloc(&d_output, needles_size * sizeof(output_type))); + HIP_CHECK( + hipMemcpy( + d_haystack, haystack.data(), + haystack_size * sizeof(haystack_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_needles, needles.data(), + needles_size * sizeof(needle_type), + hipMemcpyHostToDevice + ) + ); + + // Calculate expected results on host + std::vector expected(needles_size); + for(size_t i = 0; i < needles_size; i++) + { + expected[i] = + std::upper_bound(haystack.begin(), haystack.end(), needles[i], compare_op) - + haystack.begin(); + } + + void * d_temporary_storage = nullptr; + size_t temporary_storage_bytes; + HIP_CHECK( + rocprim::upper_bound( + d_temporary_storage, temporary_storage_bytes, + d_haystack, d_needles, d_output, + haystack_size, needles_size, + compare_op, + stream, debug_synchronous + ) + ); + + ASSERT_GT(temporary_storage_bytes, 0); + + HIP_CHECK(hipMalloc(&d_temporary_storage, 
temporary_storage_bytes)); + + HIP_CHECK( + rocprim::upper_bound( + d_temporary_storage, temporary_storage_bytes, + d_haystack, d_needles, d_output, + haystack_size, needles_size, + compare_op, + stream, debug_synchronous + ) + ); + + std::vector output(needles_size); + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + needles_size * sizeof(output_type), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_haystack)); + HIP_CHECK(hipFree(d_needles)); + HIP_CHECK(hipFree(d_output)); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); } - - void * d_temporary_storage = nullptr; - size_t temporary_storage_bytes; - HIP_CHECK( - rocprim::upper_bound( - d_temporary_storage, temporary_storage_bytes, - d_haystack, d_needles, d_output, - haystack_size, needles_size, - compare_op, - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0); - - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - rocprim::upper_bound( - d_temporary_storage, temporary_storage_bytes, - d_haystack, d_needles, d_output, - haystack_size, needles_size, - compare_op, - stream, debug_synchronous - ) - ); - - std::vector output(needles_size); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - needles_size * sizeof(output_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_haystack)); - HIP_CHECK(hipFree(d_needles)); - HIP_CHECK(hipFree(d_output)); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); } + + } TYPED_TEST(RocprimDeviceBinarySearch, BinarySearch) @@ -309,93 +325,99 @@ TYPED_TEST(RocprimDeviceBinarySearch, BinarySearch) compare_op_type compare_op; - for(size_t size : get_sizes()) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - const size_t haystack_size = size; - const size_t needles_size = std::sqrt(size); - 
const size_t d = haystack_size / 100; - - // Generate data - std::vector haystack = test_utils::get_random_data( - haystack_size, 0, haystack_size + 2 * d - ); - std::sort(haystack.begin(), haystack.end(), compare_op); - - // Use a narrower range for needles for checking out-of-haystack cases - std::vector needles = test_utils::get_random_data( - needles_size, d, haystack_size + d - ); - - haystack_type * d_haystack; - needle_type * d_needles; - output_type * d_output; - HIP_CHECK(hipMalloc(&d_haystack, haystack_size * sizeof(haystack_type))); - HIP_CHECK(hipMalloc(&d_needles, needles_size * sizeof(needle_type))); - HIP_CHECK(hipMalloc(&d_output, needles_size * sizeof(output_type))); - HIP_CHECK( - hipMemcpy( - d_haystack, haystack.data(), - haystack_size * sizeof(haystack_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_needles, needles.data(), - needles_size * sizeof(needle_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector expected(needles_size); - for(size_t i = 0; i < needles_size; i++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + for(size_t size : get_sizes(seed_value)) { - expected[i] = std::binary_search(haystack.begin(), haystack.end(), needles[i], compare_op); + SCOPED_TRACE(testing::Message() << "with size = " << size); + + const size_t haystack_size = size; + const size_t needles_size = std::sqrt(size); + const size_t d = haystack_size / 100; + + // Generate data + std::vector haystack = test_utils::get_random_data( + haystack_size, 0, haystack_size + 2 * d, seed_value + ); + std::sort(haystack.begin(), haystack.end(), compare_op); + + // Use a narrower range for needles for checking out-of-haystack cases + std::vector needles = test_utils::get_random_data( + needles_size, d, haystack_size + d, seed_value + ); + + haystack_type * d_haystack; + needle_type * d_needles; + output_type * d_output; + HIP_CHECK(hipMalloc(&d_haystack, haystack_size * sizeof(haystack_type))); + HIP_CHECK(hipMalloc(&d_needles, needles_size * sizeof(needle_type))); + HIP_CHECK(hipMalloc(&d_output, needles_size * sizeof(output_type))); + HIP_CHECK( + hipMemcpy( + d_haystack, haystack.data(), + haystack_size * sizeof(haystack_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_needles, needles.data(), + needles_size * sizeof(needle_type), + hipMemcpyHostToDevice + ) + ); + + // Calculate expected results on host + std::vector expected(needles_size); + for(size_t i = 0; i < needles_size; i++) + { + expected[i] = std::binary_search(haystack.begin(), haystack.end(), needles[i], compare_op); + } + + void * d_temporary_storage = nullptr; + size_t temporary_storage_bytes; + HIP_CHECK( + rocprim::binary_search( + d_temporary_storage, temporary_storage_bytes, + d_haystack, d_needles, d_output, + haystack_size, needles_size, + compare_op, + stream, debug_synchronous + ) + ); + + ASSERT_GT(temporary_storage_bytes, 0); + + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + HIP_CHECK( + 
rocprim::binary_search( + d_temporary_storage, temporary_storage_bytes, + d_haystack, d_needles, d_output, + haystack_size, needles_size, + compare_op, + stream, debug_synchronous + ) + ); + + std::vector output(needles_size); + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + needles_size * sizeof(output_type), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_haystack)); + HIP_CHECK(hipFree(d_needles)); + HIP_CHECK(hipFree(d_output)); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); } - - void * d_temporary_storage = nullptr; - size_t temporary_storage_bytes; - HIP_CHECK( - rocprim::binary_search( - d_temporary_storage, temporary_storage_bytes, - d_haystack, d_needles, d_output, - haystack_size, needles_size, - compare_op, - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0); - - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - rocprim::binary_search( - d_temporary_storage, temporary_storage_bytes, - d_haystack, d_needles, d_output, - haystack_size, needles_size, - compare_op, - stream, debug_synchronous - ) - ); - - std::vector output(needles_size); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - needles_size * sizeof(output_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_haystack)); - HIP_CHECK(hipFree(d_needles)); - HIP_CHECK(hipFree(d_output)); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); - } + } } diff --git a/test/rocprim/test_device_histogram.cpp b/test/rocprim/test_device_histogram.cpp index a924be7f4..c0035de57 100644 --- a/test/rocprim/test_device_histogram.cpp +++ b/test/rocprim/test_device_histogram.cpp @@ -73,7 +73,7 @@ std::vector> get_dims() // Generate values ouside the desired histogram range (+-10%) // (correctly handling test cases like uchar [0, 256), ushort [0, 65536)) template -inline auto get_random_samples(size_t 
size, U min, U max) +inline auto get_random_samples(size_t size, U min, U max, int seed_value) -> typename std::enable_if::value, std::vector>::type { const long long min1 = static_cast(min); @@ -82,12 +82,13 @@ inline auto get_random_samples(size_t size, U min, U max) return test_utils::get_random_data( size, static_cast(std::max(min1 - d / 10, static_cast(std::numeric_limits::lowest()))), - static_cast(std::min(max1 + d / 10, static_cast(std::numeric_limits::max()))) + static_cast(std::min(max1 + d / 10, static_cast(std::numeric_limits::max()))), + seed_value ); } template -inline auto get_random_samples(size_t size, U min, U max) +inline auto get_random_samples(size_t size, U min, U max, int seed_value) -> typename std::enable_if::value, std::vector>::type { const double min1 = static_cast(min); @@ -96,7 +97,8 @@ inline auto get_random_samples(size_t size, U min, U max) return test_utils::get_random_data( size, static_cast(std::max(min1 - d / 10, static_cast(std::numeric_limits::lowest()))), - static_cast(std::min(max1 + d / 10, static_cast(std::numeric_limits::max()))) + static_cast(std::min(max1 + d / 10, static_cast(std::numeric_limits::max()))), + seed_value ); } @@ -193,113 +195,120 @@ TYPED_TEST(RocprimDeviceHistogramEven, Even) const size_t row_stride_bytes = row_stride * sizeof(sample_type); const size_t size = std::max(1, rows * row_stride); - // Generate data - std::vector input = get_random_samples(size, lower_level, upper_level); - - sample_type * d_input; - counter_type * d_histogram; - HIP_CHECK(hipMalloc(&d_input, size * sizeof(sample_type))); - HIP_CHECK(hipMalloc(&d_histogram, bins * sizeof(counter_type))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - size * sizeof(sample_type), - hipMemcpyHostToDevice - ) - ); + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + { + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - // Calculate expected results on host - std::vector histogram_expected(bins, 0); - const level_type scale = (upper_level - lower_level) / bins; - for(size_t row = 0; row < rows; row++) - { - for(size_t column = 0; column < columns; column++) + // Generate data + std::vector input = get_random_samples(size, lower_level, upper_level, seed_value); + + sample_type * d_input; + counter_type * d_histogram; + HIP_CHECK(hipMalloc(&d_input, size * sizeof(sample_type))); + HIP_CHECK(hipMalloc(&d_histogram, bins * sizeof(counter_type))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + size * sizeof(sample_type), + hipMemcpyHostToDevice + ) + ); + + // Calculate expected results on host + std::vector histogram_expected(bins, 0); + const level_type scale = (upper_level - lower_level) / bins; + for(size_t row = 0; row < rows; row++) { - const sample_type sample = input[row * row_stride + column]; - const level_type s = static_cast(sample); - if(s >= lower_level && s < upper_level) + for(size_t column = 0; column < columns; column++) { - const int bin = (s - lower_level) / scale; - histogram_expected[bin]++; + const sample_type sample = input[row * row_stride + column]; + const level_type s = static_cast(sample); + if(s >= lower_level && s < upper_level) + { + const int bin = (s - lower_level) / scale; + histogram_expected[bin]++; + } } } - } - using config = rp::histogram_config>; + using config = rp::histogram_config>; - size_t temporary_storage_bytes = 0; - if(rows == 1) - { - HIP_CHECK( - rp::histogram_even( - nullptr, temporary_storage_bytes, - d_input, columns, - d_histogram, - bins + 1, lower_level, upper_level, - stream, debug_synchronous - ) - ); - } - else - { - HIP_CHECK( - rp::histogram_even( - nullptr, temporary_storage_bytes, - d_input, columns, rows, row_stride_bytes, - d_histogram, - bins + 1, lower_level, upper_level, - stream, debug_synchronous - ) - ); - } + 
size_t temporary_storage_bytes = 0; + if(rows == 1) + { + HIP_CHECK( + rp::histogram_even( + nullptr, temporary_storage_bytes, + d_input, columns, + d_histogram, + bins + 1, lower_level, upper_level, + stream, debug_synchronous + ) + ); + } + else + { + HIP_CHECK( + rp::histogram_even( + nullptr, temporary_storage_bytes, + d_input, columns, rows, row_stride_bytes, + d_histogram, + bins + 1, lower_level, upper_level, + stream, debug_synchronous + ) + ); + } - ASSERT_GT(temporary_storage_bytes, 0U); + ASSERT_GT(temporary_storage_bytes, 0U); - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - if(rows == 1) - { - HIP_CHECK( - rp::histogram_even( - d_temporary_storage, temporary_storage_bytes, - d_input, columns, - d_histogram, - bins + 1, lower_level, upper_level, - stream, debug_synchronous - ) - ); - } - else - { + if(rows == 1) + { + HIP_CHECK( + rp::histogram_even( + d_temporary_storage, temporary_storage_bytes, + d_input, columns, + d_histogram, + bins + 1, lower_level, upper_level, + stream, debug_synchronous + ) + ); + } + else + { + HIP_CHECK( + rp::histogram_even( + d_temporary_storage, temporary_storage_bytes, + d_input, columns, rows, row_stride_bytes, + d_histogram, + bins + 1, lower_level, upper_level, + stream, debug_synchronous + ) + ); + } + + std::vector histogram(bins); HIP_CHECK( - rp::histogram_even( - d_temporary_storage, temporary_storage_bytes, - d_input, columns, rows, row_stride_bytes, - d_histogram, - bins + 1, lower_level, upper_level, - stream, debug_synchronous + hipMemcpy( + histogram.data(), d_histogram, + bins * sizeof(counter_type), + hipMemcpyDeviceToHost ) ); - } - std::vector histogram(bins); - HIP_CHECK( - hipMemcpy( - histogram.data(), d_histogram, - bins * sizeof(counter_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - 
HIP_CHECK(hipFree(d_input)); - HIP_CHECK(hipFree(d_histogram)); + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_input)); + HIP_CHECK(hipFree(d_histogram)); - for(size_t i = 0; i < bins; i++) - { - ASSERT_EQ(histogram[i], histogram_expected[i]); + for(size_t i = 0; i < bins; i++) + { + ASSERT_EQ(histogram[i], histogram_expected[i]); + } } + } } @@ -402,124 +411,132 @@ TYPED_TEST(RocprimDeviceHistogramRange, Range) } levels.push_back(level); - std::vector input = get_random_samples(size, levels[0], levels[bins]); - - sample_type * d_input; - level_type * d_levels; - counter_type * d_histogram; - HIP_CHECK(hipMalloc(&d_input, size * sizeof(sample_type))); - HIP_CHECK(hipMalloc(&d_levels, (bins + 1) * sizeof(level_type))); - HIP_CHECK(hipMalloc(&d_histogram, bins * sizeof(counter_type))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - size * sizeof(sample_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_levels, levels.data(), - (bins + 1) * sizeof(level_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector histogram_expected(bins, 0); - for(size_t row = 0; row < rows; row++) - { - for(size_t column = 0; column < columns; column++) - { - const sample_type sample = input[row * row_stride + column]; - const level_type s = static_cast(sample); - if(s >= levels[0] && s < levels[bins]) - { - const auto bin_iter = std::upper_bound(levels.begin(), levels.end(), s); - histogram_expected[bin_iter - levels.begin() - 1]++; - } - } - } + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + { + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - rp::transform_iterator, sample_type> d_input2( - d_input, - transform_op() - ); + std::vector input = get_random_samples(size, levels[0], levels[bins], seed_value); - size_t temporary_storage_bytes = 0; - if(rows == 1) - { + sample_type * d_input; + level_type * d_levels; + counter_type * d_histogram; + HIP_CHECK(hipMalloc(&d_input, size * sizeof(sample_type))); + HIP_CHECK(hipMalloc(&d_levels, (bins + 1) * sizeof(level_type))); + HIP_CHECK(hipMalloc(&d_histogram, bins * sizeof(counter_type))); HIP_CHECK( - rp::histogram_range( - nullptr, temporary_storage_bytes, - d_input2, columns, - d_histogram, - bins + 1, d_levels, - stream, debug_synchronous + hipMemcpy( + d_input, input.data(), + size * sizeof(sample_type), + hipMemcpyHostToDevice ) ); - } - else - { HIP_CHECK( - rp::histogram_range( - nullptr, temporary_storage_bytes, - d_input2, columns, rows, row_stride_bytes, - d_histogram, - bins + 1, d_levels, - stream, debug_synchronous + hipMemcpy( + d_levels, levels.data(), + (bins + 1) * sizeof(level_type), + hipMemcpyHostToDevice ) ); - } - - ASSERT_GT(temporary_storage_bytes, 0U); - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + // Calculate expected results on host + std::vector histogram_expected(bins, 0); + for(size_t row = 0; row < rows; row++) + { + for(size_t column = 0; column < columns; column++) + { + const sample_type sample = input[row * row_stride + column]; + const level_type s = static_cast(sample); + if(s >= levels[0] && s < levels[bins]) + { + const auto bin_iter = std::upper_bound(levels.begin(), levels.end(), s); + histogram_expected[bin_iter - levels.begin() - 1]++; + } + } + } - if(rows == 1) - { - HIP_CHECK( - rp::histogram_range( - d_temporary_storage, temporary_storage_bytes, - d_input2, columns, - d_histogram, - bins + 1, d_levels, - stream, debug_synchronous - ) + rp::transform_iterator, 
sample_type> d_input2( + d_input, + transform_op() ); - } - else - { + + size_t temporary_storage_bytes = 0; + if(rows == 1) + { + HIP_CHECK( + rp::histogram_range( + nullptr, temporary_storage_bytes, + d_input2, columns, + d_histogram, + bins + 1, d_levels, + stream, debug_synchronous + ) + ); + } + else + { + HIP_CHECK( + rp::histogram_range( + nullptr, temporary_storage_bytes, + d_input2, columns, rows, row_stride_bytes, + d_histogram, + bins + 1, d_levels, + stream, debug_synchronous + ) + ); + } + + ASSERT_GT(temporary_storage_bytes, 0U); + + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + if(rows == 1) + { + HIP_CHECK( + rp::histogram_range( + d_temporary_storage, temporary_storage_bytes, + d_input2, columns, + d_histogram, + bins + 1, d_levels, + stream, debug_synchronous + ) + ); + } + else + { + HIP_CHECK( + rp::histogram_range( + d_temporary_storage, temporary_storage_bytes, + d_input2, columns, rows, row_stride_bytes, + d_histogram, + bins + 1, d_levels, + stream, debug_synchronous + ) + ); + } + + std::vector histogram(bins); HIP_CHECK( - rp::histogram_range( - d_temporary_storage, temporary_storage_bytes, - d_input2, columns, rows, row_stride_bytes, - d_histogram, - bins + 1, d_levels, - stream, debug_synchronous + hipMemcpy( + histogram.data(), d_histogram, + bins * sizeof(counter_type), + hipMemcpyDeviceToHost ) ); - } - std::vector histogram(bins); - HIP_CHECK( - hipMemcpy( - histogram.data(), d_histogram, - bins * sizeof(counter_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_input)); - HIP_CHECK(hipFree(d_levels)); - HIP_CHECK(hipFree(d_histogram)); + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_input)); + HIP_CHECK(hipFree(d_levels)); + HIP_CHECK(hipFree(d_histogram)); - for(size_t i = 0; i < bins; i++) - { - ASSERT_EQ(histogram[i], histogram_expected[i]); + for(size_t i = 0; i < bins; i++) + { + 
ASSERT_EQ(histogram[i], histogram_expected[i]); + } } + + } } @@ -609,160 +626,168 @@ TYPED_TEST(RocprimDeviceHistogramMultiEven, MultiEven) const size_t row_stride_bytes = row_stride * sizeof(sample_type); const size_t size = std::max(1, rows * row_stride); - // Generate data - std::vector input(size); - for(unsigned int channel = 0; channel < channels; channel++) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - const size_t gen_columns = (row_stride + channels - 1) / channels; - const size_t gen_size = rows * gen_columns; + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - std::vector channel_input; - if(channel < active_channels) - { - channel_input = get_random_samples(gen_size, lower_level[channel], upper_level[channel]); - } - else - { - channel_input = get_random_samples(gen_size, lower_level[0], upper_level[0]); - } - // Interleave values - for(size_t row = 0; row < rows; row++) + // Generate data + std::vector input(size); + for(unsigned int channel = 0; channel < channels; channel++) { - for(size_t column = 0; column < gen_columns; column++) + const size_t gen_columns = (row_stride + channels - 1) / channels; + const size_t gen_size = rows * gen_columns; + + std::vector channel_input; + if(channel < active_channels) + { + channel_input = get_random_samples(gen_size, lower_level[channel], upper_level[channel], seed_value); + } + else + { + channel_input = get_random_samples(gen_size, lower_level[0], upper_level[0], seed_value); + } + // Interleave values + for(size_t row = 0; row < rows; row++) { - const size_t index = column * channels + channel; - if(index < row_stride) + for(size_t column = 0; column < gen_columns; column++) { - input[row * row_stride + index] = channel_input[row * gen_columns + column]; + const size_t index = column * channels + channel; + if(index < row_stride) + { + input[row * row_stride + index] = channel_input[row * 
gen_columns + column]; + } } } } - } - sample_type * d_input; - counter_type * d_histogram[active_channels]; - HIP_CHECK(hipMalloc(&d_input, size * sizeof(sample_type))); - for(unsigned int channel = 0; channel < active_channels; channel++) - { - HIP_CHECK(hipMalloc(&d_histogram[channel], bins[channel] * sizeof(counter_type))); - } - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - size * sizeof(sample_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector histogram_expected[active_channels]; - for(unsigned int channel = 0; channel < active_channels; channel++) - { - histogram_expected[channel] = std::vector(bins[channel], 0); - const level_type scale = (upper_level[channel] - lower_level[channel]) / bins[channel]; + sample_type * d_input; + counter_type * d_histogram[active_channels]; + HIP_CHECK(hipMalloc(&d_input, size * sizeof(sample_type))); + for(unsigned int channel = 0; channel < active_channels; channel++) + { + HIP_CHECK(hipMalloc(&d_histogram[channel], bins[channel] * sizeof(counter_type))); + } + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + size * sizeof(sample_type), + hipMemcpyHostToDevice + ) + ); - for(size_t row = 0; row < rows; row++) + // Calculate expected results on host + std::vector histogram_expected[active_channels]; + for(unsigned int channel = 0; channel < active_channels; channel++) { - for(size_t column = 0; column < columns; column++) + histogram_expected[channel] = std::vector(bins[channel], 0); + const level_type scale = (upper_level[channel] - lower_level[channel]) / bins[channel]; + + for(size_t row = 0; row < rows; row++) { - const sample_type sample = input[row * row_stride + column * channels + channel]; - const level_type s = static_cast(sample); - if(s >= lower_level[channel] && s < upper_level[channel]) + for(size_t column = 0; column < columns; column++) { - const int bin = (s - lower_level[channel]) / scale; - histogram_expected[channel][bin]++; + const sample_type sample = 
input[row * row_stride + column * channels + channel]; + const level_type s = static_cast(sample); + if(s >= lower_level[channel] && s < upper_level[channel]) + { + const int bin = (s - lower_level[channel]) / scale; + histogram_expected[channel][bin]++; + } } } } - } - rp::transform_iterator, sample_type> d_input2( - d_input, - transform_op() - ); + rp::transform_iterator, sample_type> d_input2( + d_input, + transform_op() + ); - size_t temporary_storage_bytes = 0; - if(rows == 1) - { - HIP_CHECK(( - rp::multi_histogram_even( - nullptr, temporary_storage_bytes, - d_input2, columns, - d_histogram, - num_levels, lower_level, upper_level, - stream, debug_synchronous - ) - )); - } - else - { - HIP_CHECK(( - rp::multi_histogram_even( - nullptr, temporary_storage_bytes, - d_input2, columns, rows, row_stride_bytes, - d_histogram, - num_levels, lower_level, upper_level, - stream, debug_synchronous - ) - )); - } + size_t temporary_storage_bytes = 0; + if(rows == 1) + { + HIP_CHECK(( + rp::multi_histogram_even( + nullptr, temporary_storage_bytes, + d_input2, columns, + d_histogram, + num_levels, lower_level, upper_level, + stream, debug_synchronous + ) + )); + } + else + { + HIP_CHECK(( + rp::multi_histogram_even( + nullptr, temporary_storage_bytes, + d_input2, columns, rows, row_stride_bytes, + d_histogram, + num_levels, lower_level, upper_level, + stream, debug_synchronous + ) + )); + } - ASSERT_GT(temporary_storage_bytes, 0U); + ASSERT_GT(temporary_storage_bytes, 0U); - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - if(rows == 1) - { - HIP_CHECK(( - rp::multi_histogram_even( - d_temporary_storage, temporary_storage_bytes, - d_input2, columns, - d_histogram, - num_levels, lower_level, upper_level, - stream, debug_synchronous - ) - )); - } - else - { - HIP_CHECK(( - rp::multi_histogram_even( - d_temporary_storage, 
temporary_storage_bytes, - d_input2, columns, rows, row_stride_bytes, - d_histogram, - num_levels, lower_level, upper_level, - stream, debug_synchronous - ) - )); - } - - std::vector histogram[active_channels]; - for(unsigned int channel = 0; channel < active_channels; channel++) - { - histogram[channel] = std::vector(bins[channel]); - HIP_CHECK( - hipMemcpy( - histogram[channel].data(), d_histogram[channel], - bins[channel] * sizeof(counter_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipFree(d_histogram[channel])); - } + if(rows == 1) + { + HIP_CHECK(( + rp::multi_histogram_even( + d_temporary_storage, temporary_storage_bytes, + d_input2, columns, + d_histogram, + num_levels, lower_level, upper_level, + stream, debug_synchronous + ) + )); + } + else + { + HIP_CHECK(( + rp::multi_histogram_even( + d_temporary_storage, temporary_storage_bytes, + d_input2, columns, rows, row_stride_bytes, + d_histogram, + num_levels, lower_level, upper_level, + stream, debug_synchronous + ) + )); + } - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_input)); + std::vector histogram[active_channels]; + for(unsigned int channel = 0; channel < active_channels; channel++) + { + histogram[channel] = std::vector(bins[channel]); + HIP_CHECK( + hipMemcpy( + histogram[channel].data(), d_histogram[channel], + bins[channel] * sizeof(counter_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipFree(d_histogram[channel])); + } - for(unsigned int channel = 0; channel < active_channels; channel++) - { - SCOPED_TRACE(testing::Message() << "with channel = " << channel); + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_input)); - for(size_t i = 0; i < bins[channel]; i++) + for(unsigned int channel = 0; channel < active_channels; channel++) { - ASSERT_EQ(histogram[channel][i], histogram_expected[channel][i]); + SCOPED_TRACE(testing::Message() << "with channel = " << channel); + + for(size_t i = 0; i < bins[channel]; i++) + { + ASSERT_EQ(histogram[channel][i], 
histogram_expected[channel][i]); + } } } + } } @@ -852,183 +877,192 @@ TYPED_TEST(RocprimDeviceHistogramMultiRange, MultiRange) const size_t row_stride_bytes = row_stride * sizeof(sample_type); const size_t size = std::max(1, rows * row_stride); - // Generate data - std::vector levels[active_channels]; - for(unsigned int channel = 0; channel < active_channels; channel++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - level_type level = TestFixture::params::start_level; - for(unsigned int bin = 0 ; bin < bins[channel]; bin++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector levels[active_channels]; + for(unsigned int channel = 0; channel < active_channels; channel++) { + level_type level = TestFixture::params::start_level; + for(unsigned int bin = 0 ; bin < bins[channel]; bin++) + { + levels[channel].push_back(level); + level += bin_length_dis[channel](gen); + } levels[channel].push_back(level); - level += bin_length_dis[channel](gen); } - levels[channel].push_back(level); - } - - std::vector input(size); - for(unsigned int channel = 0; channel < channels; channel++) - { - const size_t gen_columns = (row_stride + channels - 1) / channels; - const size_t gen_size = rows * gen_columns; - std::vector channel_input; - if(channel < active_channels) - { - channel_input = get_random_samples( - gen_size, levels[channel][0], levels[channel][bins[channel]] - ); - } - else + std::vector input(size); + for(unsigned int channel = 0; channel < channels; channel++) { - channel_input = get_random_samples(gen_size, levels[0][0], levels[0][bins[0]]); - } - // Interleave values - for(size_t row = 0; row < rows; row++) - { - for(size_t column = 0; column < gen_columns; column++) + const size_t gen_columns = (row_stride + channels - 1) / channels; + const size_t gen_size = rows * gen_columns; + + + std::vector channel_input; + if(channel < 
active_channels) + { + channel_input = get_random_samples( + gen_size, levels[channel][0], levels[channel][bins[channel]], seed_value + ); + } + else + { + channel_input = get_random_samples(gen_size, levels[0][0], levels[0][bins[0]], seed_value); + } + // Interleave values + for(size_t row = 0; row < rows; row++) { - const size_t index = column * channels + channel; - if(index < row_stride) + for(size_t column = 0; column < gen_columns; column++) { - input[row * row_stride + index] = channel_input[row * gen_columns + column]; + const size_t index = column * channels + channel; + if(index < row_stride) + { + input[row * row_stride + index] = channel_input[row * gen_columns + column]; + } } } } - } - sample_type * d_input; - level_type * d_levels[active_channels]; - counter_type * d_histogram[active_channels]; - HIP_CHECK(hipMalloc(&d_input, size * sizeof(sample_type))); - for(unsigned int channel = 0; channel < active_channels; channel++) - { - HIP_CHECK(hipMalloc(&d_levels[channel], num_levels[channel] * sizeof(level_type))); - HIP_CHECK(hipMalloc(&d_histogram[channel], bins[channel] * sizeof(counter_type))); - } - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - size * sizeof(sample_type), - hipMemcpyHostToDevice - ) - ); - for(unsigned int channel = 0; channel < active_channels; channel++) - { + sample_type * d_input; + level_type * d_levels[active_channels]; + counter_type * d_histogram[active_channels]; + HIP_CHECK(hipMalloc(&d_input, size * sizeof(sample_type))); + for(unsigned int channel = 0; channel < active_channels; channel++) + { + HIP_CHECK(hipMalloc(&d_levels[channel], num_levels[channel] * sizeof(level_type))); + HIP_CHECK(hipMalloc(&d_histogram[channel], bins[channel] * sizeof(counter_type))); + } HIP_CHECK( hipMemcpy( - d_levels[channel], levels[channel].data(), - num_levels[channel] * sizeof(level_type), + d_input, input.data(), + size * sizeof(sample_type), hipMemcpyHostToDevice ) ); - } - - // Calculate expected results on host - std::vector 
histogram_expected[active_channels]; - for(unsigned int channel = 0; channel < active_channels; channel++) - { - histogram_expected[channel] = std::vector(bins[channel], 0); + for(unsigned int channel = 0; channel < active_channels; channel++) + { + HIP_CHECK( + hipMemcpy( + d_levels[channel], levels[channel].data(), + num_levels[channel] * sizeof(level_type), + hipMemcpyHostToDevice + ) + ); + } - for(size_t row = 0; row < rows; row++) + // Calculate expected results on host + std::vector histogram_expected[active_channels]; + for(unsigned int channel = 0; channel < active_channels; channel++) { - for(size_t column = 0; column < columns; column++) + histogram_expected[channel] = std::vector(bins[channel], 0); + + for(size_t row = 0; row < rows; row++) { - const sample_type sample = input[row * row_stride + column * channels + channel]; - const level_type s = static_cast(sample); - if(s >= levels[channel][0] && s < levels[channel][bins[channel]]) + for(size_t column = 0; column < columns; column++) { - const auto bin_iter = std::upper_bound(levels[channel].begin(), levels[channel].end(), s); - const int bin = bin_iter - levels[channel].begin() - 1; - histogram_expected[channel][bin]++; + const sample_type sample = input[row * row_stride + column * channels + channel]; + const level_type s = static_cast(sample); + if(s >= levels[channel][0] && s < levels[channel][bins[channel]]) + { + const auto bin_iter = std::upper_bound(levels[channel].begin(), levels[channel].end(), s); + const int bin = bin_iter - levels[channel].begin() - 1; + histogram_expected[channel][bin]++; + } } } } - } - using config = rp::histogram_config>; + using config = rp::histogram_config>; - size_t temporary_storage_bytes = 0; - if(rows == 1) - { - HIP_CHECK(( - rp::multi_histogram_range( - nullptr, temporary_storage_bytes, - d_input, columns, - d_histogram, - num_levels, d_levels, - stream, debug_synchronous - ) - )); - } - else - { - HIP_CHECK(( - rp::multi_histogram_range( - nullptr, 
temporary_storage_bytes, - d_input, columns, rows, row_stride_bytes, - d_histogram, - num_levels, d_levels, - stream, debug_synchronous - ) - )); - } - - ASSERT_GT(temporary_storage_bytes, 0U); + size_t temporary_storage_bytes = 0; + if(rows == 1) + { + HIP_CHECK(( + rp::multi_histogram_range( + nullptr, temporary_storage_bytes, + d_input, columns, + d_histogram, + num_levels, d_levels, + stream, debug_synchronous + ) + )); + } + else + { + HIP_CHECK(( + rp::multi_histogram_range( + nullptr, temporary_storage_bytes, + d_input, columns, rows, row_stride_bytes, + d_histogram, + num_levels, d_levels, + stream, debug_synchronous + ) + )); + } - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + ASSERT_GT(temporary_storage_bytes, 0U); - if(rows == 1) - { - HIP_CHECK(( - rp::multi_histogram_range( - d_temporary_storage, temporary_storage_bytes, - d_input, columns, - d_histogram, - num_levels, d_levels, - stream, debug_synchronous - ) - )); - } - else - { - HIP_CHECK(( - rp::multi_histogram_range( - d_temporary_storage, temporary_storage_bytes, - d_input, columns, rows, row_stride_bytes, - d_histogram, - num_levels, d_levels, - stream, debug_synchronous - ) - )); - } + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - std::vector histogram[active_channels]; - for(unsigned int channel = 0; channel < active_channels; channel++) - { - histogram[channel] = std::vector(bins[channel]); - HIP_CHECK( - hipMemcpy( - histogram[channel].data(), d_histogram[channel], - bins[channel] * sizeof(counter_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipFree(d_levels[channel])); - HIP_CHECK(hipFree(d_histogram[channel])); - } + if(rows == 1) + { + HIP_CHECK(( + rp::multi_histogram_range( + d_temporary_storage, temporary_storage_bytes, + d_input, columns, + d_histogram, + num_levels, d_levels, + stream, debug_synchronous + ) + )); + } + else + { + HIP_CHECK(( + 
rp::multi_histogram_range( + d_temporary_storage, temporary_storage_bytes, + d_input, columns, rows, row_stride_bytes, + d_histogram, + num_levels, d_levels, + stream, debug_synchronous + ) + )); + } - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_input)); + std::vector histogram[active_channels]; + for(unsigned int channel = 0; channel < active_channels; channel++) + { + histogram[channel] = std::vector(bins[channel]); + HIP_CHECK( + hipMemcpy( + histogram[channel].data(), d_histogram[channel], + bins[channel] * sizeof(counter_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipFree(d_levels[channel])); + HIP_CHECK(hipFree(d_histogram[channel])); + } - for(unsigned int channel = 0; channel < active_channels; channel++) - { - SCOPED_TRACE(testing::Message() << "with channel = " << channel); + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_input)); - for(size_t i = 0; i < bins[channel]; i++) + for(unsigned int channel = 0; channel < active_channels; channel++) { - ASSERT_EQ(histogram[channel][i], histogram_expected[channel][i]); + SCOPED_TRACE(testing::Message() << "with channel = " << channel); + + for(size_t i = 0; i < bins[channel]; i++) + { + ASSERT_EQ(histogram[channel][i], histogram_expected[channel][i]); + } } } + + } } diff --git a/test/rocprim/test_device_merge.cpp b/test/rocprim/test_device_merge.cpp index d81a60030..3e9b494e0 100644 --- a/test/rocprim/test_device_merge.cpp +++ b/test/rocprim/test_device_merge.cpp @@ -117,104 +117,112 @@ TYPED_TEST(RocprimDeviceMergeTests, MergeKey) // compare function compare_op_type compare_op; - // Generate data - std::vector keys_input1 = test_utils::get_random_data(size1, 0, size1); - std::vector keys_input2 = test_utils::get_random_data(size2, 0, size2); - std::sort(keys_input1.begin(), keys_input1.end(), compare_op); - std::sort(keys_input2.begin(), keys_input2.end(), compare_op); - std::vector keys_output(size1 + size2, 0); - - // Calculate expected results on host - std::vector 
expected(keys_output.size()); - std::merge( - keys_input1.begin(), - keys_input1.end(), - keys_input2.begin(), - keys_input2.end(), - expected.begin(), - compare_op - ); - - test_utils::out_of_bounds_flag out_of_bounds; - - key_type * d_keys_input1; - key_type * d_keys_input2; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input1, keys_input1.size() * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_input2, keys_input2.size() * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, keys_output.size() * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input1, keys_input1.data(), - keys_input1.size() * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_keys_input2, keys_input2.data(), - keys_input2.size() * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - test_utils::bounds_checking_iterator d_keys_checking_output( - d_keys_output, - out_of_bounds.device_pointer(), - size1 + size2 - ); - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::merge( - d_temp_storage, temp_storage_size_bytes, - d_keys_input1, d_keys_input2, - d_keys_checking_output, - keys_input1.size(), keys_input2.size(), - compare_op, stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - - // Run - HIP_CHECK( - rocprim::merge( - d_temp_storage, temp_storage_size_bytes, - d_keys_input1, d_keys_input2, - d_keys_checking_output, - keys_input1.size(), keys_input2.size(), - compare_op, stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - ASSERT_FALSE(out_of_bounds.get()); - - // Copy keys_output to host - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys_output, - keys_output.size() * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - 
// Check if keys_output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected)); + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + { + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector keys_input1 = test_utils::get_random_data(size1, 0, size1, seed_value); + std::vector keys_input2 = test_utils::get_random_data(size2, 0, size2, seed_value); + std::sort(keys_input1.begin(), keys_input1.end(), compare_op); + std::sort(keys_input2.begin(), keys_input2.end(), compare_op); + std::vector keys_output(size1 + size2, 0); + + // Calculate expected results on host + std::vector expected(keys_output.size()); + std::merge( + keys_input1.begin(), + keys_input1.end(), + keys_input2.begin(), + keys_input2.end(), + expected.begin(), + compare_op + ); + + test_utils::out_of_bounds_flag out_of_bounds; + + key_type * d_keys_input1; + key_type * d_keys_input2; + key_type * d_keys_output; + HIP_CHECK(hipMalloc(&d_keys_input1, keys_input1.size() * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_keys_input2, keys_input2.size() * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_keys_output, keys_output.size() * sizeof(key_type))); + HIP_CHECK( + hipMemcpy( + d_keys_input1, keys_input1.data(), + keys_input1.size() * sizeof(key_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_keys_input2, keys_input2.data(), + keys_input2.size() * sizeof(key_type), + hipMemcpyHostToDevice + ) + ); + + test_utils::bounds_checking_iterator d_keys_checking_output( + d_keys_output, + out_of_bounds.device_pointer(), + size1 + size2 + ); + + // temp storage + size_t temp_storage_size_bytes; + void * d_temp_storage = nullptr; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::merge( + d_temp_storage, temp_storage_size_bytes, + d_keys_input1, d_keys_input2, + d_keys_checking_output, + keys_input1.size(), keys_input2.size(), 
+ compare_op, stream, debug_synchronous + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + + // Run + HIP_CHECK( + rocprim::merge( + d_temp_storage, temp_storage_size_bytes, + d_keys_input1, d_keys_input2, + d_keys_checking_output, + keys_input1.size(), keys_input2.size(), + compare_op, stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + ASSERT_FALSE(out_of_bounds.get()); + + // Copy keys_output to host + HIP_CHECK( + hipMemcpy( + keys_output.data(), d_keys_output, + keys_output.size() * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); + + // Check if keys_output values are as expected + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected)); + + hipFree(d_keys_input1); + hipFree(d_keys_input2); + hipFree(d_keys_output); + hipFree(d_temp_storage); + } - hipFree(d_keys_input1); - hipFree(d_keys_input2); - hipFree(d_keys_output); - hipFree(d_temp_storage); + } } @@ -242,166 +250,173 @@ TYPED_TEST(RocprimDeviceMergeTests, MergeKeyValue) // compare function compare_op_type compare_op; - // Generate data - std::vector keys_input1 = test_utils::get_random_data(size1, 0, size1); - std::vector keys_input2 = test_utils::get_random_data(size2, 0, size2); - std::sort(keys_input1.begin(), keys_input1.end(), compare_op); - std::sort(keys_input2.begin(), keys_input2.end(), compare_op); - std::vector values_input1(size1); - std::vector values_input2(size2); - std::iota(values_input1.begin(), values_input1.end(), 0); - std::iota(values_input2.begin(), values_input2.end(), size1); - std::vector keys_output(size1 + size2, 0); - std::vector values_output(size1 + size2, 0); - - // Calculate expected results on host - std::vector vector1(size1); - std::vector vector2(size2); - - for(size_t i = 0; i < size1; i++) - { - vector1[i] = key_value(keys_input1[i], 
values_input1[i]); - } - for(size_t i = 0; i < size2; i++) - { - vector2[i] = key_value(keys_input2[i], values_input2[i]); - } - - std::vector expected(size1 + size2); - std::merge( - vector1.begin(), - vector1.end(), - vector2.begin(), - vector2.end(), - expected.begin(), - [compare_op](const key_value& a, const key_value& b) { return compare_op(a.first, b.first); } - ); - - test_utils::out_of_bounds_flag out_of_bounds; - - key_type * d_keys_input1; - key_type * d_keys_input2; - key_type * d_keys_output; - value_type * d_values_input1; - value_type * d_values_input2; - value_type * d_values_output; - HIP_CHECK(hipMalloc(&d_keys_input1, keys_input1.size() * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_input2, keys_input2.size() * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, keys_output.size() * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_values_input1, values_input1.size() * sizeof(value_type))); - HIP_CHECK(hipMalloc(&d_values_input2, values_input2.size() * sizeof(value_type))); - HIP_CHECK(hipMalloc(&d_values_output, values_output.size() * sizeof(value_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input1, keys_input1.data(), - keys_input1.size() * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_keys_input2, keys_input2.data(), - keys_input2.size() * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_values_input1, values_input1.data(), - values_input1.size() * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_values_input2, values_input2.data(), - values_input2.size() * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - test_utils::bounds_checking_iterator d_keys_checking_output( - d_keys_output, - out_of_bounds.device_pointer(), - size1 + size2 - ); - test_utils::bounds_checking_iterator d_values_checking_output( - d_values_output, - out_of_bounds.device_pointer(), - size1 + size2 - ); - - // temp storage - size_t temp_storage_size_bytes; - void 
* d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::merge( - d_temp_storage, temp_storage_size_bytes, - d_keys_input1, d_keys_input2, - d_keys_checking_output, - d_values_input1, d_values_input2, - d_values_checking_output, - keys_input1.size(), keys_input2.size(), - compare_op, stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - - // Run - HIP_CHECK( - rocprim::merge( - d_temp_storage, temp_storage_size_bytes, - d_keys_input1, d_keys_input2, - d_keys_checking_output, - d_values_input1, d_values_input2, - d_values_checking_output, - keys_input1.size(), keys_input2.size(), - compare_op, stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - ASSERT_FALSE(out_of_bounds.get()); - - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys_output, - keys_output.size() * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK( - hipMemcpy( - values_output.data(), d_values_output, - values_output.size() * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - - // Check if keys_output values are as expected - std::vector expected_key(expected.size()); - std::vector expected_value(expected.size()); - for(size_t i = 0; i < expected.size(); i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected_key[i] = expected[i].first; - expected_value[i] = expected[i].second; + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector keys_input1 = test_utils::get_random_data(size1, 0, size1, seed_value); + std::vector keys_input2 = test_utils::get_random_data(size2, 0, size2, seed_value); + std::sort(keys_input1.begin(), keys_input1.end(), compare_op); + std::sort(keys_input2.begin(), keys_input2.end(), compare_op); + std::vector values_input1(size1); + std::vector values_input2(size2); + std::iota(values_input1.begin(), values_input1.end(), 0); + std::iota(values_input2.begin(), values_input2.end(), size1); + std::vector keys_output(size1 + size2, 0); + std::vector values_output(size1 + size2, 0); + + // Calculate expected results on host + std::vector vector1(size1); + std::vector vector2(size2); + + for(size_t i = 0; i < size1; i++) + { + vector1[i] = key_value(keys_input1[i], values_input1[i]); + } + for(size_t i = 0; i < size2; i++) + { + vector2[i] = key_value(keys_input2[i], values_input2[i]); + } + + std::vector expected(size1 + size2); + std::merge( + vector1.begin(), + vector1.end(), + vector2.begin(), + vector2.end(), + expected.begin(), + [compare_op](const key_value& a, const key_value& b) { return compare_op(a.first, b.first); } + ); + + test_utils::out_of_bounds_flag out_of_bounds; + + key_type * d_keys_input1; + key_type * d_keys_input2; + key_type * d_keys_output; + value_type * d_values_input1; + value_type * d_values_input2; + value_type * d_values_output; + HIP_CHECK(hipMalloc(&d_keys_input1, keys_input1.size() * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_keys_input2, keys_input2.size() * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_keys_output, keys_output.size() * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_values_input1, values_input1.size() * sizeof(value_type))); + HIP_CHECK(hipMalloc(&d_values_input2, values_input2.size() * sizeof(value_type))); + HIP_CHECK(hipMalloc(&d_values_output, values_output.size() * sizeof(value_type))); + 
HIP_CHECK( + hipMemcpy( + d_keys_input1, keys_input1.data(), + keys_input1.size() * sizeof(key_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_keys_input2, keys_input2.data(), + keys_input2.size() * sizeof(key_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_values_input1, values_input1.data(), + values_input1.size() * sizeof(value_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_values_input2, values_input2.data(), + values_input2.size() * sizeof(value_type), + hipMemcpyHostToDevice + ) + ); + + test_utils::bounds_checking_iterator d_keys_checking_output( + d_keys_output, + out_of_bounds.device_pointer(), + size1 + size2 + ); + test_utils::bounds_checking_iterator d_values_checking_output( + d_values_output, + out_of_bounds.device_pointer(), + size1 + size2 + ); + + // temp storage + size_t temp_storage_size_bytes; + void * d_temp_storage = nullptr; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::merge( + d_temp_storage, temp_storage_size_bytes, + d_keys_input1, d_keys_input2, + d_keys_checking_output, + d_values_input1, d_values_input2, + d_values_checking_output, + keys_input1.size(), keys_input2.size(), + compare_op, stream, debug_synchronous + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + + // Run + HIP_CHECK( + rocprim::merge( + d_temp_storage, temp_storage_size_bytes, + d_keys_input1, d_keys_input2, + d_keys_checking_output, + d_values_input1, d_values_input2, + d_values_checking_output, + keys_input1.size(), keys_input2.size(), + compare_op, stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + ASSERT_FALSE(out_of_bounds.get()); + + HIP_CHECK( + hipMemcpy( + keys_output.data(), d_keys_output, + keys_output.size() * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK( + 
hipMemcpy( + values_output.data(), d_values_output, + values_output.size() * sizeof(value_type), + hipMemcpyDeviceToHost + ) + ); + + // Check if keys_output values are as expected + std::vector expected_key(expected.size()); + std::vector expected_value(expected.size()); + for(size_t i = 0; i < expected.size(); i++) + { + expected_key[i] = expected[i].first; + expected_value[i] = expected[i].second; + } + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected_key)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, expected_value)); + + hipFree(d_keys_input1); + hipFree(d_keys_input2); + hipFree(d_keys_output); + hipFree(d_values_input1); + hipFree(d_values_input2); + hipFree(d_values_output); + hipFree(d_temp_storage); } - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected_key)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, expected_value)); - - hipFree(d_keys_input1); - hipFree(d_keys_input2); - hipFree(d_keys_output); - hipFree(d_values_input1); - hipFree(d_values_input2); - hipFree(d_values_output); - hipFree(d_temp_storage); + } } diff --git a/test/rocprim/test_device_merge_sort.cpp b/test/rocprim/test_device_merge_sort.cpp index 878d6a499..ea4483533 100644 --- a/test/rocprim/test_device_merge_sort.cpp +++ b/test/rocprim/test_device_merge_sort.cpp @@ -81,7 +81,7 @@ typedef ::testing::Types< DeviceSortParams, test_utils::custom_test_type> > RocprimDeviceSortTestsParams; -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 1, 10, 53, 211, @@ -89,7 +89,7 @@ std::vector get_sizes() 1024, 2048, 5000, 34567, (1 << 17) - 1220, (1 << 20) - 123 }; - const std::vector random_sizes = test_utils::get_random_data(5, 1, 100000); + const std::vector random_sizes = test_utils::get_random_data(5, 1, 100000, seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); std::sort(sizes.begin(), sizes.end()); return sizes; @@ -105,99 +105,106 @@ 
TYPED_TEST(RocprimDeviceSortTests, SortKey) bool in_place = false; - for(size_t size : get_sizes()) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - hipStream_t stream = 0; // default + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - SCOPED_TRACE(testing::Message() << "with size = " << size); - - in_place = !in_place; - - // Generate data - std::vector input = test_utils::get_random_data(size, -100, 100); // float16 can't exceed 65504 - std::vector output(size); - - key_type * d_input; - key_type * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(key_type))); - if(in_place) - { - d_output = d_input; - } - else - { - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(key_type))); - } - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // compare function - compare_function compare_op; - - // Calculate expected results on host - std::vector expected(input); - std::stable_sort( - expected.begin(), - expected.end(), - compare_op - ); - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::merge_sort( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, input.size(), - compare_op, stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::merge_sort( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, input.size(), - compare_op, stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), 
d_output, - output.size() * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); - - hipFree(d_input); - if(!in_place) + for(size_t size : get_sizes(seed_value)) { - hipFree(d_output); + hipStream_t stream = 0; // default + + SCOPED_TRACE(testing::Message() << "with size = " << size); + + in_place = !in_place; + + // Generate data + std::vector input = test_utils::get_random_data(size, -100, 100, seed_value); // float16 can't exceed 65504 + std::vector output(size); + + key_type * d_input; + key_type * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(key_type))); + if(in_place) + { + d_output = d_input; + } + else + { + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(key_type))); + } + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(key_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // compare function + compare_function compare_op; + + // Calculate expected results on host + std::vector expected(input); + std::stable_sort( + expected.begin(), + expected.end(), + compare_op + ); + + // temp storage + size_t temp_storage_size_bytes; + void * d_temp_storage = nullptr; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::merge_sort( + d_temp_storage, temp_storage_size_bytes, + d_input, d_output, input.size(), + compare_op, stream, debug_synchronous + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::merge_sort( + d_temp_storage, temp_storage_size_bytes, + d_input, d_output, input.size(), + compare_op, stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host 
+ HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(output, expected)); + + hipFree(d_input); + if(!in_place) + { + hipFree(d_output); + } + hipFree(d_temp_storage); } - hipFree(d_temp_storage); } + } TYPED_TEST(RocprimDeviceSortTests, SortKeyValue) @@ -209,146 +216,153 @@ TYPED_TEST(RocprimDeviceSortTests, SortKeyValue) bool in_place = false; - for(size_t size : get_sizes()) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - in_place = !in_place; - - // Generate data - std::vector keys_input = test_utils::get_random_data(size, -100, 100); // float16 can't exceed 65504 - - std::vector values_input(size); - std::iota(values_input.begin(), values_input.end(), 0); - - std::vector keys_output(size); - std::vector values_output(size); - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, keys_input.size() * sizeof(key_type))); - if(in_place) - { - d_keys_output = d_keys_input; - } - else - { - HIP_CHECK(hipMalloc(&d_keys_output, keys_output.size() * sizeof(key_type))); - } - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - keys_input.size() * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - value_type * d_values_input; - value_type * d_values_output; - HIP_CHECK(hipMalloc(&d_values_input, values_input.size() * sizeof(value_type))); - if(in_place) - { - d_values_output = d_values_input; - } - else - { - HIP_CHECK(hipMalloc(&d_values_output, values_output.size() * sizeof(value_type))); - } - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - values_input.size() * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - 
HIP_CHECK(hipDeviceSynchronize()); - - // compare function - compare_function compare_op; - - // Calculate expected results on host - using key_value = std::pair; - std::vector expected(size); - for(size_t i = 0; i < size; i++) - { - expected[i] = key_value(keys_input[i], values_input[i]); - } - std::stable_sort( - expected.begin(), - expected.end(), - [compare_op](const key_value& a, const key_value& b) { return compare_op(a.first, b.first); } - ); - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::merge_sort( - d_temp_storage, temp_storage_size_bytes, - d_keys_input, d_keys_output, - d_values_input, d_values_output, keys_input.size(), - compare_op, stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::merge_sort( - d_temp_storage, temp_storage_size_bytes, - d_keys_input, d_keys_output, - d_values_input, d_values_output, keys_input.size(), - compare_op, stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys_output, - keys_output.size() * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK( - hipMemcpy( - values_output.data(), d_values_output, - values_output.size() * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - std::vector expected_key(expected.size()); - std::vector expected_value(expected.size()); - for(size_t i = 0; i < expected.size(); i++) - { - expected_key[i] = expected[i].first; - expected_value[i] = expected[i].second; - } - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected_key)); 
- ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, expected_value)); + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - hipFree(d_keys_input); - hipFree(d_values_input); - if(!in_place) + for(size_t size : get_sizes(seed_value)) { - hipFree(d_keys_output); - hipFree(d_values_output); + hipStream_t stream = 0; // default + + SCOPED_TRACE(testing::Message() << "with size = " << size); + + in_place = !in_place; + + // Generate data + std::vector keys_input = test_utils::get_random_data(size, -100, 100, seed_value); // float16 can't exceed 65504 + + std::vector values_input(size); + std::iota(values_input.begin(), values_input.end(), 0); + + std::vector keys_output(size); + std::vector values_output(size); + + key_type * d_keys_input; + key_type * d_keys_output; + HIP_CHECK(hipMalloc(&d_keys_input, keys_input.size() * sizeof(key_type))); + if(in_place) + { + d_keys_output = d_keys_input; + } + else + { + HIP_CHECK(hipMalloc(&d_keys_output, keys_output.size() * sizeof(key_type))); + } + HIP_CHECK( + hipMemcpy( + d_keys_input, keys_input.data(), + keys_input.size() * sizeof(key_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + value_type * d_values_input; + value_type * d_values_output; + HIP_CHECK(hipMalloc(&d_values_input, values_input.size() * sizeof(value_type))); + if(in_place) + { + d_values_output = d_values_input; + } + else + { + HIP_CHECK(hipMalloc(&d_values_output, values_output.size() * sizeof(value_type))); + } + HIP_CHECK( + hipMemcpy( + d_values_input, values_input.data(), + values_input.size() * sizeof(value_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // compare function + compare_function compare_op; + + // Calculate expected results on host + using key_value = std::pair; + std::vector expected(size); + for(size_t i = 0; i < size; i++) + { + expected[i] = key_value(keys_input[i], 
values_input[i]); + } + std::stable_sort( + expected.begin(), + expected.end(), + [compare_op](const key_value& a, const key_value& b) { return compare_op(a.first, b.first); } + ); + + // temp storage + size_t temp_storage_size_bytes; + void * d_temp_storage = nullptr; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::merge_sort( + d_temp_storage, temp_storage_size_bytes, + d_keys_input, d_keys_output, + d_values_input, d_values_output, keys_input.size(), + compare_op, stream, debug_synchronous + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::merge_sort( + d_temp_storage, temp_storage_size_bytes, + d_keys_input, d_keys_output, + d_values_input, d_values_output, keys_input.size(), + compare_op, stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + keys_output.data(), d_keys_output, + keys_output.size() * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK( + hipMemcpy( + values_output.data(), d_values_output, + values_output.size() * sizeof(value_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + std::vector expected_key(expected.size()); + std::vector expected_value(expected.size()); + for(size_t i = 0; i < expected.size(); i++) + { + expected_key[i] = expected[i].first; + expected_value[i] = expected[i].second; + } + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected_key)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, expected_value)); + + hipFree(d_keys_input); + hipFree(d_values_input); + if(!in_place) + { + hipFree(d_keys_output); + hipFree(d_values_output); + } + hipFree(d_temp_storage); } - hipFree(d_temp_storage); } + 
} diff --git a/test/rocprim/test_device_partition.cpp b/test/rocprim/test_device_partition.cpp index 31cd92dba..aea4d70c1 100644 --- a/test/rocprim/test_device_partition.cpp +++ b/test/rocprim/test_device_partition.cpp @@ -72,7 +72,7 @@ typedef ::testing::Types< DevicePartitionParams> > RocprimDevicePartitionTestsParams; -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 2, 32, 64, 256, @@ -81,7 +81,7 @@ std::vector get_sizes() 27845, (1 << 18) + 1111, 1024 * 1024 * 32 }; - const std::vector random_sizes = test_utils::get_random_data(2, 1, 16384); + const std::vector random_sizes = test_utils::get_random_data(2, 1, 16384, seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); std::sort(sizes.begin(), sizes.end()); return sizes; @@ -99,137 +99,144 @@ TYPED_TEST(RocprimDevicePartitionTests, Flagged) hipStream_t stream = 0; // default stream - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 100); - std::vector flags = test_utils::get_random_data01(size, 0.25); - - T * d_input; - F * d_flags; - U * d_output; - unsigned int * d_selected_count_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_flags, flags.size() * sizeof(F))); - HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); - HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_flags, flags.data(), - flags.size() * sizeof(F), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected_selected and expected_rejected results on host - std::vector expected_selected; - std::vector 
expected_rejected; - expected_selected.reserve(input.size()/2); - expected_rejected.reserve(input.size()/2); - for(size_t i = 0; i < input.size(); i++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) { - if(flags[i] != 0) + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 100, seed_value); + std::vector flags = test_utils::get_random_data01(size, 0.25, seed_value); + + T * d_input; + F * d_flags; + U * d_output; + unsigned int * d_selected_count_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_flags, flags.size() * sizeof(F))); + HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); + HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_flags, flags.data(), + flags.size() * sizeof(F), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected_selected and expected_rejected results on host + std::vector expected_selected; + std::vector expected_rejected; + expected_selected.reserve(input.size()/2); + expected_rejected.reserve(input.size()/2); + for(size_t i = 0; i < input.size(); i++) { - expected_selected.push_back(input[i]); + if(flags[i] != 0) + { + expected_selected.push_back(input[i]); + } + else + { + expected_rejected.push_back(input[i]); + } } - else + std::reverse(expected_rejected.begin(), expected_rejected.end()); + + // temp storage + size_t temp_storage_size_bytes; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::partition( + nullptr, + temp_storage_size_bytes, + d_input, + d_flags, + test_utils::wrap_in_identity_iterator(d_output), + 
d_selected_count_output, + input.size(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + void * d_temp_storage = nullptr; + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::partition( + d_temp_storage, + temp_storage_size_bytes, + d_input, + d_flags, + test_utils::wrap_in_identity_iterator(d_output), + d_selected_count_output, + input.size(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if number of selected value is as expected_selected + unsigned int selected_count_output = 0; + HIP_CHECK( + hipMemcpy( + &selected_count_output, d_selected_count_output, + sizeof(unsigned int), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + ASSERT_EQ(selected_count_output, expected_selected.size()); + + // Check if output values are as expected_selected + std::vector output(input.size()); + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + std::vector output_rejected; + for(size_t i = 0; i < expected_rejected.size(); i++) { - expected_rejected.push_back(input[i]); + auto j = i + expected_selected.size(); + output_rejected.push_back(output[j]); } + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output, expected_selected, expected_selected.size())); + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output_rejected, expected_rejected, expected_rejected.size())); + + hipFree(d_input); + hipFree(d_flags); + hipFree(d_output); + hipFree(d_selected_count_output); + hipFree(d_temp_storage); } - std::reverse(expected_rejected.begin(), expected_rejected.end()); - - // temp storage - size_t temp_storage_size_bytes; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::partition( - 
nullptr, - temp_storage_size_bytes, - d_input, - d_flags, - test_utils::wrap_in_identity_iterator(d_output), - d_selected_count_output, - input.size(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - void * d_temp_storage = nullptr; - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::partition( - d_temp_storage, - temp_storage_size_bytes, - d_input, - d_flags, - test_utils::wrap_in_identity_iterator(d_output), - d_selected_count_output, - input.size(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if number of selected value is as expected_selected - unsigned int selected_count_output = 0; - HIP_CHECK( - hipMemcpy( - &selected_count_output, d_selected_count_output, - sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - ASSERT_EQ(selected_count_output, expected_selected.size()); - - // Check if output values are as expected_selected - std::vector output(input.size()); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - std::vector output_rejected; - for(size_t i = 0; i < expected_rejected.size(); i++) - { - auto j = i + expected_selected.size(); - output_rejected.push_back(output[j]); - } - ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output, expected_selected, expected_selected.size())); - ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output_rejected, expected_rejected, expected_rejected.size())); - - hipFree(d_input); - hipFree(d_flags); - hipFree(d_output); - hipFree(d_selected_count_output); - hipFree(d_temp_storage); } + } TYPED_TEST(RocprimDevicePartitionTests, PredicateEmptyInput) @@ -337,124 +344,131 @@ 
TYPED_TEST(RocprimDevicePartitionTests, Predicate) return false; }; - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 100); - - T * d_input; - U * d_output; - unsigned int * d_selected_count_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); - HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected_selected and expected_rejected results on host - std::vector expected_selected; - std::vector expected_rejected; - expected_selected.reserve(input.size()/2); - expected_rejected.reserve(input.size()/2); - for(size_t i = 0; i < input.size(); i++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) { - if(select_op(input[i])) + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 100, seed_value); + + T * d_input; + U * d_output; + unsigned int * d_selected_count_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); + HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected_selected and expected_rejected results on host + std::vector expected_selected; + std::vector expected_rejected; + expected_selected.reserve(input.size()/2); + expected_rejected.reserve(input.size()/2); + for(size_t i = 0; i < input.size(); i++) { - expected_selected.push_back(input[i]); + if(select_op(input[i])) + { + expected_selected.push_back(input[i]); + } + else + { + expected_rejected.push_back(input[i]); + } } - else + std::reverse(expected_rejected.begin(), expected_rejected.end()); + + // temp storage + size_t temp_storage_size_bytes; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::partition( + nullptr, + temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + d_selected_count_output, + input.size(), + select_op, + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + void * d_temp_storage = nullptr; + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::partition( + d_temp_storage, + temp_storage_size_bytes, 
+ d_input, + test_utils::wrap_in_identity_iterator(d_output), + d_selected_count_output, + input.size(), + select_op, + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if number of selected value is as expected_selected + unsigned int selected_count_output = 0; + HIP_CHECK( + hipMemcpy( + &selected_count_output, d_selected_count_output, + sizeof(unsigned int), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + ASSERT_EQ(selected_count_output, expected_selected.size()); + + // Check if output values are as expected_selected + std::vector output(input.size()); + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + std::vector output_rejected; + for(size_t i = 0; i < expected_rejected.size(); i++) { - expected_rejected.push_back(input[i]); + auto j = i + expected_selected.size(); + output_rejected.push_back(output[j]); } - } - std::reverse(expected_rejected.begin(), expected_rejected.end()); - - // temp storage - size_t temp_storage_size_bytes; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::partition( - nullptr, - temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - d_selected_count_output, - input.size(), - select_op, - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - void * d_temp_storage = nullptr; - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::partition( - d_temp_storage, - temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - d_selected_count_output, - input.size(), - select_op, - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if number of selected value is 
as expected_selected - unsigned int selected_count_output = 0; - HIP_CHECK( - hipMemcpy( - &selected_count_output, d_selected_count_output, - sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - ASSERT_EQ(selected_count_output, expected_selected.size()); - - // Check if output values are as expected_selected - std::vector output(input.size()); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - std::vector output_rejected; - for(size_t i = 0; i < expected_rejected.size(); i++) - { - auto j = i + expected_selected.size(); - output_rejected.push_back(output[j]); - } - ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output, expected_selected, expected_selected.size())); - ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output_rejected, expected_rejected, expected_rejected.size())); + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output, expected_selected, expected_selected.size())); + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output_rejected, expected_rejected, expected_rejected.size())); - hipFree(d_input); - hipFree(d_output); - hipFree(d_selected_count_output); - hipFree(d_temp_storage); + hipFree(d_input); + hipFree(d_output); + hipFree(d_selected_count_output); + hipFree(d_temp_storage); + } } + } diff --git a/test/rocprim/test_device_radix_sort.cpp b/test/rocprim/test_device_radix_sort.cpp index 0604d6418..fdc168e23 100644 --- a/test/rocprim/test_device_radix_sort.cpp +++ b/test/rocprim/test_device_radix_sort.cpp @@ -139,10 +139,10 @@ struct key_value_comparator } }; -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 1, 10, 53, 211, 1024, 2345, 4096, 34567, (1 << 16) - 1220, (1 << 23) - 76543 }; - const std::vector random_sizes = test_utils::get_random_data(10, 1, 100000); + const std::vector random_sizes = test_utils::get_random_data(10, 1, 100000, 
seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); return sizes; } @@ -161,111 +161,119 @@ TYPED_TEST(RocprimDeviceRadixSort, SortKeys) bool in_place = false; - for(size_t size : get_sizes()) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - if(size > (1 << 20) && !check_huge_sizes) continue; + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - SCOPED_TRACE(testing::Message() << "with size = " << size); - - in_place = !in_place; - - // Generate data - std::vector keys_input; - if(rp::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - if(in_place) - { - d_keys_output = d_keys_input; - } - else - { - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - } - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector expected(keys_input); - std::stable_sort(expected.begin(), expected.end(), key_comparator()); - - // Use custom config - using config = rp::radix_sort_config<8, 5, rp::kernel_config<256, 3>, rp::kernel_config<256, 8>>; - - size_t temporary_storage_bytes; - HIP_CHECK( - rp::radix_sort_keys( - nullptr, temporary_storage_bytes, - d_keys_input, d_keys_output, size, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) + for(size_t size : get_sizes(seed_value)) { + if(size > (1 << 20) && !check_huge_sizes) continue; + + 
SCOPED_TRACE(testing::Message() << "with size = " << size); + + in_place = !in_place; + + // Generate data + std::vector keys_input; + if(rp::is_floating_point::value) + { + keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000, seed_value); + } + else + { + keys_input = test_utils::get_random_data( + size, + std::numeric_limits::min(), + std::numeric_limits::max(), + seed_value + ); + } + + key_type * d_keys_input; + key_type * d_keys_output; + HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); + if(in_place) + { + d_keys_output = d_keys_input; + } + else + { + HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); + } HIP_CHECK( - rp::radix_sort_keys_desc( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, size, - start_bit, end_bit, - stream, debug_synchronous + hipMemcpy( + d_keys_input, keys_input.data(), + size * sizeof(key_type), + hipMemcpyHostToDevice ) ); - } - else - { + + // Calculate expected results on host + std::vector expected(keys_input); + std::stable_sort(expected.begin(), expected.end(), key_comparator()); + + // Use custom config + using config = rp::radix_sort_config<8, 5, rp::kernel_config<256, 3>, rp::kernel_config<256, 8>>; + + size_t temporary_storage_bytes; HIP_CHECK( rp::radix_sort_keys( - d_temporary_storage, temporary_storage_bytes, + nullptr, temporary_storage_bytes, d_keys_input, d_keys_output, size, - start_bit, end_bit, - stream, debug_synchronous + start_bit, end_bit ) ); - } + ASSERT_GT(temporary_storage_bytes, 0); + + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + if(descending) + { + HIP_CHECK( + rp::radix_sort_keys_desc( + d_temporary_storage, temporary_storage_bytes, + d_keys_input, d_keys_output, size, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + else + { + HIP_CHECK( + rp::radix_sort_keys( + d_temporary_storage, temporary_storage_bytes, + d_keys_input, d_keys_output, size, + 
start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + + + std::vector keys_output(size); + HIP_CHECK( + hipMemcpy( + keys_output.data(), d_keys_output, + size * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys_output, - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_keys_input)); + if(!in_place) + { + HIP_CHECK(hipFree(d_keys_output)); + } - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_keys_input)); - if(!in_place) - { - HIP_CHECK(hipFree(d_keys_output)); - } - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected)); + } } + } TYPED_TEST(RocprimDeviceRadixSort, SortPairs) @@ -283,158 +291,166 @@ TYPED_TEST(RocprimDeviceRadixSort, SortPairs) bool in_place = false; - for(size_t size : get_sizes()) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - if(size > (1 << 20) && !check_huge_sizes) continue; - - SCOPED_TRACE(testing::Message() << "with size = " << size); + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - in_place = !in_place; - - // Generate data - std::vector keys_input; - if(rp::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else + for(size_t size : get_sizes(seed_value)) { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() + if(size > (1 << 20) && !check_huge_sizes) continue; + + SCOPED_TRACE(testing::Message() << "with size = " << size); + + in_place = !in_place; + + // Generate data + std::vector keys_input; + if(rp::is_floating_point::value) + { + keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000, seed_value); + } + else + { + keys_input = test_utils::get_random_data( + size, + std::numeric_limits::min(), + std::numeric_limits::max(), + seed_value + ); + } + + std::vector values_input(size); + std::iota(values_input.begin(), values_input.end(), 0); + + key_type * d_keys_input; + key_type * d_keys_output; + HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); + if(in_place) + { + d_keys_output = d_keys_input; + } + else + { + HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); + } + HIP_CHECK( + hipMemcpy( + d_keys_input, keys_input.data(), + size * sizeof(key_type), + hipMemcpyHostToDevice + ) ); - } - - std::vector values_input(size); - std::iota(values_input.begin(), values_input.end(), 0); - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - if(in_place) - { - d_keys_output = d_keys_input; - } - else - { - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - } - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - value_type * d_values_input; - value_type * d_values_output; - HIP_CHECK(hipMalloc(&d_values_input, size * 
sizeof(value_type))); - if(in_place) - { - d_values_output = d_values_input; - } - else - { - HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(value_type))); - } - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - using key_value = std::pair; - - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < size; i++) - { - expected[i] = key_value(keys_input[i], values_input[i]); - } - std::stable_sort( - expected.begin(), expected.end(), - key_value_comparator() - ); - std::vector keys_expected(size); - std::vector values_expected(size); - for(size_t i = 0; i < size; i++) - { - keys_expected[i] = expected[i].first; - values_expected[i] = expected[i].second; - } - - void * d_temporary_storage = nullptr; - size_t temporary_storage_bytes; - HIP_CHECK( - rp::radix_sort_pairs( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, d_values_input, d_values_output, size, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0); - - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { + value_type * d_values_input; + value_type * d_values_output; + HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); + if(in_place) + { + d_values_output = d_values_input; + } + else + { + HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(value_type))); + } HIP_CHECK( - rp::radix_sort_pairs_desc( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, d_values_input, d_values_output, size, - start_bit, end_bit, - stream, debug_synchronous + hipMemcpy( + d_values_input, values_input.data(), + size * sizeof(value_type), + hipMemcpyHostToDevice ) ); - } - else - { + + using key_value = std::pair; + + // Calculate expected results on host + std::vector expected(size); + for(size_t i = 0; i < size; i++) + { + expected[i] = key_value(keys_input[i], values_input[i]); + } + 
std::stable_sort( + expected.begin(), expected.end(), + key_value_comparator() + ); + std::vector keys_expected(size); + std::vector values_expected(size); + for(size_t i = 0; i < size; i++) + { + keys_expected[i] = expected[i].first; + values_expected[i] = expected[i].second; + } + + void * d_temporary_storage = nullptr; + size_t temporary_storage_bytes; HIP_CHECK( rp::radix_sort_pairs( d_temporary_storage, temporary_storage_bytes, d_keys_input, d_keys_output, d_values_input, d_values_output, size, - start_bit, end_bit, - stream, debug_synchronous + start_bit, end_bit ) ); - } + ASSERT_GT(temporary_storage_bytes, 0); + + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + if(descending) + { + HIP_CHECK( + rp::radix_sort_pairs_desc( + d_temporary_storage, temporary_storage_bytes, + d_keys_input, d_keys_output, d_values_input, d_values_output, size, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + else + { + HIP_CHECK( + rp::radix_sort_pairs( + d_temporary_storage, temporary_storage_bytes, + d_keys_input, d_keys_output, d_values_input, d_values_output, size, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + + + std::vector keys_output(size); + HIP_CHECK( + hipMemcpy( + keys_output.data(), d_keys_output, + size * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys_output, - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - std::vector values_output(size); - HIP_CHECK( - hipMemcpy( - values_output.data(), d_values_output, - size * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_values_input)); - if(!in_place) - { - HIP_CHECK(hipFree(d_keys_output)); - HIP_CHECK(hipFree(d_values_output)); - } + std::vector values_output(size); + HIP_CHECK( + hipMemcpy( + values_output.data(), d_values_output, + size * 
sizeof(value_type), + hipMemcpyDeviceToHost + ) + ); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, keys_expected)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, values_expected)); + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_keys_input)); + HIP_CHECK(hipFree(d_values_input)); + if(!in_place) + { + HIP_CHECK(hipFree(d_keys_output)); + HIP_CHECK(hipFree(d_values_output)); + } + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, keys_expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, values_expected)); + } } + } TYPED_TEST(RocprimDeviceRadixSort, SortKeysDoubleBuffer) @@ -449,99 +465,107 @@ TYPED_TEST(RocprimDeviceRadixSort, SortKeysDoubleBuffer) const bool debug_synchronous = false; - const std::vector sizes = get_sizes(); - for(size_t size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - if(size > (1 << 20) && !check_huge_sizes) continue; + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector keys_input; - if(rp::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() - ); - } - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector expected(keys_input); - std::stable_sort(expected.begin(), expected.end(), key_comparator()); - - rp::double_buffer d_keys(d_keys_input, d_keys_output); - - size_t temporary_storage_bytes; - HIP_CHECK( - rp::radix_sort_keys( - nullptr, temporary_storage_bytes, - d_keys, size, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) + const std::vector sizes = get_sizes(seed_value); + for(size_t size : sizes) { + if(size > (1 << 20) && !check_huge_sizes) continue; + + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector keys_input; + if(rp::is_floating_point::value) + { + keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000, seed_value); + } + else + { + keys_input = test_utils::get_random_data( + size, + std::numeric_limits::min(), + std::numeric_limits::max(), + seed_value + ); + } + + key_type * d_keys_input; + key_type * d_keys_output; + HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_keys_output, size * 
sizeof(key_type))); HIP_CHECK( - rp::radix_sort_keys_desc( - d_temporary_storage, temporary_storage_bytes, - d_keys, size, - start_bit, end_bit, - stream, debug_synchronous + hipMemcpy( + d_keys_input, keys_input.data(), + size * sizeof(key_type), + hipMemcpyHostToDevice ) ); - } - else - { + + // Calculate expected results on host + std::vector expected(keys_input); + std::stable_sort(expected.begin(), expected.end(), key_comparator()); + + rp::double_buffer d_keys(d_keys_input, d_keys_output); + + size_t temporary_storage_bytes; HIP_CHECK( rp::radix_sort_keys( - d_temporary_storage, temporary_storage_bytes, + nullptr, temporary_storage_bytes, d_keys, size, - start_bit, end_bit, - stream, debug_synchronous + start_bit, end_bit ) ); - } - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys.current(), - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); + ASSERT_GT(temporary_storage_bytes, 0); + + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + if(descending) + { + HIP_CHECK( + rp::radix_sort_keys_desc( + d_temporary_storage, temporary_storage_bytes, + d_keys, size, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + else + { + HIP_CHECK( + rp::radix_sort_keys( + d_temporary_storage, temporary_storage_bytes, + d_keys, size, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + + HIP_CHECK(hipFree(d_temporary_storage)); + + std::vector keys_output(size); + HIP_CHECK( + hipMemcpy( + keys_output.data(), d_keys.current(), + size * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_keys_output)); + HIP_CHECK(hipFree(d_keys_input)); + HIP_CHECK(hipFree(d_keys_output)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected)); + } } + } 
TYPED_TEST(RocprimDeviceRadixSort, SortPairsDoubleBuffer) @@ -557,141 +581,149 @@ TYPED_TEST(RocprimDeviceRadixSort, SortPairsDoubleBuffer) const bool debug_synchronous = false; - const std::vector sizes = get_sizes(); - for(size_t size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - if(size > (1 << 20) && !check_huge_sizes) continue; + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector keys_input; - if(rp::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else + const std::vector sizes = get_sizes(seed_value); + for(size_t size : sizes) { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() + if(size > (1 << 20) && !check_huge_sizes) continue; + + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector keys_input; + if(rp::is_floating_point::value) + { + keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000, seed_value); + } + else + { + keys_input = test_utils::get_random_data( + size, + std::numeric_limits::min(), + std::numeric_limits::max(), + seed_value + ); + } + + std::vector values_input(size); + std::iota(values_input.begin(), values_input.end(), 0); + + key_type * d_keys_input; + key_type * d_keys_output; + HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); + HIP_CHECK( + hipMemcpy( + d_keys_input, keys_input.data(), + size * sizeof(key_type), + hipMemcpyHostToDevice + ) ); - } - - std::vector values_input(size); - std::iota(values_input.begin(), values_input.end(), 0); - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * 
sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - value_type * d_values_input; - value_type * d_values_output; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); - HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(value_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - using key_value = std::pair; - - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < size; i++) - { - expected[i] = key_value(keys_input[i], values_input[i]); - } - std::stable_sort( - expected.begin(), expected.end(), - key_value_comparator() - ); - std::vector keys_expected(size); - std::vector values_expected(size); - for(size_t i = 0; i < size; i++) - { - keys_expected[i] = expected[i].first; - values_expected[i] = expected[i].second; - } - - rp::double_buffer d_keys(d_keys_input, d_keys_output); - rp::double_buffer d_values(d_values_input, d_values_output); - void * d_temporary_storage = nullptr; - size_t temporary_storage_bytes; - HIP_CHECK( - rp::radix_sort_pairs( - d_temporary_storage, temporary_storage_bytes, - d_keys, d_values, size, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0); - - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { + value_type * d_values_input; + value_type * d_values_output; + HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); + HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(value_type))); HIP_CHECK( - rp::radix_sort_pairs_desc( - d_temporary_storage, temporary_storage_bytes, - d_keys, d_values, size, - start_bit, end_bit, - stream, debug_synchronous + hipMemcpy( + d_values_input, values_input.data(), + size * sizeof(value_type), + hipMemcpyHostToDevice ) ); - } - else - { + + 
using key_value = std::pair; + + // Calculate expected results on host + std::vector expected(size); + for(size_t i = 0; i < size; i++) + { + expected[i] = key_value(keys_input[i], values_input[i]); + } + std::stable_sort( + expected.begin(), expected.end(), + key_value_comparator() + ); + std::vector keys_expected(size); + std::vector values_expected(size); + for(size_t i = 0; i < size; i++) + { + keys_expected[i] = expected[i].first; + values_expected[i] = expected[i].second; + } + + rp::double_buffer d_keys(d_keys_input, d_keys_output); + rp::double_buffer d_values(d_values_input, d_values_output); + + void * d_temporary_storage = nullptr; + size_t temporary_storage_bytes; HIP_CHECK( rp::radix_sort_pairs( d_temporary_storage, temporary_storage_bytes, d_keys, d_values, size, - start_bit, end_bit, - stream, debug_synchronous + start_bit, end_bit + ) + ); + + ASSERT_GT(temporary_storage_bytes, 0); + + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + if(descending) + { + HIP_CHECK( + rp::radix_sort_pairs_desc( + d_temporary_storage, temporary_storage_bytes, + d_keys, d_values, size, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + else + { + HIP_CHECK( + rp::radix_sort_pairs( + d_temporary_storage, temporary_storage_bytes, + d_keys, d_values, size, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + + HIP_CHECK(hipFree(d_temporary_storage)); + + std::vector keys_output(size); + HIP_CHECK( + hipMemcpy( + keys_output.data(), d_keys.current(), + size * sizeof(key_type), + hipMemcpyDeviceToHost ) ); - } - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys.current(), - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - std::vector values_output(size); - HIP_CHECK( - hipMemcpy( - values_output.data(), d_values.current(), - size * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_keys_input)); - 
HIP_CHECK(hipFree(d_keys_output)); - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_values_output)); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, keys_expected)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, values_expected)); + std::vector values_output(size); + HIP_CHECK( + hipMemcpy( + values_output.data(), d_values.current(), + size * sizeof(value_type), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK(hipFree(d_keys_input)); + HIP_CHECK(hipFree(d_keys_output)); + HIP_CHECK(hipFree(d_values_input)); + HIP_CHECK(hipFree(d_values_output)); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, keys_expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, values_expected)); + } } + } diff --git a/test/rocprim/test_device_reduce.cpp b/test/rocprim/test_device_reduce.cpp index 5ea5c038e..dc460c999 100644 --- a/test/rocprim/test_device_reduce.cpp +++ b/test/rocprim/test_device_reduce.cpp @@ -79,14 +79,14 @@ typedef ::testing::Types< DeviceReduceParams, test_utils::custom_test_type> > RocprimDeviceReduceTestsParams; -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 1, 10, 53, 211, 1024, 2048, 5096, 34567, (1 << 17) - 1220 }; - const std::vector random_sizes = test_utils::get_random_data(2, 1, 16384); + const std::vector random_sizes = test_utils::get_random_data(2, 1, 16384, seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); std::sort(sizes.begin(), sizes.end()); return sizes; @@ -156,95 +156,102 @@ TYPED_TEST(RocprimDeviceReduceTests, Reduce) const bool debug_synchronous = TestFixture::debug_synchronous; static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - hipStream_t stream = 0; // default + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // precision of half differs between host and device with large plus reductions - if(std::is_same::value && size >= 1024) - { - break; - } - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 100); - std::vector output(1, 0); - - // reduce function - binary_op_type plus_op; - - T * d_input; - U * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - U expected = U(0); - for(unsigned int i = 0; i < input.size(); i++) + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) { - expected = plus_op(expected, input[i]); + hipStream_t stream = 0; // default + + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // precision of half differs between host and device with large plus reductions + if(std::is_same::value && size >= 1024) + { + break; + } + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 100, seed_value); + std::vector output(1, 0); + + // reduce function + binary_op_type plus_op; + + T * d_input; + U * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected results on host + U expected = U(0); + for(unsigned int i = 0; i < input.size(); i++) + { + expected = plus_op(expected, input[i]); + } + + // temp storage + size_t temp_storage_size_bytes; + void * d_temp_storage = nullptr; + // Get size of 
d_temp_storage + HIP_CHECK( + rocprim::reduce( + d_temp_storage, temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + input.size(), rp::plus(), stream, debug_synchronous + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::reduce( + d_temp_storage, temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + input.size(), rp::plus(), stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output[0], expected, 0.01f)); + + hipFree(d_input); + hipFree(d_output); + hipFree(d_temp_storage); } - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::reduce( - d_temp_storage, temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - input.size(), rp::plus(), stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::reduce( - d_temp_storage, temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - input.size(), rp::plus(), stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, 
- output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output[0], expected, 0.01f)); - - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); } + } TYPED_TEST(RocprimDeviceReduceTests, ReduceMinimum) @@ -255,89 +262,96 @@ TYPED_TEST(RocprimDeviceReduceTests, ReduceMinimum) const bool debug_synchronous = TestFixture::debug_synchronous; static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 100); - std::vector output(1, 0); - - T * d_input; - U * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // reduce function - binary_op_type min_op; - - // Calculate expected results on host - U expected = U(test_utils::numeric_limits::max()); - for(unsigned int i = 0; i < input.size(); i++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) { - expected = min_op(expected, input[i]); + hipStream_t stream = 0; // default + + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 100, seed_value); + std::vector output(1, 0); + + T * d_input; + U * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // reduce function + binary_op_type min_op; + + // Calculate expected results on host + U expected = U(test_utils::numeric_limits::max()); + for(unsigned int i = 0; i < input.size(); i++) + { + expected = min_op(expected, input[i]); + } + + // temp storage + size_t temp_storage_size_bytes; + void * d_temp_storage = nullptr; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::reduce( + d_temp_storage, temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + test_utils::numeric_limits::max(), input.size(), rp::minimum(), stream, debug_synchronous + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::reduce( + d_temp_storage, temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + test_utils::numeric_limits::max(), input.size(), rp::minimum(), stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + 
hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output[0], expected, 0.01f)); + + hipFree(d_input); + hipFree(d_output); + hipFree(d_temp_storage); } - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::reduce( - d_temp_storage, temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - test_utils::numeric_limits::max(), input.size(), rp::minimum(), stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::reduce( - d_temp_storage, temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - test_utils::numeric_limits::max(), input.size(), rp::minimum(), stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output[0], expected, 0.01f)); - - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); } + } template< @@ -379,93 +393,100 @@ TYPED_TEST(RocprimDeviceReduceTests, ReduceArgMinimum) const bool debug_synchronous = TestFixture::debug_synchronous; static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with 
size = " << size); + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - // Generate data - std::vector input(size); - for (size_t i = 0; i < size; i++) + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) { - input[i].key = i; - input[i].value = test_utils::get_random_value(1, 100); + hipStream_t stream = 0; // default + + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input(size); + for (size_t i = 0; i < size; i++) + { + input[i].key = i; + input[i].value = test_utils::get_random_value(1, 100, seed_value); + } + std::vector output(1); + + key_value * d_input; + key_value * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(key_value))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(key_value))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(key_value), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + arg_min reduce_op; + const key_value max(std::numeric_limits::max(), test_utils::numeric_limits::max()); + + // Calculate expected results on host + key_value expected = max; + for(unsigned int i = 0; i < input.size(); i++) + { + expected = reduce_op(expected, input[i]); + } + + // temp storage + size_t temp_storage_size_bytes; + void * d_temp_storage = nullptr; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::reduce( + d_temp_storage, temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + max, input.size(), reduce_op, stream, debug_synchronous + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::reduce( + d_temp_storage, temp_storage_size_bytes, + d_input, + 
test_utils::wrap_in_identity_iterator(d_output), + max, input.size(), reduce_op, stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(key_value), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + ASSERT_EQ(output[0].key, expected.key); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output[0].value, expected.value, 0.01f)); + + hipFree(d_input); + hipFree(d_output); + hipFree(d_temp_storage); } - std::vector output(1); - - key_value * d_input; - key_value * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(key_value))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(key_value))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(key_value), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - arg_min reduce_op; - const key_value max(std::numeric_limits::max(), test_utils::numeric_limits::max()); - - // Calculate expected results on host - key_value expected = max; - for(unsigned int i = 0; i < input.size(); i++) - { - expected = reduce_op(expected, input[i]); - } - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::reduce( - d_temp_storage, temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - max, input.size(), reduce_op, stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::reduce( - d_temp_storage, temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - max, input.size(), 
reduce_op, stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(key_value), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_EQ(output[0].key, expected.key); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output[0].value, expected.value, 0.01f)); - - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); } + } diff --git a/test/rocprim/test_device_reduce_by_key.cpp b/test/rocprim/test_device_reduce_by_key.cpp index e7a99fac7..782ccb68a 100644 --- a/test/rocprim/test_device_reduce_by_key.cpp +++ b/test/rocprim/test_device_reduce_by_key.cpp @@ -116,7 +116,7 @@ typedef ::testing::Types< TYPED_TEST_CASE(RocprimDeviceReduceByKey, Params); -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 1024, 2048, 4096, 1792, @@ -125,7 +125,7 @@ std::vector get_sizes() 100000, (1 << 16) - 1220, (1 << 23) - 76543 }; - const std::vector random_sizes = test_utils::get_random_data(10, 1, 100000); + const std::vector random_sizes = test_utils::get_random_data(10, 1, 100000, seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); return sizes; } @@ -153,165 +153,172 @@ TYPED_TEST(RocprimDeviceReduceByKey, ReduceByKey) const unsigned int seed = 123; std::default_random_engine gen(seed); - for(size_t size : get_sizes()) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - hipStream_t stream = 0; // default + for(size_t size : get_sizes(seed_value)) + { + SCOPED_TRACE(testing::Message() << "with size = " << size); - const bool use_unique_keys = bool(test_utils::get_random_value(0, 1)); + hipStream_t stream = 0; // default - // Generate data and calculate expected results - std::vector unique_expected; - std::vector aggregates_expected; - size_t unique_count_expected = 0; + const bool use_unique_keys = bool(test_utils::get_random_value(0, 1, seed_value)); - std::vector keys_input(size); - key_distribution_type key_delta_dis(1, 5); - std::uniform_int_distribution key_count_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - std::vector values_input = test_utils::get_random_data(size, 0, 100); + // Generate data and calculate expected results + std::vector unique_expected; + std::vector aggregates_expected; + size_t unique_count_expected = 0; - size_t offset = 0; - key_type prev_key = key_distribution_type(0, 100)(gen); - key_type current_key = prev_key + key_delta_dis(gen); - while(offset < size) - { - const size_t key_count = key_count_dis(gen); + std::vector keys_input(size); + key_distribution_type key_delta_dis(1, 5); + std::uniform_int_distribution key_count_dis( + TestFixture::params::min_segment_length, + TestFixture::params::max_segment_length + ); + std::vector values_input = test_utils::get_random_data(size, 0, 100, seed_value); - const size_t end = std::min(size, offset + key_count); - for(size_t i = offset; i < end; i++) - { - keys_input[i] = current_key; - } - aggregate_type aggregate = values_input[offset]; - for(size_t i = offset + 1; i < end; i++) + size_t offset = 0; + key_type prev_key = key_distribution_type(0, 100)(gen); + key_type current_key = prev_key + key_delta_dis(gen); + while(offset < size) { - aggregate = reduce_op(aggregate, static_cast(values_input[i])); + const 
size_t key_count = key_count_dis(gen); + + const size_t end = std::min(size, offset + key_count); + for(size_t i = offset; i < end; i++) + { + keys_input[i] = current_key; + } + aggregate_type aggregate = values_input[offset]; + for(size_t i = offset + 1; i < end; i++) + { + aggregate = reduce_op(aggregate, static_cast(values_input[i])); + } + + // The first key of the segment must be written into unique + // (it may differ from other keys in case of custom key compraison operators) + if(unique_count_expected == 0 || !key_compare_op(prev_key, current_key)) + { + unique_expected.push_back(current_key); + unique_count_expected++; + aggregates_expected.push_back(aggregate); + } + else + { + aggregates_expected.back() = reduce_op(aggregates_expected.back(), aggregate); + } + + if (use_unique_keys) + { + prev_key = current_key; + // e.g. 1,1,1,2,5,5,8,8,8 + current_key = current_key + key_delta_dis(gen); + } + else + { + // e.g. 1,1,5,1,5,5,5,1 + std::swap(current_key, prev_key); + } + + offset += key_count; } - // The first key of the segment must be written into unique - // (it may differ from other keys in case of custom key compraison operators) - if(unique_count_expected == 0 || !key_compare_op(prev_key, current_key)) - { - unique_expected.push_back(current_key); - unique_count_expected++; - aggregates_expected.push_back(aggregate); - } - else - { - aggregates_expected.back() = reduce_op(aggregates_expected.back(), aggregate); - } - - if (use_unique_keys) - { - prev_key = current_key; - // e.g. 1,1,1,2,5,5,8,8,8 - current_key = current_key + key_delta_dis(gen); - } - else - { - // e.g. 
1,1,5,1,5,5,5,1 - std::swap(current_key, prev_key); - } - - offset += key_count; + key_type * d_keys_input; + value_type * d_values_input; + HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); + HIP_CHECK( + hipMemcpy( + d_keys_input, keys_input.data(), + size * sizeof(key_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_values_input, values_input.data(), + size * sizeof(value_type), + hipMemcpyHostToDevice + ) + ); + + key_type * d_unique_output; + aggregate_type * d_aggregates_output; + unsigned int * d_unique_count_output; + HIP_CHECK(hipMalloc(&d_unique_output, unique_count_expected * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_aggregates_output, unique_count_expected * sizeof(aggregate_type))); + HIP_CHECK(hipMalloc(&d_unique_count_output, sizeof(unsigned int))); + + size_t temporary_storage_bytes; + + HIP_CHECK( + rp::reduce_by_key( + nullptr, temporary_storage_bytes, + d_keys_input, d_values_input, size, + test_utils::wrap_in_identity_iterator(d_unique_output), + test_utils::wrap_in_identity_iterator(d_aggregates_output), + d_unique_count_output, + reduce_op, key_compare_op, + stream, debug_synchronous + ) + ); + + ASSERT_GT(temporary_storage_bytes, 0); + + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + HIP_CHECK( + rp::reduce_by_key( + d_temporary_storage, temporary_storage_bytes, + d_keys_input, d_values_input, size, + d_unique_output, d_aggregates_output, + d_unique_count_output, + reduce_op, key_compare_op, + stream, debug_synchronous + ) + ); + + HIP_CHECK(hipFree(d_temporary_storage)); + + std::vector unique_output(unique_count_expected); + std::vector aggregates_output(unique_count_expected); + std::vector unique_count_output(1); + HIP_CHECK( + hipMemcpy( + unique_output.data(), d_unique_output, + unique_count_expected * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK( + hipMemcpy( + 
aggregates_output.data(), d_aggregates_output, + unique_count_expected * sizeof(aggregate_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK( + hipMemcpy( + unique_count_output.data(), d_unique_count_output, + sizeof(unsigned int), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK(hipFree(d_keys_input)); + HIP_CHECK(hipFree(d_values_input)); + HIP_CHECK(hipFree(d_unique_output)); + HIP_CHECK(hipFree(d_aggregates_output)); + HIP_CHECK(hipFree(d_unique_count_output)); + + ASSERT_EQ(unique_count_output[0], unique_count_expected); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(unique_output, unique_expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(aggregates_output, aggregates_expected)); } - - key_type * d_keys_input; - value_type * d_values_input; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - key_type * d_unique_output; - aggregate_type * d_aggregates_output; - unsigned int * d_unique_count_output; - HIP_CHECK(hipMalloc(&d_unique_output, unique_count_expected * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_aggregates_output, unique_count_expected * sizeof(aggregate_type))); - HIP_CHECK(hipMalloc(&d_unique_count_output, sizeof(unsigned int))); - - size_t temporary_storage_bytes; - - HIP_CHECK( - rp::reduce_by_key( - nullptr, temporary_storage_bytes, - d_keys_input, d_values_input, size, - test_utils::wrap_in_identity_iterator(d_unique_output), - test_utils::wrap_in_identity_iterator(d_aggregates_output), - d_unique_count_output, - reduce_op, key_compare_op, - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, 
temporary_storage_bytes)); - - HIP_CHECK( - rp::reduce_by_key( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_values_input, size, - d_unique_output, d_aggregates_output, - d_unique_count_output, - reduce_op, key_compare_op, - stream, debug_synchronous - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector unique_output(unique_count_expected); - std::vector aggregates_output(unique_count_expected); - std::vector unique_count_output(1); - HIP_CHECK( - hipMemcpy( - unique_output.data(), d_unique_output, - unique_count_expected * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK( - hipMemcpy( - aggregates_output.data(), d_aggregates_output, - unique_count_expected * sizeof(aggregate_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK( - hipMemcpy( - unique_count_output.data(), d_unique_count_output, - sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_unique_output)); - HIP_CHECK(hipFree(d_aggregates_output)); - HIP_CHECK(hipFree(d_unique_count_output)); - - ASSERT_EQ(unique_count_output[0], unique_count_expected); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(unique_output, unique_expected)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(aggregates_output, aggregates_expected)); } + } diff --git a/test/rocprim/test_device_run_length_encode.cpp b/test/rocprim/test_device_run_length_encode.cpp index 090712b21..af9dd3f04 100644 --- a/test/rocprim/test_device_run_length_encode.cpp +++ b/test/rocprim/test_device_run_length_encode.cpp @@ -89,7 +89,7 @@ typedef ::testing::Types< TYPED_TEST_CASE(RocprimDeviceRunLengthEncode, Params); -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 1024, 2048, 4096, 1792, @@ -98,7 +98,7 @@ std::vector get_sizes() 100000, (1 << 16) - 1220, (1 << 21) - 76543 }; - const std::vector random_sizes = test_utils::get_random_data(5, 1, 100000); + const 
std::vector random_sizes = test_utils::get_random_data(5, 1, 100000, seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); return sizes; } @@ -120,133 +120,140 @@ TYPED_TEST(RocprimDeviceRunLengthEncode, Encode) const unsigned int seed = 123; std::default_random_engine gen(seed); - for(size_t size : get_sizes()) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - hipStream_t stream = 0; // default - - // Generate data and calculate expected results - std::vector unique_expected; - std::vector counts_expected; - size_t runs_count_expected = 0; - - std::vector input(size); - key_distribution_type key_delta_dis(1, 5); - std::uniform_int_distribution key_count_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - std::vector values_input = test_utils::get_random_data(size, 0, 100); - - size_t offset = 0; - key_type current_key = key_distribution_type(0, 100)(gen); - while(offset < size) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + for(size_t size : get_sizes(seed_value)) { - size_t key_count = key_count_dis(gen); - current_key = current_key + key_delta_dis(gen); + SCOPED_TRACE(testing::Message() << "with size = " << size); + + hipStream_t stream = 0; // default + + // Generate data and calculate expected results + std::vector unique_expected; + std::vector counts_expected; + size_t runs_count_expected = 0; - const size_t end = std::min(size, offset + key_count); - key_count = end - offset; - for(size_t i = offset; i < end; i++) + std::vector input(size); + key_distribution_type key_delta_dis(1, 5); + std::uniform_int_distribution key_count_dis( + TestFixture::params::min_segment_length, + TestFixture::params::max_segment_length + ); + std::vector values_input = test_utils::get_random_data(size, 0, 100, seed_value); + + size_t offset = 0; + key_type current_key = key_distribution_type(0, 100)(gen); + while(offset < size) { - input[i] = current_key; + size_t key_count = key_count_dis(gen); + current_key = current_key + key_delta_dis(gen); + + const size_t end = std::min(size, offset + key_count); + key_count = end - offset; + for(size_t i = offset; i < end; i++) + { + input[i] = current_key; + } + + unique_expected.push_back(current_key); + runs_count_expected++; + counts_expected.push_back(key_count); + + offset += key_count; } - unique_expected.push_back(current_key); - runs_count_expected++; - counts_expected.push_back(key_count); + key_type * d_input; + HIP_CHECK(hipMalloc(&d_input, size * sizeof(key_type))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + size * sizeof(key_type), + hipMemcpyHostToDevice + ) + ); + + key_type * d_unique_output; + count_type * d_counts_output; + count_type * d_runs_count_output; + HIP_CHECK(hipMalloc(&d_unique_output, runs_count_expected * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_counts_output, runs_count_expected * sizeof(count_type))); + 
HIP_CHECK(hipMalloc(&d_runs_count_output, sizeof(count_type))); - offset += key_count; - } + size_t temporary_storage_bytes = 0; + + HIP_CHECK( + rocprim::run_length_encode( + nullptr, temporary_storage_bytes, + d_input, size, + test_utils::wrap_in_identity_iterator(d_unique_output), + test_utils::wrap_in_identity_iterator(d_counts_output), + test_utils::wrap_in_identity_iterator(d_runs_count_output), + stream, debug_synchronous + ) + ); + + ASSERT_GT(temporary_storage_bytes, 0U); + + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - key_type * d_input; - HIP_CHECK(hipMalloc(&d_input, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - key_type * d_unique_output; - count_type * d_counts_output; - count_type * d_runs_count_output; - HIP_CHECK(hipMalloc(&d_unique_output, runs_count_expected * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_counts_output, runs_count_expected * sizeof(count_type))); - HIP_CHECK(hipMalloc(&d_runs_count_output, sizeof(count_type))); - - size_t temporary_storage_bytes = 0; - - HIP_CHECK( - rocprim::run_length_encode( - nullptr, temporary_storage_bytes, - d_input, size, - test_utils::wrap_in_identity_iterator(d_unique_output), - test_utils::wrap_in_identity_iterator(d_counts_output), - test_utils::wrap_in_identity_iterator(d_runs_count_output), - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - rocprim::run_length_encode( - d_temporary_storage, temporary_storage_bytes, - d_input, size, - test_utils::wrap_in_identity_iterator(d_unique_output), - test_utils::wrap_in_identity_iterator(d_counts_output), - test_utils::wrap_in_identity_iterator(d_runs_count_output), - stream, debug_synchronous - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector 
unique_output(runs_count_expected); - std::vector counts_output(runs_count_expected); - std::vector runs_count_output(1); - HIP_CHECK( - hipMemcpy( - unique_output.data(), d_unique_output, - runs_count_expected * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK( - hipMemcpy( - counts_output.data(), d_counts_output, - runs_count_expected * sizeof(count_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK( - hipMemcpy( - runs_count_output.data(), d_runs_count_output, - sizeof(count_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_input)); - HIP_CHECK(hipFree(d_unique_output)); - HIP_CHECK(hipFree(d_counts_output)); - HIP_CHECK(hipFree(d_runs_count_output)); - - // Validating results - - std::vector runs_count_expected_2; - runs_count_expected_2.push_back(static_cast(runs_count_expected)); - test_utils::custom_assert_eq(runs_count_output, runs_count_expected_2, 1); - - ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(unique_output, unique_expected, runs_count_expected)); - ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(counts_output, counts_expected, runs_count_expected)); + HIP_CHECK( + rocprim::run_length_encode( + d_temporary_storage, temporary_storage_bytes, + d_input, size, + test_utils::wrap_in_identity_iterator(d_unique_output), + test_utils::wrap_in_identity_iterator(d_counts_output), + test_utils::wrap_in_identity_iterator(d_runs_count_output), + stream, debug_synchronous + ) + ); + + HIP_CHECK(hipFree(d_temporary_storage)); + + std::vector unique_output(runs_count_expected); + std::vector counts_output(runs_count_expected); + std::vector runs_count_output(1); + HIP_CHECK( + hipMemcpy( + unique_output.data(), d_unique_output, + runs_count_expected * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK( + hipMemcpy( + counts_output.data(), d_counts_output, + runs_count_expected * sizeof(count_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK( + hipMemcpy( + runs_count_output.data(), d_runs_count_output, + 
sizeof(count_type), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK(hipFree(d_input)); + HIP_CHECK(hipFree(d_unique_output)); + HIP_CHECK(hipFree(d_counts_output)); + HIP_CHECK(hipFree(d_runs_count_output)); + + // Validating results + + std::vector runs_count_expected_2; + runs_count_expected_2.push_back(static_cast(runs_count_expected)); + test_utils::custom_assert_eq(runs_count_output, runs_count_expected_2, 1); + + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(unique_output, unique_expected, runs_count_expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(counts_output, counts_expected, runs_count_expected)); + } } + } TYPED_TEST(RocprimDeviceRunLengthEncode, NonTrivialRuns) @@ -267,147 +274,154 @@ TYPED_TEST(RocprimDeviceRunLengthEncode, NonTrivialRuns) const unsigned int seed = 123; std::default_random_engine gen(seed); - for(size_t size : get_sizes()) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - hipStream_t stream = 0; // default - - // Generate data and calculate expected results - std::vector offsets_expected; - std::vector counts_expected; - size_t runs_count_expected = 0; - - std::vector input(size); - key_distribution_type key_delta_dis(1, 5); - std::uniform_int_distribution key_count_dis( - TestFixture::params::min_segment_length, - TestFixture::params::max_segment_length - ); - std::bernoulli_distribution is_trivial_dis(0.1); - std::vector values_input = test_utils::get_random_data(size, 0, 100); - - size_t offset = 0; - key_type current_key = key_distribution_type(0, 100)(gen); - while(offset < size) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + for(size_t size : get_sizes(seed_value)) { - size_t key_count; - if(TestFixture::params::min_segment_length == 1 && is_trivial_dis(gen)) - { - // Increased probability of trivial runs for long segments - key_count = 1; - } - else - { - key_count = key_count_dis(gen); - } - current_key = current_key + key_delta_dis(gen); + SCOPED_TRACE(testing::Message() << "with size = " << size); - const size_t end = std::min(size, offset + key_count); - key_count = end - offset; - for(size_t i = offset; i < end; i++) - { - input[i] = current_key; - } + hipStream_t stream = 0; // default + + // Generate data and calculate expected results + std::vector offsets_expected; + std::vector counts_expected; + size_t runs_count_expected = 0; + + std::vector input(size); + key_distribution_type key_delta_dis(1, 5); + std::uniform_int_distribution key_count_dis( + TestFixture::params::min_segment_length, + TestFixture::params::max_segment_length + ); + std::bernoulli_distribution is_trivial_dis(0.1); + std::vector values_input = test_utils::get_random_data(size, 0, 100, seed_value); - if(key_count > 1) + size_t offset = 0; + key_type current_key = key_distribution_type(0, 100)(gen); + while(offset < size) { - offsets_expected.push_back(offset); - runs_count_expected++; - counts_expected.push_back(key_count); + size_t key_count; + if(TestFixture::params::min_segment_length == 1 && is_trivial_dis(gen)) + { + // Increased probability of trivial runs for long segments + key_count = 1; + } + else + { + key_count = key_count_dis(gen); + } + current_key = current_key + key_delta_dis(gen); + + const size_t end = std::min(size, offset + key_count); + key_count = end - offset; + for(size_t i = offset; i < end; i++) + { + input[i] = current_key; + } + + if(key_count > 1) + { + offsets_expected.push_back(offset); + runs_count_expected++; + counts_expected.push_back(key_count); + } + + offset += key_count; 
} - offset += key_count; - } - - key_type * d_input; - HIP_CHECK(hipMalloc(&d_input, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets_output; - count_type * d_counts_output; - count_type * d_runs_count_output; - HIP_CHECK(hipMalloc(&d_offsets_output, std::max(1, runs_count_expected) * sizeof(offset_type))); - HIP_CHECK(hipMalloc(&d_counts_output, std::max(1, runs_count_expected) * sizeof(count_type))); - HIP_CHECK(hipMalloc(&d_runs_count_output, sizeof(count_type))); - - size_t temporary_storage_bytes = 0; - - HIP_CHECK( - rocprim::run_length_encode_non_trivial_runs( - nullptr, temporary_storage_bytes, - d_input, size, - test_utils::wrap_in_identity_iterator(d_offsets_output), - test_utils::wrap_in_identity_iterator(d_counts_output), - test_utils::wrap_in_identity_iterator(d_runs_count_output), - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - rocprim::run_length_encode_non_trivial_runs( - d_temporary_storage, temporary_storage_bytes, - d_input, size, - test_utils::wrap_in_identity_iterator(d_offsets_output), - test_utils::wrap_in_identity_iterator(d_counts_output), - test_utils::wrap_in_identity_iterator(d_runs_count_output), - stream, debug_synchronous - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector offsets_output(runs_count_expected); - std::vector counts_output(runs_count_expected); - std::vector runs_count_output(1); - if(runs_count_expected > 0) - { + key_type * d_input; + HIP_CHECK(hipMalloc(&d_input, size * sizeof(key_type))); HIP_CHECK( hipMemcpy( - offsets_output.data(), d_offsets_output, - runs_count_expected * sizeof(offset_type), - hipMemcpyDeviceToHost + d_input, input.data(), + size * sizeof(key_type), + hipMemcpyHostToDevice ) ); + + offset_type * d_offsets_output; 
+ count_type * d_counts_output; + count_type * d_runs_count_output; + HIP_CHECK(hipMalloc(&d_offsets_output, std::max(1, runs_count_expected) * sizeof(offset_type))); + HIP_CHECK(hipMalloc(&d_counts_output, std::max(1, runs_count_expected) * sizeof(count_type))); + HIP_CHECK(hipMalloc(&d_runs_count_output, sizeof(count_type))); + + size_t temporary_storage_bytes = 0; + + HIP_CHECK( + rocprim::run_length_encode_non_trivial_runs( + nullptr, temporary_storage_bytes, + d_input, size, + test_utils::wrap_in_identity_iterator(d_offsets_output), + test_utils::wrap_in_identity_iterator(d_counts_output), + test_utils::wrap_in_identity_iterator(d_runs_count_output), + stream, debug_synchronous + ) + ); + + ASSERT_GT(temporary_storage_bytes, 0U); + + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + HIP_CHECK( + rocprim::run_length_encode_non_trivial_runs( + d_temporary_storage, temporary_storage_bytes, + d_input, size, + test_utils::wrap_in_identity_iterator(d_offsets_output), + test_utils::wrap_in_identity_iterator(d_counts_output), + test_utils::wrap_in_identity_iterator(d_runs_count_output), + stream, debug_synchronous + ) + ); + + HIP_CHECK(hipFree(d_temporary_storage)); + + std::vector offsets_output(runs_count_expected); + std::vector counts_output(runs_count_expected); + std::vector runs_count_output(1); + if(runs_count_expected > 0) + { + HIP_CHECK( + hipMemcpy( + offsets_output.data(), d_offsets_output, + runs_count_expected * sizeof(offset_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK( + hipMemcpy( + counts_output.data(), d_counts_output, + runs_count_expected * sizeof(count_type), + hipMemcpyDeviceToHost + ) + ); + } HIP_CHECK( hipMemcpy( - counts_output.data(), d_counts_output, - runs_count_expected * sizeof(count_type), + runs_count_output.data(), d_runs_count_output, + sizeof(count_type), hipMemcpyDeviceToHost ) ); + + HIP_CHECK(hipFree(d_input)); + HIP_CHECK(hipFree(d_offsets_output)); + 
HIP_CHECK(hipFree(d_counts_output)); + HIP_CHECK(hipFree(d_runs_count_output)); + + // Validating results + + std::vector runs_count_expected_2; + runs_count_expected_2.push_back(static_cast(runs_count_expected)); + test_utils::custom_assert_eq(runs_count_output, runs_count_expected_2, 1); + + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(offsets_output, offsets_expected, runs_count_expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(counts_output, counts_expected, runs_count_expected)); } - HIP_CHECK( - hipMemcpy( - runs_count_output.data(), d_runs_count_output, - sizeof(count_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_input)); - HIP_CHECK(hipFree(d_offsets_output)); - HIP_CHECK(hipFree(d_counts_output)); - HIP_CHECK(hipFree(d_runs_count_output)); - - // Validating results - - std::vector runs_count_expected_2; - runs_count_expected_2.push_back(static_cast(runs_count_expected)); - test_utils::custom_assert_eq(runs_count_output, runs_count_expected_2, 1); - - ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(offsets_output, offsets_expected, runs_count_expected)); - ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(counts_output, counts_expected, runs_count_expected)); } + } diff --git a/test/rocprim/test_device_scan.cpp b/test/rocprim/test_device_scan.cpp index 767ba7c39..d42976c9d 100644 --- a/test/rocprim/test_device_scan.cpp +++ b/test/rocprim/test_device_scan.cpp @@ -104,7 +104,7 @@ typedef ::testing::Types< DeviceScanParams > > RocprimDeviceScanTestsParams; -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 1, 10, 53, 211, @@ -112,7 +112,7 @@ std::vector get_sizes() 34567, (1 << 18), (1 << 20) - 12345 }; - const std::vector random_sizes = test_utils::get_random_data(3, 1, 100000); + const std::vector random_sizes = test_utils::get_random_data(3, 1, 100000, seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); std::sort(sizes.begin(), sizes.end()); 
return sizes; @@ -181,89 +181,96 @@ TYPED_TEST(RocprimDeviceScanTests, InclusiveScan) const bool debug_synchronous = TestFixture::debug_synchronous; static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 10); - std::vector output(input.size(), 0); - - T * d_input; - U * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // scan function - scan_op_type scan_op; - - // Calculate expected results on host - std::vector expected(input.size()); - test_utils::host_inclusive_scan( - input.begin(), input.end(), - expected.begin(), scan_op - ); - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::inclusive_scan( - d_temp_storage, temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - input.size(), scan_op, stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::inclusive_scan( - d_temp_storage, temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - input.size(), scan_op, stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - 
hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); - - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) + { + hipStream_t stream = 0; // default + + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 10, seed_value); + std::vector output(input.size(), 0); + + T * d_input; + U * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // scan function + scan_op_type scan_op; + + // Calculate expected results on host + std::vector expected(input.size()); + test_utils::host_inclusive_scan( + input.begin(), input.end(), + expected.begin(), scan_op + ); + + // temp storage + size_t temp_storage_size_bytes; + void * d_temp_storage = nullptr; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::inclusive_scan( + d_temp_storage, temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + input.size(), scan_op, stream, debug_synchronous + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::inclusive_scan( + d_temp_storage, temp_storage_size_bytes, + d_input, + 
test_utils::wrap_in_identity_iterator(d_output), + input.size(), scan_op, stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); + + hipFree(d_input); + hipFree(d_output); + hipFree(d_temp_storage); + } } + } TYPED_TEST(RocprimDeviceScanTests, ExclusiveScan) @@ -274,91 +281,98 @@ TYPED_TEST(RocprimDeviceScanTests, ExclusiveScan) const bool debug_synchronous = TestFixture::debug_synchronous; static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 10); - std::vector output(input.size()); - - T * d_input; - U * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // scan function - scan_op_type scan_op; - - // Calculate expected results on host - std::vector expected(input.size()); - T initial_value = test_utils::get_random_value(1, 10); - test_utils::host_exclusive_scan( - input.begin(), input.end(), - initial_value, expected.begin(), - scan_op - ); - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::exclusive_scan( - d_temp_storage, temp_storage_size_bytes, - 
d_input, - test_utils::wrap_in_identity_iterator(d_output), - initial_value, input.size(), scan_op, stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::exclusive_scan( - d_temp_storage, temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - initial_value, input.size(), scan_op, stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); - - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) + { + hipStream_t stream = 0; // default + + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 10, seed_value); + std::vector output(input.size()); + + T * d_input; + U * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // scan function + scan_op_type scan_op; + + // Calculate expected results on host + std::vector expected(input.size()); + T initial_value = test_utils::get_random_value(1, 10, seed_value); + test_utils::host_exclusive_scan( + input.begin(), input.end(), + initial_value, expected.begin(), + scan_op + ); + + // temp storage + size_t temp_storage_size_bytes; + void * d_temp_storage = nullptr; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::exclusive_scan( + d_temp_storage, temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + initial_value, input.size(), scan_op, stream, debug_synchronous + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::exclusive_scan( + d_temp_storage, temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + initial_value, input.size(), scan_op, stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + 
hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); + + hipFree(d_input); + hipFree(d_output); + hipFree(d_temp_storage); + } } + } TYPED_TEST(RocprimDeviceScanTests, InclusiveScanByKey) @@ -370,131 +384,138 @@ TYPED_TEST(RocprimDeviceScanTests, InclusiveScanByKey) using scan_op_type = typename TestFixture::scan_op_type; const bool debug_synchronous = TestFixture::debug_synchronous; - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - hipStream_t stream = 0; // default + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - SCOPED_TRACE(testing::Message() << "with size = " << size); + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) + { + hipStream_t stream = 0; // default - const bool use_unique_keys = bool(test_utils::get_random_value(0, 1)); + SCOPED_TRACE(testing::Message() << "with size = " << size); - // Generate data - std::vector input = test_utils::get_random_data(size, 0, 9); - std::vector keys; - if(use_unique_keys) - { - keys = test_utils::get_random_data(size, 0, 16); - std::sort(keys.begin(), keys.end()); - } - else - { - keys = test_utils::get_random_data(size, 0, 3); - } - std::vector output(input.size(), 0); - - T * d_input; - K * d_keys; - U * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_keys, keys.size() * sizeof(K))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_keys, keys.data(), - keys.size() * sizeof(K), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // scan function - scan_op_type 
scan_op; - // key compare function - rocprim::equal_to keys_compare_op; - - // Calculate expected results on host - std::vector expected(input.size()); - test_utils::host_inclusive_scan( - rocprim::make_zip_iterator( - rocprim::make_tuple(input.begin(), keys.begin()) - ), - rocprim::make_zip_iterator( - rocprim::make_tuple(input.end(), keys.end()) - ), - rocprim::make_zip_iterator( - rocprim::make_tuple(expected.begin(), rocprim::make_discard_iterator()) - ), - [scan_op, keys_compare_op](const rocprim::tuple& t1, - const rocprim::tuple& t2) - -> rocprim::tuple + const bool use_unique_keys = bool(test_utils::get_random_value(0, 1, seed_value)); + + // Generate data + std::vector input = test_utils::get_random_data(size, 0, 9, seed_value); + std::vector keys; + if(use_unique_keys) + { + keys = test_utils::get_random_data(size, 0, 16, seed_value); + std::sort(keys.begin(), keys.end()); + } + else { - if(keys_compare_op(rocprim::get<1>(t1), rocprim::get<1>(t2))) + keys = test_utils::get_random_data(size, 0, 3, seed_value); + } + std::vector output(input.size(), 0); + + T * d_input; + K * d_keys; + U * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_keys, keys.size() * sizeof(K))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_keys, keys.data(), + keys.size() * sizeof(K), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // scan function + scan_op_type scan_op; + // key compare function + rocprim::equal_to keys_compare_op; + + // Calculate expected results on host + std::vector expected(input.size()); + test_utils::host_inclusive_scan( + rocprim::make_zip_iterator( + rocprim::make_tuple(input.begin(), keys.begin()) + ), + rocprim::make_zip_iterator( + rocprim::make_tuple(input.end(), keys.end()) + ), + rocprim::make_zip_iterator( + 
rocprim::make_tuple(expected.begin(), rocprim::make_discard_iterator()) + ), + [scan_op, keys_compare_op](const rocprim::tuple& t1, + const rocprim::tuple& t2) + -> rocprim::tuple { - return rocprim::make_tuple( - scan_op(rocprim::get<0>(t1), rocprim::get<0>(t2)), - rocprim::get<1>(t2) - ); + if(keys_compare_op(rocprim::get<1>(t1), rocprim::get<1>(t2))) + { + return rocprim::make_tuple( + scan_op(rocprim::get<0>(t1), rocprim::get<0>(t2)), + rocprim::get<1>(t2) + ); + } + return t2; } - return t2; - } - ); - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::inclusive_scan_by_key( - d_temp_storage, temp_storage_size_bytes, - d_keys, d_input, d_output, input.size(), - scan_op, keys_compare_op, stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::inclusive_scan_by_key( - d_temp_storage, temp_storage_size_bytes, - d_keys, d_input, d_output, input.size(), - scan_op, keys_compare_op, stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); - - hipFree(d_keys); - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); + ); + + // temp storage + size_t temp_storage_size_bytes; + void * d_temp_storage = nullptr; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::inclusive_scan_by_key( + d_temp_storage, temp_storage_size_bytes, + d_keys, d_input, d_output, input.size(), + scan_op, 
keys_compare_op, stream, debug_synchronous + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::inclusive_scan_by_key( + d_temp_storage, temp_storage_size_bytes, + d_keys, d_input, d_output, input.size(), + scan_op, keys_compare_op, stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); + + hipFree(d_keys); + hipFree(d_input); + hipFree(d_output); + hipFree(d_temp_storage); + } } + } TYPED_TEST(RocprimDeviceScanTests, ExclusiveScanByKey) @@ -506,111 +527,118 @@ TYPED_TEST(RocprimDeviceScanTests, ExclusiveScanByKey) using scan_op_type = typename TestFixture::scan_op_type; const bool debug_synchronous = TestFixture::debug_synchronous; - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - hipStream_t stream = 0; // default + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - SCOPED_TRACE(testing::Message() << "with size = " << size); + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) + { + hipStream_t stream = 0; // default - const bool use_unique_keys = bool(test_utils::get_random_value(0, 1)); + SCOPED_TRACE(testing::Message() << "with size = " << size); - // Generate data - T initial_value = test_utils::get_random_value(1, 100); - std::vector input = test_utils::get_random_data(size, 0, 9); - std::vector keys; - if(use_unique_keys) - { - keys = test_utils::get_random_data(size, 0, 16); - std::sort(keys.begin(), keys.end()); - } - else - { - keys = test_utils::get_random_data(size, 0, 3); + const bool use_unique_keys = bool(test_utils::get_random_value(0, 1, seed_value)); + + // Generate data + T initial_value = test_utils::get_random_value(1, 100, seed_value); + std::vector input = test_utils::get_random_data(size, 0, 9, seed_value); + std::vector keys; + if(use_unique_keys) + { + keys = test_utils::get_random_data(size, 0, 16, seed_value); + std::sort(keys.begin(), keys.end()); + } + else + { + keys = test_utils::get_random_data(size, 0, 3, seed_value); + } + std::vector output(input.size(), 0); + + T * d_input; + K * d_keys; + U * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_keys, keys.size() * sizeof(K))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_keys, keys.data(), + keys.size() * sizeof(K), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // scan function + scan_op_type scan_op; + // key compare function + rocprim::equal_to keys_compare_op; + + // Calculate expected results on host + std::vector expected(input.size()); + test_utils::host_exclusive_scan_by_key( + 
input.begin(), input.end(), keys.begin(), + initial_value, expected.begin(), + scan_op, keys_compare_op + ); + + // temp storage + size_t temp_storage_size_bytes; + void * d_temp_storage = nullptr; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::exclusive_scan_by_key( + d_temp_storage, temp_storage_size_bytes, + d_keys, d_input, d_output, initial_value, input.size(), + scan_op, keys_compare_op, stream, debug_synchronous + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::exclusive_scan_by_key( + d_temp_storage, temp_storage_size_bytes, + d_keys, d_input, d_output, initial_value, input.size(), + scan_op, keys_compare_op, stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); + + hipFree(d_keys); + hipFree(d_input); + hipFree(d_output); + hipFree(d_temp_storage); } - std::vector output(input.size(), 0); - - T * d_input; - K * d_keys; - U * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_keys, keys.size() * sizeof(K))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_keys, keys.data(), - keys.size() * sizeof(K), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // scan function - scan_op_type scan_op; - // key compare function - rocprim::equal_to keys_compare_op; - - // Calculate 
expected results on host - std::vector expected(input.size()); - test_utils::host_exclusive_scan_by_key( - input.begin(), input.end(), keys.begin(), - initial_value, expected.begin(), - scan_op, keys_compare_op - ); - - // temp storage - size_t temp_storage_size_bytes; - void * d_temp_storage = nullptr; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::exclusive_scan_by_key( - d_temp_storage, temp_storage_size_bytes, - d_keys, d_input, d_output, initial_value, input.size(), - scan_op, keys_compare_op, stream, debug_synchronous - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::exclusive_scan_by_key( - d_temp_storage, temp_storage_size_bytes, - d_keys, d_input, d_output, initial_value, input.size(), - scan_op, keys_compare_op, stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); - - hipFree(d_keys); - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); } + } diff --git a/test/rocprim/test_device_segmented_radix_sort.cpp b/test/rocprim/test_device_segmented_radix_sort.cpp index cbe3e45fa..6dc2d607b 100644 --- a/test/rocprim/test_device_segmented_radix_sort.cpp +++ b/test/rocprim/test_device_segmented_radix_sort.cpp @@ -139,7 +139,7 @@ struct key_value_comparator } }; -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 1024, 2048, 4096, 1792, @@ -148,7 +148,7 @@ std::vector get_sizes() 1000000, (1 << 16) - 1220 }; - const std::vector random_sizes = 
test_utils::get_random_data(5, 1, 100000); + const std::vector random_sizes = test_utils::get_random_data(5, 1, 100000, seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); return sizes; } @@ -174,127 +174,135 @@ TYPED_TEST(RocprimDeviceSegmentedRadixSort, SortKeys) TestFixture::params::max_segment_length ); - const std::vector sizes = get_sizes(); - for(size_t size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - // Generate data - std::vector keys_input; - if(rp::is_floating_point::value) + const std::vector sizes = get_sizes(seed_value); + for(size_t size : sizes) { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector keys_input; + if(rp::is_floating_point::value) + { + keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000, seed_value); + } + else + { + keys_input = test_utils::get_random_data( + size, + std::numeric_limits::min(), + std::numeric_limits::max(), + seed_value + ); + } + + std::vector offsets; + unsigned int segments_count = 0; + size_t offset = 0; + while(offset < size) + { + const size_t segment_length = segment_length_dis(gen); + offsets.push_back(offset); + segments_count++; + offset += segment_length; + } + offsets.push_back(size); + + key_type * d_keys_input; + key_type * d_keys_output; + HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); + HIP_CHECK( + hipMemcpy( + d_keys_input, keys_input.data(), + size * sizeof(key_type), + 
hipMemcpyHostToDevice + ) ); - } - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) - { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); - segments_count++; - offset += segment_length; - } - offsets.push_back(size); - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets; - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector expected(keys_input); - for(size_t i = 0; i < segments_count; i++) - { - std::stable_sort( - expected.begin() + offsets[i], - expected.begin() + offsets[i + 1], - key_comparator() + offset_type * d_offsets; + HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); + HIP_CHECK( + hipMemcpy( + d_offsets, offsets.data(), + (segments_count + 1) * sizeof(offset_type), + hipMemcpyHostToDevice + ) ); - } - - size_t temporary_storage_bytes = 0; - HIP_CHECK( - rp::segmented_radix_sort_keys( - nullptr, temporary_storage_bytes, - d_keys_input, d_keys_output, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit - ) - ); - ASSERT_GT(temporary_storage_bytes, 0U); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { + // Calculate expected results on host + std::vector expected(keys_input); + for(size_t i = 0; i < segments_count; i++) + { + std::stable_sort( + expected.begin() + offsets[i], + expected.begin() + offsets[i + 1], + key_comparator() + ); + } + + size_t 
temporary_storage_bytes = 0; HIP_CHECK( - rp::segmented_radix_sort_keys_desc( - d_temporary_storage, temporary_storage_bytes, + rp::segmented_radix_sort_keys( + nullptr, temporary_storage_bytes, d_keys_input, d_keys_output, size, segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous + start_bit, end_bit ) ); - } - else - { + + ASSERT_GT(temporary_storage_bytes, 0U); + + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + if(descending) + { + HIP_CHECK( + rp::segmented_radix_sort_keys_desc( + d_temporary_storage, temporary_storage_bytes, + d_keys_input, d_keys_output, size, + segments_count, d_offsets, d_offsets + 1, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + else + { + HIP_CHECK( + rp::segmented_radix_sort_keys( + d_temporary_storage, temporary_storage_bytes, + d_keys_input, d_keys_output, size, + segments_count, d_offsets, d_offsets + 1, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + + std::vector keys_output(size); HIP_CHECK( - rp::segmented_radix_sort_keys( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous + hipMemcpy( + keys_output.data(), d_keys_output, + size * sizeof(key_type), + hipMemcpyDeviceToHost ) ); - } - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys_output, - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_keys_output)); - HIP_CHECK(hipFree(d_offsets)); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected)); + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_keys_input)); + HIP_CHECK(hipFree(d_keys_output)); + HIP_CHECK(hipFree(d_offsets)); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected)); 
+ } } + } TYPED_TEST(RocprimDeviceSegmentedRadixSort, SortPairs) @@ -319,167 +327,175 @@ TYPED_TEST(RocprimDeviceSegmentedRadixSort, SortPairs) TestFixture::params::max_segment_length ); - const std::vector sizes = get_sizes(); - for(size_t size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - // Generate data - std::vector keys_input; - if(rp::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else + const std::vector sizes = get_sizes(seed_value); + for(size_t size : sizes) { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector keys_input; + if(rp::is_floating_point::value) + { + keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000, seed_value); + } + else + { + keys_input = test_utils::get_random_data( + size, + std::numeric_limits::min(), + std::numeric_limits::max(), + seed_value + ); + } + + std::vector offsets; + unsigned int segments_count = 0; + size_t offset = 0; + while(offset < size) + { + const size_t segment_length = segment_length_dis(gen); + offsets.push_back(offset); + segments_count++; + offset += segment_length; + } + offsets.push_back(size); + + std::vector values_input(size); + std::iota(values_input.begin(), values_input.end(), 0); + + key_type * d_keys_input; + key_type * d_keys_output; + HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); + HIP_CHECK( + hipMemcpy( + d_keys_input, keys_input.data(), + size * sizeof(key_type), + hipMemcpyHostToDevice + ) ); - } - std::vector offsets; - unsigned int 
segments_count = 0; - size_t offset = 0; - while(offset < size) - { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); - segments_count++; - offset += segment_length; - } - offsets.push_back(size); - - std::vector values_input(size); - std::iota(values_input.begin(), values_input.end(), 0); - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - value_type * d_values_input; - value_type * d_values_output; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); - HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(value_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets; - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - - using key_value = std::pair; - - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < size; i++) - { - expected[i] = key_value(keys_input[i], values_input[i]); - } - for(size_t i = 0; i < segments_count; i++) - { - std::stable_sort( - expected.begin() + offsets[i], - expected.begin() + offsets[i + 1], - key_value_comparator() + value_type * d_values_input; + value_type * d_values_output; + HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); + HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(value_type))); + HIP_CHECK( + hipMemcpy( + d_values_input, values_input.data(), + size * sizeof(value_type), + hipMemcpyHostToDevice + ) ); - } - std::vector keys_expected(size); - std::vector values_expected(size); - for(size_t i = 0; i 
< size; i++) - { - keys_expected[i] = expected[i].first; - values_expected[i] = expected[i].second; - } - - void * d_temporary_storage = nullptr; - size_t temporary_storage_bytes = 0; - HIP_CHECK( - rp::segmented_radix_sort_pairs( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, d_values_input, d_values_output, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { + offset_type * d_offsets; + HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); HIP_CHECK( - rp::segmented_radix_sort_pairs_desc( - d_temporary_storage, temporary_storage_bytes, - d_keys_input, d_keys_output, d_values_input, d_values_output, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous + hipMemcpy( + d_offsets, offsets.data(), + (segments_count + 1) * sizeof(offset_type), + hipMemcpyHostToDevice ) ); - } - else - { + + using key_value = std::pair; + + // Calculate expected results on host + std::vector expected(size); + for(size_t i = 0; i < size; i++) + { + expected[i] = key_value(keys_input[i], values_input[i]); + } + for(size_t i = 0; i < segments_count; i++) + { + std::stable_sort( + expected.begin() + offsets[i], + expected.begin() + offsets[i + 1], + key_value_comparator() + ); + } + std::vector keys_expected(size); + std::vector values_expected(size); + for(size_t i = 0; i < size; i++) + { + keys_expected[i] = expected[i].first; + values_expected[i] = expected[i].second; + } + + void * d_temporary_storage = nullptr; + size_t temporary_storage_bytes = 0; HIP_CHECK( rp::segmented_radix_sort_pairs( d_temporary_storage, temporary_storage_bytes, d_keys_input, d_keys_output, d_values_input, d_values_output, size, segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous + start_bit, end_bit ) ); - } 
- std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys_output, - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - std::vector values_output(size); - HIP_CHECK( - hipMemcpy( - values_output.data(), d_values_output, - size * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_keys_output)); - HIP_CHECK(hipFree(d_values_output)); - HIP_CHECK(hipFree(d_offsets)); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, keys_expected)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, values_expected)); + ASSERT_GT(temporary_storage_bytes, 0U); + + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + if(descending) + { + HIP_CHECK( + rp::segmented_radix_sort_pairs_desc( + d_temporary_storage, temporary_storage_bytes, + d_keys_input, d_keys_output, d_values_input, d_values_output, size, + segments_count, d_offsets, d_offsets + 1, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + else + { + HIP_CHECK( + rp::segmented_radix_sort_pairs( + d_temporary_storage, temporary_storage_bytes, + d_keys_input, d_keys_output, d_values_input, d_values_output, size, + segments_count, d_offsets, d_offsets + 1, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + + std::vector keys_output(size); + HIP_CHECK( + hipMemcpy( + keys_output.data(), d_keys_output, + size * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); + + std::vector values_output(size); + HIP_CHECK( + hipMemcpy( + values_output.data(), d_values_output, + size * sizeof(value_type), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_keys_input)); + HIP_CHECK(hipFree(d_values_input)); + HIP_CHECK(hipFree(d_keys_output)); + HIP_CHECK(hipFree(d_values_output)); + HIP_CHECK(hipFree(d_offsets)); + + 
ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, keys_expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, values_expected)); + } } + } TYPED_TEST(RocprimDeviceSegmentedRadixSort, SortKeysDoubleBuffer) @@ -503,132 +519,140 @@ TYPED_TEST(RocprimDeviceSegmentedRadixSort, SortKeysDoubleBuffer) TestFixture::params::max_segment_length ); - const std::vector sizes = get_sizes(); - for(size_t size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - // Generate data - std::vector keys_input; - if(rp::is_floating_point::value) + const std::vector sizes = get_sizes(seed_value); + for(size_t size : sizes) { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else - { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector keys_input; + if(rp::is_floating_point::value) + { + keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000, seed_value); + } + else + { + keys_input = test_utils::get_random_data( + size, + std::numeric_limits::min(), + std::numeric_limits::max(), + seed_value + ); + } + + std::vector offsets; + unsigned int segments_count = 0; + size_t offset = 0; + while(offset < size) + { + const size_t segment_length = segment_length_dis(gen); + offsets.push_back(offset); + segments_count++; + offset += segment_length; + } + offsets.push_back(size); + + key_type * d_keys_input; + key_type * d_keys_output; + HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); + HIP_CHECK( + hipMemcpy( + d_keys_input, keys_input.data(), + size * 
sizeof(key_type), + hipMemcpyHostToDevice + ) ); - } - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) - { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); - segments_count++; - offset += segment_length; - } - offsets.push_back(size); - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets; - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - - // Calculate expected results on host - std::vector expected(keys_input); - for(size_t i = 0; i < segments_count; i++) - { - std::stable_sort( - expected.begin() + offsets[i], - expected.begin() + offsets[i + 1], - key_comparator() + offset_type * d_offsets; + HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); + HIP_CHECK( + hipMemcpy( + d_offsets, offsets.data(), + (segments_count + 1) * sizeof(offset_type), + hipMemcpyHostToDevice + ) ); - } - - rp::double_buffer d_keys(d_keys_input, d_keys_output); - // Use custom config - using config = rp::segmented_radix_sort_config<7, 4, rp::kernel_config<192, 5>>; + // Calculate expected results on host + std::vector expected(keys_input); + for(size_t i = 0; i < segments_count; i++) + { + std::stable_sort( + expected.begin() + offsets[i], + expected.begin() + offsets[i + 1], + key_comparator() + ); + } - size_t temporary_storage_bytes = 0; - HIP_CHECK( - rp::segmented_radix_sort_keys( - nullptr, temporary_storage_bytes, - d_keys, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit - ) - ); + rp::double_buffer 
d_keys(d_keys_input, d_keys_output); - ASSERT_GT(temporary_storage_bytes, 0U); + // Use custom config + using config = rp::segmented_radix_sort_config<7, 4, rp::kernel_config<192, 5>>; - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { + size_t temporary_storage_bytes = 0; HIP_CHECK( - rp::segmented_radix_sort_keys_desc( - d_temporary_storage, temporary_storage_bytes, + rp::segmented_radix_sort_keys( + nullptr, temporary_storage_bytes, d_keys, size, segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous + start_bit, end_bit ) ); - } - else - { + + ASSERT_GT(temporary_storage_bytes, 0U); + + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + if(descending) + { + HIP_CHECK( + rp::segmented_radix_sort_keys_desc( + d_temporary_storage, temporary_storage_bytes, + d_keys, size, + segments_count, d_offsets, d_offsets + 1, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + else + { + HIP_CHECK( + rp::segmented_radix_sort_keys( + d_temporary_storage, temporary_storage_bytes, + d_keys, size, + segments_count, d_offsets, d_offsets + 1, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + + std::vector keys_output(size); HIP_CHECK( - rp::segmented_radix_sort_keys( - d_temporary_storage, temporary_storage_bytes, - d_keys, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous + hipMemcpy( + keys_output.data(), d_keys.current(), + size * sizeof(key_type), + hipMemcpyDeviceToHost ) ); - } - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys.current(), - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_keys_output)); - HIP_CHECK(hipFree(d_offsets)); - - 
ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected)); + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_keys_input)); + HIP_CHECK(hipFree(d_keys_output)); + HIP_CHECK(hipFree(d_offsets)); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, expected)); + } } + } TYPED_TEST(RocprimDeviceSegmentedRadixSort, SortPairsDoubleBuffer) @@ -653,168 +677,176 @@ TYPED_TEST(RocprimDeviceSegmentedRadixSort, SortPairsDoubleBuffer) TestFixture::params::max_segment_length ); - const std::vector sizes = get_sizes(); - for(size_t size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - // Generate data - std::vector keys_input; - if(rp::is_floating_point::value) - { - keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000); - } - else + const std::vector sizes = get_sizes(seed_value); + for(size_t size : sizes) { - keys_input = test_utils::get_random_data( - size, - std::numeric_limits::min(), - std::numeric_limits::max() + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector keys_input; + if(rp::is_floating_point::value) + { + keys_input = test_utils::get_random_data(size, (key_type)-1000, (key_type)+1000, seed_value); + } + else + { + keys_input = test_utils::get_random_data( + size, + std::numeric_limits::min(), + std::numeric_limits::max(), + seed_value + ); + } + + std::vector offsets; + unsigned int segments_count = 0; + size_t offset = 0; + while(offset < size) + { + const size_t segment_length = segment_length_dis(gen); + offsets.push_back(offset); + segments_count++; + offset += segment_length; + } + offsets.push_back(size); + + std::vector values_input(size); + std::iota(values_input.begin(), values_input.end(), 0); + + key_type * d_keys_input; 
+ key_type * d_keys_output; + HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); + HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); + HIP_CHECK( + hipMemcpy( + d_keys_input, keys_input.data(), + size * sizeof(key_type), + hipMemcpyHostToDevice + ) ); - } - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) - { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); - segments_count++; - offset += segment_length; - } - offsets.push_back(size); - - std::vector values_input(size); - std::iota(values_input.begin(), values_input.end(), 0); - - key_type * d_keys_input; - key_type * d_keys_output; - HIP_CHECK(hipMalloc(&d_keys_input, size * sizeof(key_type))); - HIP_CHECK(hipMalloc(&d_keys_output, size * sizeof(key_type))); - HIP_CHECK( - hipMemcpy( - d_keys_input, keys_input.data(), - size * sizeof(key_type), - hipMemcpyHostToDevice - ) - ); - - value_type * d_values_input; - value_type * d_values_output; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(value_type))); - HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(value_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(value_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets; - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - - using key_value = std::pair; - - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < size; i++) - { - expected[i] = key_value(keys_input[i], values_input[i]); - } - for(size_t i = 0; i < segments_count; i++) - { - std::stable_sort( - expected.begin() + offsets[i], - expected.begin() + offsets[i + 1], - key_value_comparator() + value_type * d_values_input; + value_type * d_values_output; + HIP_CHECK(hipMalloc(&d_values_input, 
size * sizeof(value_type))); + HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(value_type))); + HIP_CHECK( + hipMemcpy( + d_values_input, values_input.data(), + size * sizeof(value_type), + hipMemcpyHostToDevice + ) ); - } - std::vector keys_expected(size); - std::vector values_expected(size); - for(size_t i = 0; i < size; i++) - { - keys_expected[i] = expected[i].first; - values_expected[i] = expected[i].second; - } - - rp::double_buffer d_keys(d_keys_input, d_keys_output); - rp::double_buffer d_values(d_values_input, d_values_output); - - void * d_temporary_storage = nullptr; - size_t temporary_storage_bytes = 0; - HIP_CHECK( - rp::segmented_radix_sort_pairs( - d_temporary_storage, temporary_storage_bytes, - d_keys, d_values, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0U); - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - if(descending) - { + offset_type * d_offsets; + HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); HIP_CHECK( - rp::segmented_radix_sort_pairs_desc( - d_temporary_storage, temporary_storage_bytes, - d_keys, d_values, size, - segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous + hipMemcpy( + d_offsets, offsets.data(), + (segments_count + 1) * sizeof(offset_type), + hipMemcpyHostToDevice ) ); - } - else - { + + using key_value = std::pair; + + // Calculate expected results on host + std::vector expected(size); + for(size_t i = 0; i < size; i++) + { + expected[i] = key_value(keys_input[i], values_input[i]); + } + for(size_t i = 0; i < segments_count; i++) + { + std::stable_sort( + expected.begin() + offsets[i], + expected.begin() + offsets[i + 1], + key_value_comparator() + ); + } + std::vector keys_expected(size); + std::vector values_expected(size); + for(size_t i = 0; i < size; i++) + { + keys_expected[i] = expected[i].first; + values_expected[i] = expected[i].second; + } + + 
rp::double_buffer d_keys(d_keys_input, d_keys_output); + rp::double_buffer d_values(d_values_input, d_values_output); + + void * d_temporary_storage = nullptr; + size_t temporary_storage_bytes = 0; HIP_CHECK( rp::segmented_radix_sort_pairs( d_temporary_storage, temporary_storage_bytes, d_keys, d_values, size, segments_count, d_offsets, d_offsets + 1, - start_bit, end_bit, - stream, debug_synchronous + start_bit, end_bit ) ); - } - std::vector keys_output(size); - HIP_CHECK( - hipMemcpy( - keys_output.data(), d_keys.current(), - size * sizeof(key_type), - hipMemcpyDeviceToHost - ) - ); - - std::vector values_output(size); - HIP_CHECK( - hipMemcpy( - values_output.data(), d_values.current(), - size * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_keys_input)); - HIP_CHECK(hipFree(d_keys_output)); - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_values_output)); - HIP_CHECK(hipFree(d_offsets)); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, keys_expected)); - ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, values_expected)); + ASSERT_GT(temporary_storage_bytes, 0U); + + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + if(descending) + { + HIP_CHECK( + rp::segmented_radix_sort_pairs_desc( + d_temporary_storage, temporary_storage_bytes, + d_keys, d_values, size, + segments_count, d_offsets, d_offsets + 1, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + else + { + HIP_CHECK( + rp::segmented_radix_sort_pairs( + d_temporary_storage, temporary_storage_bytes, + d_keys, d_values, size, + segments_count, d_offsets, d_offsets + 1, + start_bit, end_bit, + stream, debug_synchronous + ) + ); + } + + std::vector keys_output(size); + HIP_CHECK( + hipMemcpy( + keys_output.data(), d_keys.current(), + size * sizeof(key_type), + hipMemcpyDeviceToHost + ) + ); + + std::vector values_output(size); + HIP_CHECK( + hipMemcpy( + 
values_output.data(), d_values.current(), + size * sizeof(value_type), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_keys_input)); + HIP_CHECK(hipFree(d_keys_output)); + HIP_CHECK(hipFree(d_values_input)); + HIP_CHECK(hipFree(d_values_output)); + HIP_CHECK(hipFree(d_offsets)); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(keys_output, keys_expected)); + ASSERT_NO_FATAL_FAILURE(test_utils::assert_eq(values_output, values_expected)); + } } + } diff --git a/test/rocprim/test_device_segmented_reduce.cpp b/test/rocprim/test_device_segmented_reduce.cpp index 603a586c1..0abffa25a 100644 --- a/test/rocprim/test_device_segmented_reduce.cpp +++ b/test/rocprim/test_device_segmented_reduce.cpp @@ -95,7 +95,7 @@ typedef ::testing::Types< TYPED_TEST_CASE(RocprimDeviceSegmentedReduce, Params); -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 1024, 2048, 4096, 1792, @@ -104,7 +104,7 @@ std::vector get_sizes() 100000, (1 << 16) - 1220 }; - const std::vector random_sizes = test_utils::get_random_data(5, 1, 1000000); + const std::vector random_sizes = test_utils::get_random_data(5, 1, 1000000, seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); return sizes; } @@ -130,106 +130,113 @@ TYPED_TEST(RocprimDeviceSegmentedReduce, Reduce) TestFixture::params::max_segment_length ); - for(size_t size : get_sizes()) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - hipStream_t stream = 0; // default + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + for(size_t size : get_sizes(seed_value)) + { + SCOPED_TRACE(testing::Message() << "with size = " << size); - // Generate data and calculate expected results - std::vector aggregates_expected; + hipStream_t stream = 0; // default - std::vector values_input = test_utils::get_random_data(size, 0, 100); + // Generate data and calculate expected results + std::vector aggregates_expected; - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) - { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); + std::vector values_input = test_utils::get_random_data(size, 0, 100, seed_value); - const size_t end = std::min(size, offset + segment_length); - result_type aggregate = init; - for(size_t i = offset; i < end; i++) + std::vector offsets; + unsigned int segments_count = 0; + size_t offset = 0; + while(offset < size) { - aggregate = reduce_op(aggregate, values_input[i]); + const size_t segment_length = segment_length_dis(gen); + offsets.push_back(offset); + + const size_t end = std::min(size, offset + segment_length); + result_type aggregate = init; + for(size_t i = offset; i < end; i++) + { + aggregate = reduce_op(aggregate, values_input[i]); + } + aggregates_expected.push_back(aggregate); + + segments_count++; + offset += segment_length; } - aggregates_expected.push_back(aggregate); - - segments_count++; - offset += segment_length; + offsets.push_back(size); + + input_type * d_values_input; + HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(input_type))); + HIP_CHECK( + hipMemcpy( + d_values_input, values_input.data(), + size * sizeof(input_type), + hipMemcpyHostToDevice + ) + ); + + offset_type * d_offsets; + HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); + HIP_CHECK( + hipMemcpy( + d_offsets, offsets.data(), + (segments_count + 1) * sizeof(offset_type), + 
hipMemcpyHostToDevice + ) + ); + + output_type * d_aggregates_output; + HIP_CHECK(hipMalloc(&d_aggregates_output, segments_count * sizeof(output_type))); + + size_t temporary_storage_bytes; + + HIP_CHECK( + rp::segmented_reduce( + nullptr, temporary_storage_bytes, + d_values_input, d_aggregates_output, + segments_count, + d_offsets, d_offsets + 1, + reduce_op, init, + stream, debug_synchronous + ) + ); + + ASSERT_GT(temporary_storage_bytes, 0); + + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + HIP_CHECK( + rp::segmented_reduce( + d_temporary_storage, temporary_storage_bytes, + d_values_input, + test_utils::wrap_in_identity_iterator(d_aggregates_output), + segments_count, + d_offsets, d_offsets + 1, + reduce_op, init, + stream, debug_synchronous + ) + ); + + HIP_CHECK(hipFree(d_temporary_storage)); + + std::vector aggregates_output(segments_count); + HIP_CHECK( + hipMemcpy( + aggregates_output.data(), d_aggregates_output, + segments_count * sizeof(output_type), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK(hipFree(d_values_input)); + HIP_CHECK(hipFree(d_offsets)); + HIP_CHECK(hipFree(d_aggregates_output)); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(aggregates_output, aggregates_expected, 0.01f)); } - offsets.push_back(size); - - input_type * d_values_input; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(input_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(input_type), - hipMemcpyHostToDevice - ) - ); - - offset_type * d_offsets; - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - - output_type * d_aggregates_output; - HIP_CHECK(hipMalloc(&d_aggregates_output, segments_count * sizeof(output_type))); - - size_t temporary_storage_bytes; - - HIP_CHECK( - rp::segmented_reduce( - nullptr, 
temporary_storage_bytes, - d_values_input, d_aggregates_output, - segments_count, - d_offsets, d_offsets + 1, - reduce_op, init, - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0); - - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - rp::segmented_reduce( - d_temporary_storage, temporary_storage_bytes, - d_values_input, - test_utils::wrap_in_identity_iterator(d_aggregates_output), - segments_count, - d_offsets, d_offsets + 1, - reduce_op, init, - stream, debug_synchronous - ) - ); - - HIP_CHECK(hipFree(d_temporary_storage)); - - std::vector aggregates_output(segments_count); - HIP_CHECK( - hipMemcpy( - aggregates_output.data(), d_aggregates_output, - segments_count * sizeof(output_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_offsets)); - HIP_CHECK(hipFree(d_aggregates_output)); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(aggregates_output, aggregates_expected, 0.01f)); } + } diff --git a/test/rocprim/test_device_segmented_scan.cpp b/test/rocprim/test_device_segmented_scan.cpp index 154061b46..fcffb3854 100644 --- a/test/rocprim/test_device_segmented_scan.cpp +++ b/test/rocprim/test_device_segmented_scan.cpp @@ -92,7 +92,7 @@ typedef ::testing::Types< TYPED_TEST_CASE(RocprimDeviceSegmentedScan, Params); -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 1024, 2048, 4096, 1792, @@ -100,7 +100,7 @@ std::vector get_sizes() 2345, 11001, 34567, (1 << 16) - 1220 }; - const std::vector random_sizes = test_utils::get_random_data(2, 1, 1000000); + const std::vector random_sizes = test_utils::get_random_data(2, 1, 1000000, seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); return sizes; } @@ -128,106 +128,113 @@ TYPED_TEST(RocprimDeviceSegmentedScan, InclusiveScan) hipStream_t stream = 0; // default stream - const std::vector sizes = get_sizes(); - 
for(size_t size : get_sizes()) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - // Generate data and calculate expected results - std::vector values_expected(size); - std::vector values_input = test_utils::get_random_data(size, 0, 100); - - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) + const std::vector sizes = get_sizes(seed_value); + for(size_t size : get_sizes(seed_value)) { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data and calculate expected results + std::vector values_expected(size); + std::vector values_input = test_utils::get_random_data(size, 0, 100, seed_value); - const size_t end = std::min(size, offset + segment_length); - result_type aggregate = values_input[offset]; - values_expected[offset] = aggregate; - for(size_t i = offset + 1; i < end; i++) + std::vector offsets; + unsigned int segments_count = 0; + size_t offset = 0; + while(offset < size) { - aggregate = scan_op(aggregate, values_input[i]); - values_expected[i] = aggregate; - } + const size_t segment_length = segment_length_dis(gen); + offsets.push_back(offset); - segments_count++; - offset += segment_length; + const size_t end = std::min(size, offset + segment_length); + result_type aggregate = values_input[offset]; + values_expected[offset] = aggregate; + for(size_t i = offset + 1; i < end; i++) + { + aggregate = scan_op(aggregate, values_input[i]); + values_expected[i] = aggregate; + } + + segments_count++; + offset += segment_length; + } + offsets.push_back(size); + + input_type * d_values_input; + offset_type * d_offsets; + output_type * d_values_output; + HIP_CHECK(hipMalloc(&d_values_input, size 
* sizeof(input_type))); + HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); + HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(output_type))); + HIP_CHECK( + hipMemcpy( + d_values_input, values_input.data(), + size * sizeof(input_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_offsets, offsets.data(), + (segments_count + 1) * sizeof(offset_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + size_t temporary_storage_bytes; + HIP_CHECK( + rocprim::segmented_inclusive_scan( + nullptr, temporary_storage_bytes, + d_values_input, + test_utils::wrap_in_identity_iterator(d_values_output), + segments_count, + d_offsets, d_offsets + 1, + scan_op, + stream, debug_synchronous + ) + ); + + ASSERT_GT(temporary_storage_bytes, 0); + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + HIP_CHECK( + rocprim::segmented_inclusive_scan( + d_temporary_storage, temporary_storage_bytes, + d_values_input, + test_utils::wrap_in_identity_iterator(d_values_output), + segments_count, + d_offsets, d_offsets + 1, + scan_op, + stream, debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + std::vector values_output(size); + HIP_CHECK( + hipMemcpy( + values_output.data(), d_values_output, + values_output.size() * sizeof(output_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(values_output, values_expected, 0.01f)); + + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_values_input)); + HIP_CHECK(hipFree(d_offsets)); + HIP_CHECK(hipFree(d_values_output)); } - offsets.push_back(size); - - input_type * d_values_input; - offset_type * d_offsets; - output_type * d_values_output; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(input_type))); - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK(hipMalloc(&d_values_output, size * 
sizeof(output_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(input_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_offsets, offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - size_t temporary_storage_bytes; - HIP_CHECK( - rocprim::segmented_inclusive_scan( - nullptr, temporary_storage_bytes, - d_values_input, - test_utils::wrap_in_identity_iterator(d_values_output), - segments_count, - d_offsets, d_offsets + 1, - scan_op, - stream, debug_synchronous - ) - ); - - ASSERT_GT(temporary_storage_bytes, 0); - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - rocprim::segmented_inclusive_scan( - d_temporary_storage, temporary_storage_bytes, - d_values_input, - test_utils::wrap_in_identity_iterator(d_values_output), - segments_count, - d_offsets, d_offsets + 1, - scan_op, - stream, debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - std::vector values_output(size); - HIP_CHECK( - hipMemcpy( - values_output.data(), d_values_output, - values_output.size() * sizeof(output_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(values_output, values_expected, 0.01f)); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_offsets)); - HIP_CHECK(hipFree(d_values_output)); } + } TYPED_TEST(RocprimDeviceSegmentedScan, ExclusiveScan) @@ -254,107 +261,114 @@ TYPED_TEST(RocprimDeviceSegmentedScan, ExclusiveScan) hipStream_t stream = 0; // default stream - const std::vector sizes = get_sizes(); - for(size_t size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - // Generate data and calculate expected results - std::vector values_expected(size); - std::vector values_input = test_utils::get_random_data(size, 0, 100); - - std::vector offsets; - unsigned int segments_count = 0; - size_t offset = 0; - while(offset < size) + const std::vector sizes = get_sizes(seed_value); + for(size_t size : sizes) { - const size_t segment_length = segment_length_dis(gen); - offsets.push_back(offset); + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data and calculate expected results + std::vector values_expected(size); + std::vector values_input = test_utils::get_random_data(size, 0, 100, seed_value); - const size_t end = std::min(size, offset + segment_length); - result_type aggregate = init; - values_expected[offset] = aggregate; - for(size_t i = offset + 1; i < end; i++) + std::vector offsets; + unsigned int segments_count = 0; + size_t offset = 0; + while(offset < size) { - aggregate = scan_op(aggregate, values_input[i-1]); - values_expected[i] = aggregate; - } + const size_t segment_length = segment_length_dis(gen); + offsets.push_back(offset); - segments_count++; - offset += segment_length; + const size_t end = std::min(size, offset + segment_length); + result_type aggregate = init; + values_expected[offset] = aggregate; + for(size_t i = offset + 1; i < end; i++) + { + aggregate = scan_op(aggregate, values_input[i-1]); + values_expected[i] = aggregate; + } + + segments_count++; + offset += segment_length; + } + offsets.push_back(size); + + input_type * d_values_input; + offset_type * d_offsets; + output_type * d_values_output; + HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(input_type))); + HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); + HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(output_type))); + HIP_CHECK( + hipMemcpy( + d_values_input, values_input.data(), + size 
* sizeof(input_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_offsets, offsets.data(), + (segments_count + 1) * sizeof(offset_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + size_t temporary_storage_bytes; + HIP_CHECK( + rocprim::segmented_exclusive_scan( + nullptr, temporary_storage_bytes, + d_values_input, + test_utils::wrap_in_identity_iterator(d_values_output), + segments_count, + d_offsets, d_offsets + 1, + init, scan_op, + stream, debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + ASSERT_GT(temporary_storage_bytes, 0); + void * d_temporary_storage; + HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); + + HIP_CHECK( + rocprim::segmented_exclusive_scan( + d_temporary_storage, temporary_storage_bytes, + d_values_input, + test_utils::wrap_in_identity_iterator(d_values_output), + segments_count, + d_offsets, d_offsets + 1, + init, scan_op, + stream, debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + std::vector values_output(size); + HIP_CHECK( + hipMemcpy( + values_output.data(), d_values_output, + values_output.size() * sizeof(output_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(values_output, values_expected, 0.01f)); + + HIP_CHECK(hipFree(d_temporary_storage)); + HIP_CHECK(hipFree(d_values_input)); + HIP_CHECK(hipFree(d_offsets)); + HIP_CHECK(hipFree(d_values_output)); } - offsets.push_back(size); - - input_type * d_values_input; - offset_type * d_offsets; - output_type * d_values_output; - HIP_CHECK(hipMalloc(&d_values_input, size * sizeof(input_type))); - HIP_CHECK(hipMalloc(&d_offsets, (segments_count + 1) * sizeof(offset_type))); - HIP_CHECK(hipMalloc(&d_values_output, size * sizeof(output_type))); - HIP_CHECK( - hipMemcpy( - d_values_input, values_input.data(), - size * sizeof(input_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_offsets, 
offsets.data(), - (segments_count + 1) * sizeof(offset_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - size_t temporary_storage_bytes; - HIP_CHECK( - rocprim::segmented_exclusive_scan( - nullptr, temporary_storage_bytes, - d_values_input, - test_utils::wrap_in_identity_iterator(d_values_output), - segments_count, - d_offsets, d_offsets + 1, - init, scan_op, - stream, debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - ASSERT_GT(temporary_storage_bytes, 0); - void * d_temporary_storage; - HIP_CHECK(hipMalloc(&d_temporary_storage, temporary_storage_bytes)); - - HIP_CHECK( - rocprim::segmented_exclusive_scan( - d_temporary_storage, temporary_storage_bytes, - d_values_input, - test_utils::wrap_in_identity_iterator(d_values_output), - segments_count, - d_offsets, d_offsets + 1, - init, scan_op, - stream, debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - std::vector values_output(size); - HIP_CHECK( - hipMemcpy( - values_output.data(), d_values_output, - values_output.size() * sizeof(output_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(values_output, values_expected, 0.01f)); - - HIP_CHECK(hipFree(d_temporary_storage)); - HIP_CHECK(hipFree(d_values_input)); - HIP_CHECK(hipFree(d_offsets)); - HIP_CHECK(hipFree(d_values_output)); } + } TYPED_TEST(RocprimDeviceSegmentedScan, InclusiveScanUsingHeadFlags) @@ -368,122 +382,129 @@ TYPED_TEST(RocprimDeviceSegmentedScan, InclusiveScanUsingHeadFlags) hipStream_t stream = 0; // default stream - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 10); - std::vector flags = test_utils::get_random_data(size, 0, 10); - flags[0] = 1U; - std::transform( - flags.begin(), 
flags.end(), flags.begin(), - [](flag_type a){ if(a == 1U) return 1U; return 0U; } - ); - - input_type * d_input; - flag_type * d_flags; - output_type * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(input_type))); - HIP_CHECK(hipMalloc(&d_flags, flags.size() * sizeof(flag_type))); - HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(output_type))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(input_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_flags, flags.data(), - flags.size() * sizeof(flag_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // scan function - scan_op_type scan_op; - - // Calculate expected results on host - std::vector expected(input.size()); - test_utils::host_inclusive_scan( - rocprim::make_zip_iterator( - rocprim::make_tuple(input.begin(), flags.begin()) - ), - rocprim::make_zip_iterator( - rocprim::make_tuple(input.end(), flags.end()) - ), - rocprim::make_zip_iterator( - rocprim::make_tuple(expected.begin(), rocprim::make_discard_iterator()) - ), - [scan_op](const rocprim::tuple& t1, - const rocprim::tuple& t2) - -> rocprim::tuple - { - if(!rocprim::get<1>(t2)) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) + { + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 10, seed_value); + std::vector flags = test_utils::get_random_data(size, 0, 10, seed_value); + flags[0] = 1U; + std::transform( + flags.begin(), flags.end(), flags.begin(), + [](flag_type a){ if(a == 1U) return 1U; return 0U; } + ); + + input_type * d_input; + flag_type * d_flags; + output_type * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(input_type))); + HIP_CHECK(hipMalloc(&d_flags, flags.size() * sizeof(flag_type))); + HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(output_type))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(input_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_flags, flags.data(), + flags.size() * sizeof(flag_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // scan function + scan_op_type scan_op; + + // Calculate expected results on host + std::vector expected(input.size()); + test_utils::host_inclusive_scan( + rocprim::make_zip_iterator( + rocprim::make_tuple(input.begin(), flags.begin()) + ), + rocprim::make_zip_iterator( + rocprim::make_tuple(input.end(), flags.end()) + ), + rocprim::make_zip_iterator( + rocprim::make_tuple(expected.begin(), rocprim::make_discard_iterator()) + ), + [scan_op](const rocprim::tuple& t1, + const rocprim::tuple& t2) + -> rocprim::tuple { - return rocprim::make_tuple( - scan_op(rocprim::get<0>(t1), rocprim::get<0>(t2)), - rocprim::get<1>(t1) + rocprim::get<1>(t2) - ); + if(!rocprim::get<1>(t2)) + { + return rocprim::make_tuple( + scan_op(rocprim::get<0>(t1), rocprim::get<0>(t2)), + rocprim::get<1>(t1) + rocprim::get<1>(t2) + ); + } + return t2; } - return t2; - } - ); - - // temp storage - 
size_t temp_storage_size_bytes; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::segmented_inclusive_scan( - nullptr, temp_storage_size_bytes, - d_input, d_output, d_flags, - input.size(), scan_op, stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - void * d_temp_storage = nullptr; - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::segmented_inclusive_scan( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, d_flags, - input.size(), scan_op, stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - std::vector output(input.size()); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(output_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); - - HIP_CHECK(hipFree(d_temp_storage)); - HIP_CHECK(hipFree(d_input)); - HIP_CHECK(hipFree(d_flags)); - HIP_CHECK(hipFree(d_output)); + ); + + // temp storage + size_t temp_storage_size_bytes; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::segmented_inclusive_scan( + nullptr, temp_storage_size_bytes, + d_input, d_output, d_flags, + input.size(), scan_op, stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + void * d_temp_storage = nullptr; + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::segmented_inclusive_scan( + d_temp_storage, temp_storage_size_bytes, + d_input, d_output, d_flags, + input.size(), scan_op, stream, + debug_synchronous + ) + ); + 
HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + std::vector output(input.size()); + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(output_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); + + HIP_CHECK(hipFree(d_temp_storage)); + HIP_CHECK(hipFree(d_input)); + HIP_CHECK(hipFree(d_flags)); + HIP_CHECK(hipFree(d_output)); + } } + } TYPED_TEST(RocprimDeviceSegmentedScan, ExclusiveScanUsingHeadFlags) @@ -498,145 +519,152 @@ TYPED_TEST(RocprimDeviceSegmentedScan, ExclusiveScanUsingHeadFlags) hipStream_t stream = 0; // default stream - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 10); - std::vector flags = test_utils::get_random_data(size, 0, 10); - flags[0] = 1U; - std::transform( - flags.begin(), flags.end(), flags.begin(), - [](flag_type a){ if(a == 1U) return 1U; return 0U; } - ); - - input_type * d_input; - flag_type * d_flags; - output_type * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(input_type))); - HIP_CHECK(hipMalloc(&d_flags, flags.size() * sizeof(flag_type))); - HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(output_type))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(input_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_flags, flags.data(), - flags.size() * sizeof(flag_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // scan function - scan_op_type scan_op; - - // Calculate expected results on host - std::vector expected(input.size()); - // Modify input to perform exclusive operation on initial input. 
- // This shifts input one to the right and initializes segments with init. - expected[0] = init; - std::transform( - rocprim::make_zip_iterator( - rocprim::make_tuple(input.begin(), flags.begin()+1) - ), - rocprim::make_zip_iterator( - rocprim::make_tuple(input.end() - 1, flags.end()) - ), - rocprim::make_zip_iterator( - rocprim::make_tuple(expected.begin() + 1, rocprim::make_discard_iterator()) - ), - [init](const rocprim::tuple& t) - -> rocprim::tuple - { - if(rocprim::get<1>(t)) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) + { + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 10, seed_value); + std::vector flags = test_utils::get_random_data(size, 0, 10, seed_value); + flags[0] = 1U; + std::transform( + flags.begin(), flags.end(), flags.begin(), + [](flag_type a){ if(a == 1U) return 1U; return 0U; } + ); + + input_type * d_input; + flag_type * d_flags; + output_type * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(input_type))); + HIP_CHECK(hipMalloc(&d_flags, flags.size() * sizeof(flag_type))); + HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(output_type))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(input_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_flags, flags.data(), + flags.size() * sizeof(flag_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // scan function + scan_op_type scan_op; + + // Calculate expected results on host + std::vector expected(input.size()); + // Modify input to perform exclusive operation on initial input. + // This shifts input one to the right and initializes segments with init. 
+ expected[0] = init; + std::transform( + rocprim::make_zip_iterator( + rocprim::make_tuple(input.begin(), flags.begin()+1) + ), + rocprim::make_zip_iterator( + rocprim::make_tuple(input.end() - 1, flags.end()) + ), + rocprim::make_zip_iterator( + rocprim::make_tuple(expected.begin() + 1, rocprim::make_discard_iterator()) + ), + [init](const rocprim::tuple& t) + -> rocprim::tuple { - return rocprim::make_tuple( - init, - rocprim::get<1>(t) - ); + if(rocprim::get<1>(t)) + { + return rocprim::make_tuple( + init, + rocprim::get<1>(t) + ); + } + return t; } - return t; - } - ); - // Now we can run inclusive scan and get segmented exclusive results - test_utils::host_inclusive_scan( - rocprim::make_zip_iterator( - rocprim::make_tuple(expected.begin(), flags.begin()) - ), - rocprim::make_zip_iterator( - rocprim::make_tuple(expected.end(), flags.end()) - ), - rocprim::make_zip_iterator( - rocprim::make_tuple(expected.begin(), rocprim::make_discard_iterator()) - ), - [scan_op](const rocprim::tuple& t1, - const rocprim::tuple& t2) - -> rocprim::tuple - { - if(!rocprim::get<1>(t2)) + ); + // Now we can run inclusive scan and get segmented exclusive results + test_utils::host_inclusive_scan( + rocprim::make_zip_iterator( + rocprim::make_tuple(expected.begin(), flags.begin()) + ), + rocprim::make_zip_iterator( + rocprim::make_tuple(expected.end(), flags.end()) + ), + rocprim::make_zip_iterator( + rocprim::make_tuple(expected.begin(), rocprim::make_discard_iterator()) + ), + [scan_op](const rocprim::tuple& t1, + const rocprim::tuple& t2) + -> rocprim::tuple { - return rocprim::make_tuple( - scan_op(rocprim::get<0>(t1), rocprim::get<0>(t2)), - rocprim::get<1>(t1) + rocprim::get<1>(t2) - ); + if(!rocprim::get<1>(t2)) + { + return rocprim::make_tuple( + scan_op(rocprim::get<0>(t1), rocprim::get<0>(t2)), + rocprim::get<1>(t1) + rocprim::get<1>(t2) + ); + } + return t2; } - return t2; - } - ); - - // temp storage - size_t temp_storage_size_bytes; - // Get size of d_temp_storage - 
HIP_CHECK( - rocprim::segmented_exclusive_scan( - nullptr, temp_storage_size_bytes, - d_input, d_output, d_flags, init, - input.size(), scan_op, stream, debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - void * d_temp_storage = nullptr; - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::segmented_exclusive_scan( - d_temp_storage, temp_storage_size_bytes, - d_input, d_output, d_flags, init, - input.size(), scan_op, stream, debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - std::vector output(input.size()); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(output_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); - - HIP_CHECK(hipFree(d_temp_storage)); - HIP_CHECK(hipFree(d_input)); - HIP_CHECK(hipFree(d_flags)); - HIP_CHECK(hipFree(d_output)); + ); + + // temp storage + size_t temp_storage_size_bytes; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::segmented_exclusive_scan( + nullptr, temp_storage_size_bytes, + d_input, d_output, d_flags, init, + input.size(), scan_op, stream, debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + void * d_temp_storage = nullptr; + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::segmented_exclusive_scan( + d_temp_storage, temp_storage_size_bytes, + d_input, d_output, d_flags, init, + input.size(), scan_op, stream, debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values 
are as expected + std::vector output(input.size()); + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(output_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); + + HIP_CHECK(hipFree(d_temp_storage)); + HIP_CHECK(hipFree(d_input)); + HIP_CHECK(hipFree(d_flags)); + HIP_CHECK(hipFree(d_output)); + } } + } diff --git a/test/rocprim/test_device_select.cpp b/test/rocprim/test_device_select.cpp index f55c9254f..bf3e68b9c 100644 --- a/test/rocprim/test_device_select.cpp +++ b/test/rocprim/test_device_select.cpp @@ -72,7 +72,7 @@ typedef ::testing::Types< DeviceSelectParams, test_utils::custom_test_type, int, true> > RocprimDeviceSelectTestsParams; -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 2, 32, 64, 256, @@ -80,7 +80,7 @@ std::vector get_sizes() 3072, 4096, 27845, (1 << 18) + 1111 }; - const std::vector random_sizes = test_utils::get_random_data(2, 1, 16384); + const std::vector random_sizes = test_utils::get_random_data(2, 1, 16384, seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); std::sort(sizes.begin(), sizes.end()); return sizes; @@ -98,122 +98,129 @@ TYPED_TEST(RocprimDeviceSelectTests, Flagged) hipStream_t stream = 0; // default stream - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 100); - std::vector flags = test_utils::get_random_data(size, 0, 1); - - T * d_input; - F * d_flags; - U * d_output; - unsigned int * d_selected_count_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_flags, flags.size() * sizeof(F))); - HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); - 
HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_flags, flags.data(), - flags.size() * sizeof(F), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected; - expected.reserve(input.size()); - for(size_t i = 0; i < input.size(); i++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) { - if(flags[i] != 0) + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 100, seed_value); + std::vector flags = test_utils::get_random_data(size, 0, 1, seed_value); + + T * d_input; + F * d_flags; + U * d_output; + unsigned int * d_selected_count_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_flags, flags.size() * sizeof(F))); + HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); + HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_flags, flags.data(), + flags.size() * sizeof(F), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected results on host + std::vector expected; + expected.reserve(input.size()); + for(size_t i = 0; i < input.size(); i++) { - expected.push_back(input[i]); + if(flags[i] != 0) + { + expected.push_back(input[i]); + } } - } - // temp storage - size_t temp_storage_size_bytes; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::select( - nullptr, - temp_storage_size_bytes, - d_input, - d_flags, - 
test_utils::wrap_in_identity_iterator(d_output), - d_selected_count_output, - input.size(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - void * d_temp_storage = nullptr; - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::select( - d_temp_storage, - temp_storage_size_bytes, - d_input, - d_flags, - test_utils::wrap_in_identity_iterator(d_output), - d_selected_count_output, - input.size(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if number of selected value is as expected - unsigned int selected_count_output = 0; - HIP_CHECK( - hipMemcpy( - &selected_count_output, d_selected_count_output, - sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - ASSERT_EQ(selected_count_output, expected.size()); - - // Check if output values are as expected - std::vector output(input.size()); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output, expected, expected.size())); - - hipFree(d_input); - hipFree(d_flags); - hipFree(d_output); - hipFree(d_selected_count_output); - hipFree(d_temp_storage); + // temp storage + size_t temp_storage_size_bytes; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::select( + nullptr, + temp_storage_size_bytes, + d_input, + d_flags, + test_utils::wrap_in_identity_iterator(d_output), + d_selected_count_output, + input.size(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + void * d_temp_storage = nullptr; + 
HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::select( + d_temp_storage, + temp_storage_size_bytes, + d_input, + d_flags, + test_utils::wrap_in_identity_iterator(d_output), + d_selected_count_output, + input.size(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if number of selected value is as expected + unsigned int selected_count_output = 0; + HIP_CHECK( + hipMemcpy( + &selected_count_output, d_selected_count_output, + sizeof(unsigned int), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + ASSERT_EQ(selected_count_output, expected.size()); + + // Check if output values are as expected + std::vector output(input.size()); + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output, expected, expected.size())); + + hipFree(d_input); + hipFree(d_flags); + hipFree(d_output); + hipFree(d_selected_count_output); + hipFree(d_temp_storage); + } } + } template @@ -252,111 +259,118 @@ TYPED_TEST(RocprimDeviceSelectTests, SelectOp) hipStream_t stream = 0; // default stream - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 0, 100); - - T * d_input; - U * d_output; - unsigned int * d_selected_count_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); - HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // 
Calculate expected results on host - std::vector expected; - expected.reserve(input.size()); - for(size_t i = 0; i < input.size(); i++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) { - if(select_op()(input[i])) + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input = test_utils::get_random_data(size, 0, 100, seed_value); + + T * d_input; + U * d_output; + unsigned int * d_selected_count_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); + HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected results on host + std::vector expected; + expected.reserve(input.size()); + for(size_t i = 0; i < input.size(); i++) { - expected.push_back(input[i]); + if(select_op()(input[i])) + { + expected.push_back(input[i]); + } } - } - // temp storage - size_t temp_storage_size_bytes; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::select( - nullptr, - temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - d_selected_count_output, - input.size(), - select_op(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - void * d_temp_storage = nullptr; - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::select( - d_temp_storage, - temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - d_selected_count_output, - 
input.size(), - select_op(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if number of selected value is as expected - unsigned int selected_count_output = 0; - HIP_CHECK( - hipMemcpy( - &selected_count_output, d_selected_count_output, - sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - ASSERT_EQ(selected_count_output, expected.size()); - - // Check if output values are as expected - std::vector output(input.size()); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output, expected, expected.size())); - - hipFree(d_input); - hipFree(d_output); - hipFree(d_selected_count_output); - hipFree(d_temp_storage); + // temp storage + size_t temp_storage_size_bytes; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::select( + nullptr, + temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + d_selected_count_output, + input.size(), + select_op(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + void * d_temp_storage = nullptr; + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::select( + d_temp_storage, + temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + d_selected_count_output, + input.size(), + select_op(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if number of selected value is as expected + unsigned int selected_count_output = 0; + HIP_CHECK( + hipMemcpy( + &selected_count_output, d_selected_count_output, + sizeof(unsigned int), + hipMemcpyDeviceToHost + ) + ); + 
HIP_CHECK(hipDeviceSynchronize()); + ASSERT_EQ(selected_count_output, expected.size()); + + // Check if output values are as expected + std::vector output(input.size()); + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output, expected, expected.size())); + + hipFree(d_input); + hipFree(d_output); + hipFree(d_selected_count_output); + hipFree(d_temp_storage); + } } + } std::vector get_discontinuity_probabilities() @@ -440,122 +454,129 @@ TYPED_TEST(RocprimDeviceSelectTests, Unique) hipStream_t stream = 0; // default stream - const auto sizes = get_sizes(); - const auto probabilities = get_discontinuity_probabilities(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - SCOPED_TRACE(testing::Message() << "with size = " << size); - for(auto p : probabilities) - { - SCOPED_TRACE(testing::Message() << "with p = " << p); + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - // Generate data - std::vector input(size); + const auto sizes = get_sizes(seed_value); + const auto probabilities = get_discontinuity_probabilities(); + for(auto size : sizes) + { + SCOPED_TRACE(testing::Message() << "with size = " << size); + for(auto p : probabilities) { - std::vector input01 = test_utils::get_random_data01(size, p); - test_utils::host_inclusive_scan( - input01.begin(), input01.end(), input.begin(), scan_op_type() - ); - } - - // Allocate and copy to device - T * d_input; - U * d_output; - unsigned int * d_selected_count_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); - HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); + SCOPED_TRACE(testing::Message() << "with p = " << p); - // Calculate expected results on host - std::vector expected; - expected.reserve(input.size()); - expected.push_back(input[0]); - for(size_t i = 1; i < input.size(); i++) - { - if(!op_type()(input[i-1], input[i])) + // Generate data + std::vector input(size); { - expected.push_back(input[i]); + std::vector input01 = test_utils::get_random_data01(size, p, seed_value); + test_utils::host_inclusive_scan( + input01.begin(), input01.end(), input.begin(), scan_op_type() + ); } - } - - // temp storage - size_t temp_storage_size_bytes; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::unique( - nullptr, - temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - d_selected_count_output, - input.size(), - op_type(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - // allocate temporary storage - 
void * d_temp_storage = nullptr; - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::unique( - d_temp_storage, - temp_storage_size_bytes, - d_input, - test_utils::wrap_in_identity_iterator(d_output), - d_selected_count_output, - input.size(), - op_type(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); + // Allocate and copy to device + T * d_input; + U * d_output; + unsigned int * d_selected_count_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_output, input.size() * sizeof(U))); + HIP_CHECK(hipMalloc(&d_selected_count_output, sizeof(unsigned int))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); - // Check if number of selected value is as expected - unsigned int selected_count_output = 0; - HIP_CHECK( - hipMemcpy( - &selected_count_output, d_selected_count_output, - sizeof(unsigned int), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - ASSERT_EQ(selected_count_output, expected.size()); + // Calculate expected results on host + std::vector expected; + expected.reserve(input.size()); + expected.push_back(input[0]); + for(size_t i = 1; i < input.size(); i++) + { + if(!op_type()(input[i-1], input[i])) + { + expected.push_back(input[i]); + } + } - // Check if output values are as expected - std::vector output(input.size()); - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output, expected, expected.size())); + // temp storage + size_t temp_storage_size_bytes; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::unique( + nullptr, + temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + 
d_selected_count_output, + input.size(), + op_type(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + void * d_temp_storage = nullptr; + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::unique( + d_temp_storage, + temp_storage_size_bytes, + d_input, + test_utils::wrap_in_identity_iterator(d_output), + d_selected_count_output, + input.size(), + op_type(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if number of selected value is as expected + unsigned int selected_count_output = 0; + HIP_CHECK( + hipMemcpy( + &selected_count_output, d_selected_count_output, + sizeof(unsigned int), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + ASSERT_EQ(selected_count_output, expected.size()); + + // Check if output values are as expected + std::vector output(input.size()); + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + ASSERT_NO_FATAL_FAILURE(test_utils::custom_assert_eq(output, expected, expected.size())); - hipFree(d_input); - hipFree(d_output); - hipFree(d_selected_count_output); - hipFree(d_temp_storage); + hipFree(d_input); + hipFree(d_output); + hipFree(d_selected_count_output); + hipFree(d_temp_storage); + } } } + } diff --git a/test/rocprim/test_device_transform.cpp b/test/rocprim/test_device_transform.cpp index 128f2ff2a..2775a0839 100644 --- a/test/rocprim/test_device_transform.cpp +++ b/test/rocprim/test_device_transform.cpp @@ -81,14 +81,14 @@ typedef ::testing::Types< DeviceTransformParams > RocprimDeviceTransformTestsParams; -std::vector get_sizes() +std::vector get_sizes(int seed_value) { std::vector sizes = { 1, 10, 53, 211, 1024, 2048, 5096, 34567, (1 << 17) - 
1220 }; - const std::vector random_sizes = test_utils::get_random_data(2, 1, 16384); + const std::vector random_sizes = test_utils::get_random_data(2, 1, 16384, seed_value); sizes.insert(sizes.end(), random_sizes.begin(), random_sizes.end()); std::sort(sizes.begin(), sizes.end()); return sizes; @@ -128,61 +128,68 @@ TYPED_TEST(RocprimDeviceTransformTests, Transform) static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; const bool debug_synchronous = TestFixture::debug_synchronous; - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 100); - std::vector output(input.size(), 0); - - T * d_input; - U * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected(input.size()); - std::transform(input.begin(), input.end(), expected.begin(), transform()); - - // Run - HIP_CHECK( - rocprim::transform( - d_input, - test_utils::wrap_in_identity_iterator(d_output), - input.size(), transform(), stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); - - hipFree(d_input); - hipFree(d_output); + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) + { + hipStream_t stream = 0; // default + + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 100, seed_value); + std::vector output(input.size(), 0); + + T * d_input; + U * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected results on host + std::vector expected(input.size()); + std::transform(input.begin(), input.end(), expected.begin(), transform()); + + // Run + HIP_CHECK( + rocprim::transform( + d_input, + test_utils::wrap_in_identity_iterator(d_output), + input.size(), transform(), stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); + + hipFree(d_input); + hipFree(d_output); + } } + } template @@ -217,73 +224,80 @@ TYPED_TEST(RocprimDeviceTransformTests, BinaryTransform) static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; const bool debug_synchronous = TestFixture::debug_synchronous; - const std::vector sizes = get_sizes(); - for(auto size : sizes) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - hipStream_t stream = 0; // default - - SCOPED_TRACE(testing::Message() << "with size = " << size); - - // Generate data - std::vector input1 = 
test_utils::get_random_data(size, 1, 100); - std::vector input2 = test_utils::get_random_data(size, 1, 100); - std::vector output(input1.size(), 0); - - T1 * d_input1; - T2 * d_input2; - U * d_output; - HIP_CHECK(hipMalloc(&d_input1, input1.size() * sizeof(T1))); - HIP_CHECK(hipMalloc(&d_input2, input2.size() * sizeof(T2))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input1, input1.data(), - input1.size() * sizeof(T1), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_input2, input2.data(), - input2.size() * sizeof(T2), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected(input1.size()); - std::transform( - input1.begin(), input1.end(), input2.begin(), - expected.begin(), binary_transform() - ); - - // Run - HIP_CHECK( - rocprim::transform( - d_input1, d_input2, - test_utils::wrap_in_identity_iterator(d_output), - input1.size(), binary_transform(), stream, debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); - - hipFree(d_input1); - hipFree(d_input2); - hipFree(d_output); + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + const std::vector sizes = get_sizes(seed_value); + for(auto size : sizes) + { + hipStream_t stream = 0; // default + + SCOPED_TRACE(testing::Message() << "with size = " << size); + + // Generate data + std::vector input1 = test_utils::get_random_data(size, 1, 100, seed_value); + std::vector input2 = test_utils::get_random_data(size, 1, 100, seed_value + 1); + std::vector output(input1.size(), 0); + + T1 * d_input1; + T2 * d_input2; + U * d_output; + HIP_CHECK(hipMalloc(&d_input1, input1.size() * sizeof(T1))); + HIP_CHECK(hipMalloc(&d_input2, input2.size() * sizeof(T2))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); + HIP_CHECK( + hipMemcpy( + d_input1, input1.data(), + input1.size() * sizeof(T1), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_input2, input2.data(), + input2.size() * sizeof(T2), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected results on host + std::vector expected(input1.size()); + std::transform( + input1.begin(), input1.end(), input2.begin(), + expected.begin(), binary_transform() + ); + + // Run + HIP_CHECK( + rocprim::transform( + d_input1, d_input2, + test_utils::wrap_in_identity_iterator(d_output), + input1.size(), binary_transform(), stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + ASSERT_NO_FATAL_FAILURE(test_utils::assert_near(output, expected, 0.01f)); + + hipFree(d_input1); + hipFree(d_input2); + hipFree(d_output); + } } + } diff --git a/test/rocprim/test_discard_iterator.cpp b/test/rocprim/test_discard_iterator.cpp index 2ea359bda..86733908d 100644 --- 
a/test/rocprim/test_discard_iterator.cpp +++ b/test/rocprim/test_discard_iterator.cpp @@ -40,35 +40,47 @@ TEST(RocprimDiscardIteratorTests, Equal) { using Iterator = typename rocprim::discard_iterator; - Iterator x(test_utils::get_random_value(0, 200)); - Iterator y = x; - ASSERT_EQ(x, y); - - x += 100; - for(size_t i = 0; i < 100; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - y++; + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + Iterator x(test_utils::get_random_value(0, 200, seed_value)); + Iterator y = x; + ASSERT_EQ(x, y); + + x += 100; + for(size_t i = 0; i < 100; i++) + { + y++; + } + ASSERT_EQ(x, y); + + y--; + ASSERT_NE(x, y); } - ASSERT_EQ(x, y); - - y--; - ASSERT_NE(x, y); } TEST(RocprimDiscardIteratorTests, Less) { using Iterator = typename rocprim::discard_iterator; - Iterator x(test_utils::get_random_value(0, 200)); - Iterator y = x + 1; - ASSERT_LT(x, y); - - x += 100; - for(size_t i = 0; i < 100; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - y++; + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + Iterator x(test_utils::get_random_value(0, 200, seed_value)); + Iterator y = x + 1; + ASSERT_LT(x, y); + + x += 100; + for(size_t i = 0; i < 100; i++) + { + y++; + } + ASSERT_LT(x, y); } - ASSERT_LT(x, y); } TEST(RocprimDiscardIteratorTests, ReduceByKey) diff --git a/test/rocprim/test_intrinsics.cpp b/test/rocprim/test_intrinsics.cpp index 4c070ad20..b04638099 100644 --- a/test/rocprim/test_intrinsics.cpp +++ b/test/rocprim/test_intrinsics.cpp @@ -113,78 +113,86 @@ TYPED_TEST(RocprimIntrinsicsTests, ShuffleUp) const size_t hardware_warp_size = ::rocprim::warp_size(); const size_t size = hardware_warp_size; - // Generate input - auto input = test_utils::get_random_data(size, T(-100), T(100)); - std::vector output(input.size()); - - T* device_data; - HIP_CHECK( - hipMalloc( - &device_data, - input.size() * sizeof(typename decltype(input)::value_type) - ) - ); - - for(unsigned int i = hardware_warp_size; i > 1; i = i/2) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - const unsigned int logical_warp_size = i; - SCOPED_TRACE(testing::Message() << "where logical_warp_size = " << i); + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate input + auto input = test_utils::get_random_data(size, T(-100), T(100), seed_value); + std::vector output(input.size()); - auto deltas = test_utils::get_random_data( - std::max(1, logical_warp_size/2), - 1U, - std::max(1, logical_warp_size - 1) + T* device_data; + HIP_CHECK( + hipMalloc( + &device_data, + input.size() * sizeof(typename decltype(input)::value_type) + ) ); - for(auto delta : deltas) + for(unsigned int i = hardware_warp_size; i > 1; i = i/2) { - SCOPED_TRACE(testing::Message() << "where delta = " << delta); - // Calculate expected results on host - std::vector expected(size, 0); - for(size_t i = 0; i < input.size()/logical_warp_size; i++) + const unsigned int logical_warp_size = i; + SCOPED_TRACE(testing::Message() << "where logical_warp_size = " << i); + + auto deltas = test_utils::get_random_data( + std::max(1, logical_warp_size/2), + 1U, + std::max(1, logical_warp_size - 1), + seed_value + ); + + for(auto delta : deltas) { - for(size_t j = 0; j < logical_warp_size; j++) + SCOPED_TRACE(testing::Message() << "where delta = " << delta); + // Calculate expected results on host + std::vector expected(size, 0); + for(size_t i = 0; i < input.size()/logical_warp_size; i++) { - size_t index = j + logical_warp_size * i; - auto up_index = j > delta-1 ? index-delta : index; - expected[index] = input[up_index]; + for(size_t j = 0; j < logical_warp_size; j++) + { + size_t index = j + logical_warp_size * i; + auto up_index = j > delta-1 ? 
index-delta : index; + expected[index] = input[up_index]; + } } - } - - // Writing to device memory - HIP_CHECK( - hipMemcpy( - device_data, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(shuffle_up_kernel), - dim3(1), dim3(hardware_warp_size), 0, 0, - device_data, delta, logical_warp_size - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_data, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; + // Writing to device memory + HIP_CHECK( + hipMemcpy( + device_data, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(shuffle_up_kernel), + dim3(1), dim3(hardware_warp_size), 0, 0, + device_data, delta, logical_warp_size + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_data, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + for(size_t i = 0; i < output.size(); i++) + { + ASSERT_EQ(output[i], expected[i]) << "where index = " << i; + } } } + hipFree(device_data); } - hipFree(device_data); + } template @@ -203,78 +211,86 @@ TYPED_TEST(RocprimIntrinsicsTests, ShuffleDown) const size_t hardware_warp_size = ::rocprim::warp_size(); const size_t size = hardware_warp_size; - // Generate input - auto input = test_utils::get_random_data(size, T(-100), T(100)); - std::vector output(input.size()); - - T* device_data; - HIP_CHECK( - hipMalloc( - &device_data, - input.size() * sizeof(typename decltype(input)::value_type) - ) - ); - - for(unsigned int i = hardware_warp_size; i > 1; i = i/2) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - 
const unsigned int logical_warp_size = i; - SCOPED_TRACE(testing::Message() << "where logical_warp_size = " << i); + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - auto deltas = test_utils::get_random_data( - std::max(1, logical_warp_size/2), - 1U, - std::max(1, logical_warp_size - 1) + // Generate input + auto input = test_utils::get_random_data(size, T(-100), T(100), seed_value); + std::vector output(input.size()); + + T* device_data; + HIP_CHECK( + hipMalloc( + &device_data, + input.size() * sizeof(typename decltype(input)::value_type) + ) ); - for(auto delta : deltas) + for(unsigned int i = hardware_warp_size; i > 1; i = i/2) { - SCOPED_TRACE(testing::Message() << "where delta = " << delta); - // Calculate expected results on host - std::vector expected(size, 0); - for(size_t i = 0; i < input.size()/logical_warp_size; i++) + const unsigned int logical_warp_size = i; + SCOPED_TRACE(testing::Message() << "where logical_warp_size = " << i); + + auto deltas = test_utils::get_random_data( + std::max(1, logical_warp_size/2), + 1U, + std::max(1, logical_warp_size - 1), + seed_index + ); + + for(auto delta : deltas) { - for(size_t j = 0; j < logical_warp_size; j++) + SCOPED_TRACE(testing::Message() << "where delta = " << delta); + // Calculate expected results on host + std::vector expected(size, 0); + for(size_t i = 0; i < input.size()/logical_warp_size; i++) { - size_t index = j + logical_warp_size * i; - auto down_index = j+delta < logical_warp_size ? index+delta : index; - expected[index] = input[down_index]; + for(size_t j = 0; j < logical_warp_size; j++) + { + size_t index = j + logical_warp_size * i; + auto down_index = j+delta < logical_warp_size ? 
index+delta : index; + expected[index] = input[down_index]; + } } - } - - // Writing to device memory - HIP_CHECK( - hipMemcpy( - device_data, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(shuffle_down_kernel), - dim3(1), dim3(hardware_warp_size), 0, 0, - device_data, delta, logical_warp_size - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_data, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; + // Writing to device memory + HIP_CHECK( + hipMemcpy( + device_data, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(shuffle_down_kernel), + dim3(1), dim3(hardware_warp_size), 0, 0, + device_data, delta, logical_warp_size + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_data, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + for(size_t i = 0; i < output.size(); i++) + { + ASSERT_EQ(output[i], expected[i]) << "where index = " << i; + } } } + hipFree(device_data); } - hipFree(device_data); + } template @@ -295,140 +311,51 @@ TYPED_TEST(RocprimIntrinsicsTests, ShuffleIndex) const size_t hardware_warp_size = ::rocprim::warp_size(); const size_t size = hardware_warp_size; - // Generate input - auto input = test_utils::get_random_data(size, T(-100), T(100)); - std::vector output(input.size()); - - T* device_data; - int * device_src_lanes; - HIP_CHECK( - hipMalloc( - &device_data, - input.size() * sizeof(typename decltype(input)::value_type) - ) - ); - HIP_CHECK( - hipMalloc( - &device_src_lanes, - hardware_warp_size * sizeof(int) - ) - ); - - 
for(unsigned int i = hardware_warp_size; i > 1; i = i/2) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - const unsigned int logical_warp_size = i; - SCOPED_TRACE(testing::Message() << "where logical_warp_size = " << i); - - auto src_lanes = test_utils::get_random_data( - hardware_warp_size/logical_warp_size, - 0, std::max(0, logical_warp_size-1) - ); + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); - // Calculate expected results on host - std::vector expected(size, 0); - for(size_t i = 0; i < input.size()/logical_warp_size; i++) - { - int src_lane = src_lanes[i]; - for(size_t j = 0; j < logical_warp_size; j++) - { - size_t index = j + logical_warp_size * i; - if(src_lane >= int(logical_warp_size) || src_lane < 0) src_lane = index; - expected[index] = input[src_lane + logical_warp_size * i]; - } - } + // Generate input + auto input = test_utils::get_random_data(size, T(-100), T(100), seed_value); + std::vector output(input.size()); - // Writing to device memory - HIP_CHECK( - hipMemcpy( - device_data, input.data(), - input.size() * sizeof(typename decltype(input)::value_type), - hipMemcpyHostToDevice - ) - ); + T* device_data; + int * device_src_lanes; HIP_CHECK( - hipMemcpy( - device_src_lanes, src_lanes.data(), - src_lanes.size() * sizeof(typename decltype(src_lanes)::value_type), - hipMemcpyHostToDevice + hipMalloc( + &device_data, + input.size() * sizeof(typename decltype(input)::value_type) ) ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(shuffle_index_kernel), - dim3(1), dim3(hardware_warp_size), 0, 0, - device_data, device_src_lanes, logical_warp_size - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory HIP_CHECK( - hipMemcpy( - output.data(), device_data, - output.size() * sizeof(T), - hipMemcpyDeviceToHost + hipMalloc( + &device_src_lanes, + hardware_warp_size * sizeof(int) ) 
); - for(size_t i = 0; i < output.size(); i++) + for(unsigned int i = hardware_warp_size; i > 1; i = i/2) { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; - } - } - hipFree(device_data); - hipFree(device_src_lanes); -} - -TEST(RocprimIntrinsicsTests, ShuffleUpCustomStruct) -{ - using T = custom_notaligned; - const size_t hardware_warp_size = ::rocprim::warp_size(); - const size_t size = hardware_warp_size; - - // Generate input - std::vector random_data = test_utils::get_random_data(4 * size, -100, 100); - std::vector input(size); - std::vector output(input.size()); - for(size_t i = 0; i < 4 * input.size(); i+=4) - { - input[i/4].i = random_data[i]; - input[i/4].d = random_data[i+1]; - input[i/4].f = random_data[i+2]; - input[i/4].u = random_data[i+3]; - } - - T* device_data; - HIP_CHECK( - hipMalloc( - &device_data, - input.size() * sizeof(typename decltype(input)::value_type) - ) - ); + const unsigned int logical_warp_size = i; + SCOPED_TRACE(testing::Message() << "where logical_warp_size = " << i); - for(unsigned int i = hardware_warp_size; i > 1; i = i/2) - { - const unsigned int logical_warp_size = i; - SCOPED_TRACE(testing::Message() << "where logical_warp_size = " << i); - - auto deltas = test_utils::get_random_data( - std::max(1, logical_warp_size/2), - 1U, - std::max(1, logical_warp_size - 1) - ); + auto src_lanes = test_utils::get_random_data( + hardware_warp_size/logical_warp_size, + 0, std::max(0, logical_warp_size-1), + seed_index + ); - for(auto delta : deltas) - { - SCOPED_TRACE(testing::Message() << "where delta = " << delta); // Calculate expected results on host - std::vector expected(size); + std::vector expected(size, 0); for(size_t i = 0; i < input.size()/logical_warp_size; i++) { + int src_lane = src_lanes[i]; for(size_t j = 0; j < logical_warp_size; j++) { size_t index = j + logical_warp_size * i; - auto up_index = j > delta-1 ? 
index-delta : index; - expected[index] = input[up_index]; + if(src_lane >= int(logical_warp_size) || src_lane < 0) src_lane = index; + expected[index] = input[src_lane + logical_warp_size * i]; } } @@ -436,16 +363,23 @@ TEST(RocprimIntrinsicsTests, ShuffleUpCustomStruct) HIP_CHECK( hipMemcpy( device_data, input.data(), - input.size() * sizeof(T), + input.size() * sizeof(typename decltype(input)::value_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + device_src_lanes, src_lanes.data(), + src_lanes.size() * sizeof(typename decltype(src_lanes)::value_type), hipMemcpyHostToDevice ) ); // Launching kernel hipLaunchKernelGGL( - HIP_KERNEL_NAME(shuffle_up_kernel), + HIP_KERNEL_NAME(shuffle_index_kernel), dim3(1), dim3(hardware_warp_size), 0, 0, - device_data, delta, logical_warp_size + device_data, device_src_lanes, logical_warp_size ); HIP_CHECK(hipPeekAtLastError()); HIP_CHECK(hipDeviceSynchronize()); @@ -464,93 +398,199 @@ TEST(RocprimIntrinsicsTests, ShuffleUpCustomStruct) ASSERT_EQ(output[i], expected[i]) << "where index = " << i; } } + hipFree(device_data); + hipFree(device_src_lanes); } - hipFree(device_data); + } -TEST(RocprimIntrinsicsTests, ShuffleUpCustomAlignedStruct) +TEST(RocprimIntrinsicsTests, ShuffleUpCustomStruct) { - using T = custom_16aligned; - const size_t hardware_warp_size = ::rocprim::warp_size(); + using T = custom_notaligned; + const size_t hardware_warp_size = ::rocprim::warp_size(); const size_t size = hardware_warp_size; - // Generate input - std::vector random_data = test_utils::get_random_data(3 * size, -100, 100); - std::vector input(size); - std::vector output(input.size()); - for(size_t i = 0; i < 3 * input.size(); i+=3) - { - input[i/3].i = random_data[i]; - input[i/3].u = random_data[i+1]; - input[i/3].f = random_data[i+2]; - } - - T* device_data; - HIP_CHECK( - hipMalloc( - &device_data, - input.size() * sizeof(typename decltype(input)::value_type) - ) - ); - - for(unsigned int i = hardware_warp_size; i > 1; i = 
i/2) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - const unsigned int logical_warp_size = i; - SCOPED_TRACE(testing::Message() << "where logical_warp_size = " << i); + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate input + std::vector random_data = test_utils::get_random_data(4 * size, -100, 100, seed_value); + std::vector input(size); + std::vector output(input.size()); + for(size_t i = 0; i < 4 * input.size(); i+=4) + { + input[i/4].i = random_data[i]; + input[i/4].d = random_data[i+1]; + input[i/4].f = random_data[i+2]; + input[i/4].u = random_data[i+3]; + } - auto deltas = test_utils::get_random_data( - std::max(1, logical_warp_size/2), - 1U, - std::max(1, logical_warp_size - 1) + T* device_data; + HIP_CHECK( + hipMalloc( + &device_data, + input.size() * sizeof(typename decltype(input)::value_type) + ) ); - for(auto delta : deltas) + for(unsigned int i = hardware_warp_size; i > 1; i = i/2) { - SCOPED_TRACE(testing::Message() << "where delta = " << delta); - // Calculate expected results on host - std::vector expected(size); - for(size_t i = 0; i < input.size()/logical_warp_size; i++) + const unsigned int logical_warp_size = i; + SCOPED_TRACE(testing::Message() << "where logical_warp_size = " << i); + + auto deltas = test_utils::get_random_data( + std::max(1, logical_warp_size/2), + 1U, + std::max(1, logical_warp_size - 1), + seed_index + ); + + for(auto delta : deltas) { - for(size_t j = 0; j < logical_warp_size; j++) + SCOPED_TRACE(testing::Message() << "where delta = " << delta); + // Calculate expected results on host + std::vector expected(size); + for(size_t i = 0; i < input.size()/logical_warp_size; i++) { - size_t index = j + logical_warp_size * i; - auto up_index = j > delta-1 ? 
index-delta : index; - expected[index] = input[up_index]; + for(size_t j = 0; j < logical_warp_size; j++) + { + size_t index = j + logical_warp_size * i; + auto up_index = j > delta-1 ? index-delta : index; + expected[index] = input[up_index]; + } + } + + // Writing to device memory + HIP_CHECK( + hipMemcpy( + device_data, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(shuffle_up_kernel), + dim3(1), dim3(hardware_warp_size), 0, 0, + device_data, delta, logical_warp_size + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_data, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + for(size_t i = 0; i < output.size(); i++) + { + ASSERT_EQ(output[i], expected[i]) << "where index = " << i; } } + } + hipFree(device_data); + } + +} - // Writing to device memory - HIP_CHECK( - hipMemcpy( - device_data, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); +TEST(RocprimIntrinsicsTests, ShuffleUpCustomAlignedStruct) +{ + using T = custom_16aligned; + const size_t hardware_warp_size = ::rocprim::warp_size(); + const size_t size = hardware_warp_size; - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(shuffle_up_kernel), - dim3(1), dim3(hardware_warp_size), 0, 0, - device_data, delta, logical_warp_size - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + { + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate input + std::vector random_data = test_utils::get_random_data(3 * size, -100, 100, seed_value); + std::vector input(size); + std::vector output(input.size()); + for(size_t i = 0; i < 3 * input.size(); i+=3) + { + input[i/3].i = random_data[i]; + input[i/3].u = random_data[i+1]; + input[i/3].f = random_data[i+2]; + } - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_data, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) + T* device_data; + HIP_CHECK( + hipMalloc( + &device_data, + input.size() * sizeof(typename decltype(input)::value_type) + ) + ); + + for(unsigned int i = hardware_warp_size; i > 1; i = i/2) + { + const unsigned int logical_warp_size = i; + SCOPED_TRACE(testing::Message() << "where logical_warp_size = " << i); + + auto deltas = test_utils::get_random_data( + std::max(1, logical_warp_size/2), + 1U, + std::max(1, logical_warp_size - 1), + seed_index ); - for(size_t i = 0; i < output.size(); i++) + for(auto delta : deltas) { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; + SCOPED_TRACE(testing::Message() << "where delta = " << delta); + // Calculate expected results on host + std::vector expected(size); + for(size_t i = 0; i < input.size()/logical_warp_size; i++) + { + for(size_t j = 0; j < logical_warp_size; j++) + { + size_t index = j + logical_warp_size * i; + auto up_index = j > delta-1 ? 
index-delta : index; + expected[index] = input[up_index]; + } + } + + // Writing to device memory + HIP_CHECK( + hipMemcpy( + device_data, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(shuffle_up_kernel), + dim3(1), dim3(hardware_warp_size), 0, 0, + device_data, delta, logical_warp_size + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_data, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + for(size_t i = 0; i < output.size(); i++) + { + ASSERT_EQ(output[i], expected[i]) << "where index = " << i; + } } } + hipFree(device_data); } - hipFree(device_data); + } diff --git a/test/rocprim/test_seed.hpp b/test/rocprim/test_seed.hpp new file mode 100644 index 000000000..aa25a44b5 --- /dev/null +++ b/test/rocprim/test_seed.hpp @@ -0,0 +1,28 @@ +// Copyright (c) 2020 Advanced Micro Devices, Inc. All rights reserved. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +#ifndef TEST_SEED_HPP_ +#define TEST_SEED_HPP_ + +static constexpr bool use_seed = true; +static constexpr unsigned int seeds [] = {0, 2, 10, 1000}; +static constexpr size_t seed_size = sizeof(seeds) / sizeof(seeds[0]); + +#endif // TEST_SEED_HPP_ \ No newline at end of file diff --git a/test/rocprim/test_texture_cache_iterator.cpp b/test/rocprim/test_texture_cache_iterator.cpp index 1f55b8722..d3fa1b671 100644 --- a/test/rocprim/test_texture_cache_iterator.cpp +++ b/test/rocprim/test_texture_cache_iterator.cpp @@ -86,64 +86,70 @@ TYPED_TEST(RocprimTextureCacheIteratorTests, Transform) std::vector input(size); - for(size_t i = 0; i < size; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - input[i] = T(test_utils::get_random_value(1, 200)); + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + for(size_t i = 0; i < size; i++) + { + input[i] = T(test_utils::get_random_value(1, 200, seed_value)); + } + + std::vector output(size); + T * d_input; + T * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(T))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + Iterator x; + x.bind_texture(d_input, sizeof(T) * input.size()); + + // Calculate expected results on host + std::vector expected(size); + std::transform( + input.begin(), + input.end(), + expected.begin(), + transform() + ); + + // Run + HIP_CHECK( + rocprim::transform( + x, d_output, size, + transform(), stream, debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Validating results + for(size_t i = 0; i < output.size(); i++) + { + ASSERT_EQ(output[i], expected[i]) << "where index = " << i; + } + + x.unbind_texture(); + hipFree(d_input); + hipFree(d_output); } - - std::vector output(size); - T * d_input; - T * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(T))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(T))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - Iterator x; - x.bind_texture(d_input, sizeof(T) * input.size()); - - // Calculate expected results on host - std::vector expected(size); - std::transform( - input.begin(), - input.end(), - expected.begin(), - transform() - ); - - // Run - HIP_CHECK( - rocprim::transform( - x, d_output, size, - transform(), stream, 
debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Validating results - for(size_t i = 0; i < output.size(); i++) - { - ASSERT_EQ(output[i], expected[i]) << "where index = " << i; - } - - x.unbind_texture(); - hipFree(d_input); - hipFree(d_output); } diff --git a/test/rocprim/test_transform_iterator.cpp b/test/rocprim/test_transform_iterator.cpp index 1190f5f1a..eab954bf8 100644 --- a/test/rocprim/test_transform_iterator.cpp +++ b/test/rocprim/test_transform_iterator.cpp @@ -100,93 +100,101 @@ TYPED_TEST(RocprimTransformIteratorTests, TransformReduce) hipStream_t stream = 0; // default const size_t size = 1024; - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 200); - std::vector output(1); - - input_type * d_input; - value_type * d_output; - HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(input_type))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(value_type))); - HIP_CHECK( - hipMemcpy( - d_input, input.data(), - input.size() * sizeof(input_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - auto reduce_op = rocprim::plus(); - unary_function transform; - - // Calculate expected results on host - iterator_type x(input.data(), transform); - value_type expected = std::accumulate(x, x + size, value_type(0), reduce_op); - - auto d_iter = iterator_type(d_input, transform); - // temp storage - size_t temp_storage_size_bytes; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::reduce( - nullptr, - temp_storage_size_bytes, - d_iter, - d_output, - value_type(0), - input.size(), - reduce_op, - stream - ) - ); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - void * d_temp_storage = nullptr; - 
HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - - // Run - HIP_CHECK( - rocprim::reduce( - d_temp_storage, - temp_storage_size_bytes, - d_iter, - d_output, - value_type(0), - input.size(), - reduce_op, - stream, - TestFixture::debug_synchronous - ) - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(value_type), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - if(std::is_integral::value) - { - ASSERT_EQ(output[0], expected); - } - else if(std::is_floating_point::value) + + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - auto tolerance = std::max(std::abs(0.1f * expected), value_type(0.01f)); - ASSERT_NEAR(output[0], expected, tolerance); + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 200, seed_value); + std::vector output(1); + + input_type * d_input; + value_type * d_output; + HIP_CHECK(hipMalloc(&d_input, input.size() * sizeof(input_type))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(value_type))); + HIP_CHECK( + hipMemcpy( + d_input, input.data(), + input.size() * sizeof(input_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + auto reduce_op = rocprim::plus(); + unary_function transform; + + // Calculate expected results on host + iterator_type x(input.data(), transform); + value_type expected = std::accumulate(x, x + size, value_type(0), reduce_op); + + auto d_iter = iterator_type(d_input, transform); + // temp storage + size_t temp_storage_size_bytes; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::reduce( + nullptr, + temp_storage_size_bytes, + d_iter, + d_output, + value_type(0), + input.size(), + 
reduce_op, + stream + ) + ); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + void * d_temp_storage = nullptr; + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + + // Run + HIP_CHECK( + rocprim::reduce( + d_temp_storage, + temp_storage_size_bytes, + d_iter, + d_output, + value_type(0), + input.size(), + reduce_op, + stream, + TestFixture::debug_synchronous + ) + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(value_type), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + if(std::is_integral::value) + { + ASSERT_EQ(output[0], expected); + } + else if(std::is_floating_point::value) + { + auto tolerance = std::max(std::abs(0.1f * expected), value_type(0.01f)); + ASSERT_NEAR(output[0], expected, tolerance); + } + + hipFree(d_input); + hipFree(d_output); + hipFree(d_temp_storage); } - hipFree(d_input); - hipFree(d_output); - hipFree(d_temp_storage); } diff --git a/test/rocprim/test_utils.hpp b/test/rocprim/test_utils.hpp index 16f1e23c3..4ec96b2ec 100644 --- a/test/rocprim/test_utils.hpp +++ b/test/rocprim/test_utils.hpp @@ -37,6 +37,9 @@ #include "identity_iterator.hpp" // Bounds checking iterator #include "bounds_checking_iterator.hpp" +// Seed values +#include "test_seed.hpp" + // For better Google Test reporting and debug output of half values inline @@ -207,11 +210,12 @@ struct half_minimum }; template -inline auto get_random_data(size_t size, T min, T max) +inline auto get_random_data(size_t size, T min, T max, int seed_value) -> typename std::enable_if::value, std::vector>::type { std::random_device rd; std::default_random_engine gen(rd()); + gen.seed(seed_value); std::uniform_int_distribution distribution(min, max); std::vector data(size); std::generate(data.begin(), 
data.end(), [&]() { return distribution(gen); }); @@ -219,11 +223,12 @@ inline auto get_random_data(size_t size, T min, T max) } template -inline auto get_random_data(size_t size, T min, T max) +inline auto get_random_data(size_t size, T min, T max, int seed_value) -> typename std::enable_if::value, std::vector>::type { std::random_device rd; std::default_random_engine gen(rd()); + gen.seed(seed_value); // Generate floats when T is half using dis_type = typename std::conditional::value, float, T>::type; std::uniform_real_distribution distribution(min, max); @@ -233,11 +238,12 @@ inline auto get_random_data(size_t size, T min, T max) } template -inline std::vector get_random_data01(size_t size, float p) +inline std::vector get_random_data01(size_t size, float p, int seed_value) { const size_t max_random_size = 1024 * 1024; std::random_device rd; std::default_random_engine gen(rd()); + gen.seed(seed_value); std::bernoulli_distribution distribution(p); std::vector data(size); std::generate( @@ -252,10 +258,10 @@ inline std::vector get_random_data01(size_t size, float p) } template -inline auto get_random_value(T min, T max) +inline auto get_random_value(T min, T max, int seed_value) -> typename std::enable_if::value, T>::type { - return get_random_data(1, min, max)[0]; + return get_random_data(1, min, max, seed_value)[0]; } // Can't use std::prefix_sum for inclusive/exclusive scan, because @@ -707,7 +713,7 @@ struct numeric_limits : public std::conditional< }; template -inline auto get_random_data(size_t size, typename T::value_type min, typename T::value_type max) +inline auto get_random_data(size_t size, typename T::value_type min, typename T::value_type max, int seed_value) -> typename std::enable_if< is_custom_test_type::value && std::is_integral::value, std::vector @@ -715,6 +721,7 @@ inline auto get_random_data(size_t size, typename T::value_type min, typename T: { std::random_device rd; std::default_random_engine gen(rd()); + gen.seed(seed_value); 
std::uniform_int_distribution distribution(min, max); std::vector data(size); std::generate(data.begin(), data.end(), [&]() { return T(distribution(gen), distribution(gen)); }); @@ -722,7 +729,7 @@ inline auto get_random_data(size_t size, typename T::value_type min, typename T: } template -inline auto get_random_data(size_t size, typename T::value_type min, typename T::value_type max) +inline auto get_random_data(size_t size, typename T::value_type min, typename T::value_type max, int seed_value) -> typename std::enable_if< is_custom_test_type::value && std::is_floating_point::value, std::vector @@ -730,6 +737,7 @@ inline auto get_random_data(size_t size, typename T::value_type min, typename T: { std::random_device rd; std::default_random_engine gen(rd()); + gen.seed(seed_value); std::uniform_real_distribution distribution(min, max); std::vector data(size); std::generate(data.begin(), data.end(), [&]() { return T(distribution(gen), distribution(gen)); }); @@ -737,7 +745,7 @@ inline auto get_random_data(size_t size, typename T::value_type min, typename T: } template -inline auto get_random_data(size_t size, typename T::value_type min, typename T::value_type max) +inline auto get_random_data(size_t size, typename T::value_type min, typename T::value_type max, int seed_value) -> typename std::enable_if< is_custom_test_array_type::value && std::is_integral::value, std::vector @@ -745,6 +753,7 @@ inline auto get_random_data(size_t size, typename T::value_type min, typename T: { std::random_device rd; std::default_random_engine gen(rd()); + gen.seed(seed_value); std::uniform_int_distribution distribution(min, max); std::vector data(size); std::generate( @@ -763,10 +772,10 @@ inline auto get_random_data(size_t size, typename T::value_type min, typename T: } template -inline auto get_random_value(typename T::value_type min, typename T::value_type max) +inline auto get_random_value(typename T::value_type min, typename T::value_type max, int seed_value) -> typename 
std::enable_if::value || is_custom_test_array_type::value, T>::type { - return get_random_data(1, min, max)[0]; + return get_random_data(1, min, max, seed_value)[0]; } template @@ -908,7 +917,6 @@ void assert_eq(const rocprim::half& result, const rocprim::half& expected) ASSERT_EQ(half_to_native(result), half_to_native(expected)); } - } // end test_utils namespace #endif // TEST_TEST_UTILS_HPP_ diff --git a/test/rocprim/test_warp_reduce.cpp b/test/rocprim/test_warp_reduce.cpp index f4cfe2db1..5449a478b 100644 --- a/test/rocprim/test_warp_reduce.cpp +++ b/test/rocprim/test_warp_reduce.cpp @@ -87,60 +87,67 @@ TYPED_TEST(RocprimWarpReduceTests, ReduceSum) return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 2, 50); // used for input - std::vector output(input.size() / logical_warp_size, 0); - - // Calculate expected results on host - std::vector expected(output.size(), 1); - binary_op_type binary_op; - for(size_t i = 0; i < output.size(); i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - T value = 0; - for(size_t j = 0; j < logical_warp_size; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); // used for input + std::vector output(input.size() / logical_warp_size, 0); + + // Calculate expected results on host + std::vector expected(output.size(), 1); + binary_op_type binary_op; + for(size_t i = 0; i < output.size(); i++) { - auto idx = i * logical_warp_size + j; - value = apply(binary_op, value, input[idx]); + T value = 0; + for(size_t j = 0; j < logical_warp_size; j++) + { + auto idx = i * logical_warp_size + j; + value = apply(binary_op, value, input[idx]); + } + expected[i] = value; } - expected[i] = value; - } - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_reduce_sum_kernel), - dim3(size/block_size), dim3(block_size), 0, 0, - device_input, device_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - test_utils::assert_near(output, expected, 0.01); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); + T* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + 
hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_reduce_sum_kernel), + dim3(size/block_size), dim3(block_size), 0, 0, + device_input, device_output + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + test_utils::assert_near(output, expected, 0.01); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); + } + } template< @@ -182,64 +189,71 @@ TYPED_TEST(RocprimWarpReduceTests, AllReduceSum) return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 2, 50); // used for input - std::vector output(input.size(), 0); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / logical_warp_size; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - T value = 0; - for(size_t j = 0; j < logical_warp_size; j++) - { - auto idx = i * logical_warp_size + j; - value = apply(binary_op, value, input[idx]); - } - for (size_t j = 0; j < logical_warp_size; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); // used for input + std::vector output(input.size(), 0); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / logical_warp_size; i++) { - auto idx = i * logical_warp_size + j; - expected[idx] = value; + T value = 0; + for(size_t j = 0; j < logical_warp_size; j++) + { + auto idx = i * logical_warp_size + j; + value = apply(binary_op, value, input[idx]); + } + for (size_t j = 0; j < logical_warp_size; j++) + { + auto idx = i * logical_warp_size + j; + expected[idx] = value; + } } - } - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_allreduce_sum_kernel), - dim3(size/block_size), dim3(block_size), 0, 0, - device_input, device_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - test_utils::assert_near(output, expected, 0.01); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); + T* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + 
hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_allreduce_sum_kernel), + dim3(size/block_size), dim3(block_size), 0, 0, + device_input, device_output + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + test_utils::assert_near(output, expected, 0.01); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); + } + } template< @@ -285,60 +299,67 @@ TYPED_TEST(RocprimWarpReduceTests, ReduceSumValid) return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 2, 50); // used for input - std::vector output(input.size() / logical_warp_size, 0); - - // Calculate expected results on host - std::vector expected(output.size(), 1); - binary_op_type binary_op; - for(size_t i = 0; i < output.size(); i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - T value = 0; - for(size_t j = 0; j < valid; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); // used for input + std::vector output(input.size() / logical_warp_size, 0); + + // Calculate expected results on host + std::vector expected(output.size(), 1); + binary_op_type binary_op; + for(size_t i = 0; i < output.size(); i++) { - auto idx = i * logical_warp_size + j; - value = apply(binary_op, value, input[idx]); + T value = 0; + for(size_t j = 0; j < valid; j++) + { + auto idx = i * logical_warp_size + j; + value = apply(binary_op, value, input[idx]); + } + expected[i] = value; } - expected[i] = value; - } - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_reduce_sum_kernel), - dim3(size/block_size), dim3(block_size), 0, 0, - device_input, device_output, valid - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - test_utils::assert_near(output, expected, 0.01); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); + T* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + 
hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_reduce_sum_kernel), + dim3(size/block_size), dim3(block_size), 0, 0, + device_input, device_output, valid + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + test_utils::assert_near(output, expected, 0.01); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); + } + } template< @@ -381,64 +402,71 @@ TYPED_TEST(RocprimWarpReduceTests, AllReduceSumValid) return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 2, 50); // used for input - std::vector output(input.size(), 0); - - // Calculate expected results on host - std::vector expected(output.size(), 0); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / logical_warp_size; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - T value = 0; - for(size_t j = 0; j < valid; j++) - { - auto idx = i * logical_warp_size + j; - value = apply(binary_op, value, input[idx]); - } - for (size_t j = 0; j < logical_warp_size; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); // used for input + std::vector output(input.size(), 0); + + // Calculate expected results on host + std::vector expected(output.size(), 0); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / logical_warp_size; i++) { - auto idx = i * logical_warp_size + j; - expected[idx] = value; + T value = 0; + for(size_t j = 0; j < valid; j++) + { + auto idx = i * logical_warp_size + j; + value = apply(binary_op, value, input[idx]); + } + for (size_t j = 0; j < logical_warp_size; j++) + { + auto idx = i * logical_warp_size + j; + expected[idx] = value; + } } - } - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_allreduce_sum_kernel), - dim3(size/block_size), dim3(block_size), 0, 0, - device_input, device_output, valid - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - test_utils::assert_near(output, expected, 0.01); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); + T* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + 
hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_allreduce_sum_kernel), + dim3(size/block_size), dim3(block_size), 0, 0, + device_input, device_output, valid + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + test_utils::assert_near(output, expected, 0.01); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); + } + } TYPED_TEST(RocprimWarpReduceTests, ReduceSumCustomStruct) @@ -460,68 +488,75 @@ TYPED_TEST(RocprimWarpReduceTests, ReduceSumCustomStruct) return; } - // Generate data - std::vector input(size); + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - auto random_values = - test_utils::get_random_data(2 * input.size(), 2, 50); - for(size_t i = 0; i < input.size(); i++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input(size); { - input[i].x = random_values[i]; - input[i].y = random_values[i + input.size()]; + auto random_values = + test_utils::get_random_data(2 * input.size(), 2, 50, seed_value); + for(size_t i = 0; i < input.size(); i++) + { + input[i].x = random_values[i]; + input[i].y = random_values[i + input.size()]; + } } - } - std::vector output(input.size() / logical_warp_size); + std::vector output(input.size() / logical_warp_size); - // Calculate expected results on host - std::vector expected(output.size()); - for(size_t i = 0; i < output.size(); i++) - { - T value(0, 0); - for(size_t j = 0; j < logical_warp_size; j++) + // Calculate expected results on host + std::vector expected(output.size()); + for(size_t i = 0; i < output.size(); i++) { - auto idx = i * logical_warp_size + j; - value = value + input[idx]; + T value(0, 0); + for(size_t j = 0; j < 
logical_warp_size; j++) + { + auto idx = i * logical_warp_size + j; + value = value + input[idx]; + } + expected[i] = value; } - expected[i] = value; - } - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_reduce_sum_kernel), - dim3(size/block_size), dim3(block_size), 0, 0, - device_input, device_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - test_utils::assert_near(output, expected, 0.01); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); + T* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_reduce_sum_kernel), + dim3(size/block_size), dim3(block_size), 0, 0, + device_input, device_output + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + test_utils::assert_near(output, expected, 0.01); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); + } + } template< @@ -566,93 +601,100 @@ 
TYPED_TEST(RocprimWarpReduceTests, HeadSegmentedReduceSum) return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 10); // used for input - std::vector flags = test_utils::get_random_data01(size, 0.25f); - for(size_t i = 0; i < flags.size(); i+= logical_warp_size) - { - flags[i] = 1; - } - std::vector output(input.size()); - - T* device_input; - flag_type* device_flags; - T* device_output; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - HIP_CHECK(hipMalloc(&device_flags, flags.size() * sizeof(typename decltype(flags)::value_type))); - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - device_flags, flags.data(), - flags.size() * sizeof(flag_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected(output.size()); - binary_op_type binary_op; - size_t segment_head_index = 0; - T reduction = input[0]; - for(size_t i = 0; i < output.size(); i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - if(i%logical_warp_size == 0 || flags[i]) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 10, seed_value); // used for input + std::vector flags = test_utils::get_random_data01(size, 0.25f, seed_value); + for(size_t i = 0; i < flags.size(); i+= logical_warp_size) { - expected[segment_head_index] = reduction; - segment_head_index = i; - reduction = input[i]; + flags[i] = 1; } - else + std::vector output(input.size()); + + T* device_input; + flag_type* device_flags; + T* device_output; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + HIP_CHECK(hipMalloc(&device_flags, flags.size() * sizeof(typename decltype(flags)::value_type))); + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + device_flags, flags.data(), + flags.size() * sizeof(flag_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected results on host + std::vector expected(output.size()); + binary_op_type binary_op; + size_t segment_head_index = 0; + T reduction = input[0]; + for(size_t i = 0; i < output.size(); i++) { - reduction = apply(binary_op, reduction, input[i]); + if(i%logical_warp_size == 0 || flags[i]) + { + expected[segment_head_index] = reduction; + segment_head_index = i; + reduction = input[i]; + } + else + { + reduction = apply(binary_op, reduction, input[i]); + } } - } - expected[segment_head_index] = reduction; - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(head_segmented_warp_reduce_kernel< - T, flag_type, block_size, logical_warp_size - >), - dim3(size/block_size), dim3(block_size), 0, 0, - device_input, device_flags, device_output - ); - HIP_CHECK(hipPeekAtLastError()); - 
HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - std::vector output_segment(output.size(), 0); - std::vector expected_segment(output.size(), 0); - for(size_t i = 0; i < output.size(); i++) - { - if(flags[i]) + expected[segment_head_index] = reduction; + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(head_segmented_warp_reduce_kernel< + T, flag_type, block_size, logical_warp_size + >), + dim3(size/block_size), dim3(block_size), 0, 0, + device_input, device_flags, device_output + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + std::vector output_segment(output.size(), 0); + std::vector expected_segment(output.size(), 0); + for(size_t i = 0; i < output.size(); i++) { - output_segment[i] = output[i]; - expected_segment[i] = expected[i]; + if(flags[i]) + { + output_segment[i] = output[i]; + expected_segment[i] = expected[i]; + } } - } - test_utils::assert_near(output_segment, expected_segment, 0.01); + test_utils::assert_near(output_segment, expected_segment, 0.01); - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_flags)); - HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_flags)); + HIP_CHECK(hipFree(device_output)); + } + } template< @@ -697,100 +739,107 @@ TYPED_TEST(RocprimWarpReduceTests, TailSegmentedReduceSum) return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 1, 10); // used for input - std::vector flags = test_utils::get_random_data01(size, 0.25f); - for(size_t i = logical_warp_size - 1; i < flags.size(); i+= logical_warp_size) - { - flags[i] = 1; - } - 
std::vector output(input.size()); - - T* device_input; - flag_type* device_flags; - T* device_output; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - HIP_CHECK(hipMalloc(&device_flags, flags.size() * sizeof(typename decltype(flags)::value_type))); - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - device_flags, flags.data(), - flags.size() * sizeof(flag_type), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected(output.size()); - binary_op_type binary_op; - std::vector segment_indexes; - size_t segment_index = 0; - T reduction; - for(size_t i = 0; i < output.size(); i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - // single value segments - if(flags[i]) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 1, 10, seed_value); // used for input + std::vector flags = test_utils::get_random_data01(size, 0.25f, seed_value); + for(size_t i = logical_warp_size - 1; i < flags.size(); i+= logical_warp_size) { - expected[i] = input[i]; - segment_indexes.push_back(i); + flags[i] = 1; } - else + std::vector output(input.size()); + + T* device_input; + flag_type* device_flags; + T* device_output; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + HIP_CHECK(hipMalloc(&device_flags, flags.size() * sizeof(typename decltype(flags)::value_type))); + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + device_flags, flags.data(), + flags.size() * sizeof(flag_type), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected results on host + std::vector expected(output.size()); + binary_op_type binary_op; + std::vector segment_indexes; + size_t segment_index = 0; + T reduction; + for(size_t i = 0; i < output.size(); i++) { - segment_index = i; - reduction = input[i]; - auto next = i + 1; - while(next < output.size() && !flags[next]) + // single value segments + if(flags[i]) { - reduction = apply(binary_op, reduction, input[next]); + expected[i] = input[i]; + segment_indexes.push_back(i); + } + else + { + segment_index = i; + reduction = input[i]; + auto next = i + 1; + while(next < output.size() && !flags[next]) + { + reduction = apply(binary_op, reduction, input[next]); + i++; + next++; + } i++; - next++; + expected[segment_index] = apply(binary_op, reduction, input[i]); + segment_indexes.push_back(segment_index); } - i++; - 
expected[segment_index] = apply(binary_op, reduction, input[i]); - segment_indexes.push_back(segment_index); } - } - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(tail_segmented_warp_reduce_kernel< - T, flag_type, block_size, logical_warp_size - >), - dim3(size/block_size), dim3(block_size), 0, 0, - device_input, device_flags, device_output - ); - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - std::vector output_segment(segment_indexes.size()); - std::vector expected_segment(segment_indexes.size()); - for(size_t i = 0; i < segment_indexes.size(); i++) - { - auto index = segment_indexes[i]; - output_segment[i] = output[index]; - expected_segment[i] = expected[index]; - } - test_utils::assert_near(output_segment, expected_segment, 0.01); + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(tail_segmented_warp_reduce_kernel< + T, flag_type, block_size, logical_warp_size + >), + dim3(size/block_size), dim3(block_size), 0, 0, + device_input, device_flags, device_output + ); + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + std::vector output_segment(segment_indexes.size()); + std::vector expected_segment(segment_indexes.size()); + for(size_t i = 0; i < segment_indexes.size(); i++) + { + auto index = segment_indexes[i]; + output_segment[i] = output[index]; + expected_segment[i] = expected[index]; + } + test_utils::assert_near(output_segment, expected_segment, 0.01); - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_flags)); - HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_input)); + 
HIP_CHECK(hipFree(device_flags)); + HIP_CHECK(hipFree(device_output)); + } + } diff --git a/test/rocprim/test_warp_scan.cpp b/test/rocprim/test_warp_scan.cpp index 04ea39f75..9689a9ba8 100644 --- a/test/rocprim/test_warp_scan.cpp +++ b/test/rocprim/test_warp_scan.cpp @@ -87,60 +87,67 @@ TYPED_TEST(RocprimWarpScanTests, InclusiveScan) return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 2, 50); - std::vector output(size); - std::vector expected(output.size(), 0); - - // Calculate expected results on host - binary_op_type binary_op; - for(size_t i = 0; i < input.size() / logical_warp_size; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - for(size_t j = 0; j < logical_warp_size; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output(size); + std::vector expected(output.size(), 0); + + // Calculate expected results on host + binary_op_type binary_op; + for(size_t i = 0; i < input.size() / logical_warp_size; i++) { - auto idx = i * logical_warp_size + j; - expected[idx] = apply(binary_op, input[idx], expected[j > 0 ? idx-1 : idx]); + for(size_t j = 0; j < logical_warp_size; j++) + { + auto idx = i * logical_warp_size + j; + expected[idx] = apply(binary_op, input[idx], expected[j > 0 ? 
idx-1 : idx]); + } } - } - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_inclusive_scan_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - test_utils::assert_near(output, expected, 0.01); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); + // Writing to device memory + T* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_inclusive_scan_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, device_output + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + test_utils::assert_near(output, expected, 0.01); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); + } + } template< @@ -191,80 +198,87 @@ TYPED_TEST(RocprimWarpScanTests, InclusiveScanReduce) 
return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 2, 50); - std::vector output(size); - std::vector output_reductions(size / logical_warp_size); - std::vector expected(output.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - - // Calculate expected results on host - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / logical_warp_size; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - for(size_t j = 0; j < logical_warp_size; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output(size); + std::vector output_reductions(size / logical_warp_size); + std::vector expected(output.size(), 0); + std::vector expected_reductions(output_reductions.size(), 0); + + // Calculate expected results on host + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / logical_warp_size; i++) { - auto idx = i * logical_warp_size + j; - expected[idx] = apply(binary_op, input[idx], expected[j > 0 ? idx-1 : idx]); + for(size_t j = 0; j < logical_warp_size; j++) + { + auto idx = i * logical_warp_size + j; + expected[idx] = apply(binary_op, input[idx], expected[j > 0 ? 
idx-1 : idx]); + } + expected_reductions[i] = expected[(i+1) * logical_warp_size - 1]; } - expected_reductions[i] = expected[(i+1) * logical_warp_size - 1]; - } - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_inclusive_scan_reduce_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, device_output_reductions - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - test_utils::assert_near(output, expected, 0.01); - test_utils::assert_near(output_reductions, expected_reductions, 0.01); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); + // Writing to device memory + T* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + T* device_output_reductions; + HIP_CHECK( + hipMalloc( + &device_output_reductions, + output_reductions.size() * sizeof(typename 
decltype(output_reductions)::value_type) + ) + ); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_inclusive_scan_reduce_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, device_output, device_output_reductions + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK( + hipMemcpy( + output_reductions.data(), device_output_reductions, + output_reductions.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + test_utils::assert_near(output, expected, 0.01); + test_utils::assert_near(output_reductions, expected_reductions, 0.01); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_reductions)); + } + } template< @@ -307,62 +321,69 @@ TYPED_TEST(RocprimWarpScanTests, ExclusiveScan) return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 2, 50); - std::vector output(size); - std::vector expected(input.size(), 0); - const T init = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - binary_op_type binary_op; - for(size_t i = 0; i < input.size() / logical_warp_size; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected[i * logical_warp_size] = init; - for(size_t j = 1; j < logical_warp_size; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output(size); + std::vector expected(input.size(), 0); + const T init = test_utils::get_random_value(0, 100, seed_value); + + // Calculate expected results on host + binary_op_type binary_op; + for(size_t i = 0; i < input.size() / logical_warp_size; i++) { - auto idx = i * logical_warp_size + j; - expected[idx] = apply(binary_op, input[idx-1], expected[idx-1]); + expected[i * logical_warp_size] = init; + for(size_t j = 1; j < logical_warp_size; j++) + { + auto idx = i * logical_warp_size + j; + expected[idx] = apply(binary_op, input[idx-1], expected[idx-1]); + } } - } - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_exclusive_scan_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, init - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - test_utils::assert_near(output, expected, 0.01); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); + // Writing to device memory + T* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + 
+ HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_exclusive_scan_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, device_output, init + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + test_utils::assert_near(output, expected, 0.01); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); + } + } template< @@ -414,88 +435,95 @@ TYPED_TEST(RocprimWarpScanTests, ExclusiveReduceScan) return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 2, 50); - std::vector output(size); - std::vector output_reductions(size / logical_warp_size); - std::vector expected(input.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - const T init = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - binary_op_type binary_op; - for(size_t i = 0; i < input.size() / logical_warp_size; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected[i * logical_warp_size] = init; - for(size_t j = 1; j < logical_warp_size; j++) + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output(size); + std::vector output_reductions(size / logical_warp_size); + std::vector expected(input.size(), 0); + std::vector expected_reductions(output_reductions.size(), 0); + const T init = test_utils::get_random_value(0, 100, seed_value); + + // Calculate expected results on host + binary_op_type binary_op; + for(size_t i = 0; i < input.size() / logical_warp_size; i++) { - auto idx = i * logical_warp_size + j; - expected[idx] = apply(binary_op, input[idx-1], expected[idx-1]); - } + expected[i * logical_warp_size] = init; + for(size_t j = 1; j < logical_warp_size; j++) + { + auto idx = i * logical_warp_size + j; + expected[idx] = apply(binary_op, input[idx-1], expected[idx-1]); + } - expected_reductions[i] = 0; - for(size_t j = 0; j < logical_warp_size; j++) - { - auto idx = i * logical_warp_size + j; - expected_reductions[i] = apply(binary_op, expected_reductions[i], input[idx]); + expected_reductions[i] = 0; + for(size_t j = 0; j < logical_warp_size; j++) + { + auto idx = i * logical_warp_size + j; + expected_reductions[i] = apply(binary_op, expected_reductions[i], input[idx]); + } } - } - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_exclusive_scan_reduce_kernel), - 
dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output, device_output_reductions, init - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - test_utils::assert_near(output, expected, 0.01); - test_utils::assert_near(output_reductions, expected_reductions, 0.01); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); - HIP_CHECK(hipFree(device_output_reductions)); + // Writing to device memory + T* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + T* device_output_reductions; + HIP_CHECK( + hipMalloc( + &device_output_reductions, + output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) + ) + ); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_exclusive_scan_reduce_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, device_output, device_output_reductions, init + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK( + hipMemcpy( + output_reductions.data(), device_output_reductions, + output_reductions.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + test_utils::assert_near(output, expected, 0.01); 
+ test_utils::assert_near(output_reductions, expected_reductions, 0.01); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); + HIP_CHECK(hipFree(device_output_reductions)); + } + } template< @@ -544,90 +572,97 @@ TYPED_TEST(RocprimWarpScanTests, Scan) return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 2, 50); - std::vector output_inclusive(size); - std::vector output_exclusive(size); - std::vector expected_inclusive(output_inclusive.size(), 0); - std::vector expected_exclusive(output_exclusive.size(), 0); - const T init = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - binary_op_type binary_op; - for(size_t i = 0; i < input.size() / logical_warp_size; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected_exclusive[i * logical_warp_size] = init; - for(size_t j = 0; j < logical_warp_size; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output_inclusive(size); + std::vector output_exclusive(size); + std::vector expected_inclusive(output_inclusive.size(), 0); + std::vector expected_exclusive(output_exclusive.size(), 0); + const T init = test_utils::get_random_value(0, 100, seed_value); + + // Calculate expected results on host + binary_op_type binary_op; + for(size_t i = 0; i < input.size() / logical_warp_size; i++) { - auto idx = i * logical_warp_size + j; - expected_inclusive[idx] = apply(binary_op, input[idx], expected_inclusive[j > 0 ? 
idx-1 : idx]); - if(j > 0) + expected_exclusive[i * logical_warp_size] = init; + for(size_t j = 0; j < logical_warp_size; j++) { - expected_exclusive[idx] = apply(binary_op, input[idx-1], expected_exclusive[idx-1]); + auto idx = i * logical_warp_size + j; + expected_inclusive[idx] = apply(binary_op, input[idx], expected_inclusive[j > 0 ? idx-1 : idx]); + if(j > 0) + { + expected_exclusive[idx] = apply(binary_op, input[idx-1], expected_exclusive[idx-1]); + } } } - } - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_inclusive_output; - HIP_CHECK( - hipMalloc( - &device_inclusive_output, - output_inclusive.size() * sizeof(typename decltype(output_inclusive)::value_type) - ) - ); - T* device_exclusive_output; - HIP_CHECK( - hipMalloc( - &device_exclusive_output, - output_exclusive.size() * sizeof(typename decltype(output_exclusive)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_scan_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_inclusive_output, device_exclusive_output, init - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output_inclusive.data(), device_inclusive_output, - output_inclusive.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_exclusive.data(), device_exclusive_output, - output_exclusive.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - test_utils::assert_near(output_inclusive, expected_inclusive, 0.01); - test_utils::assert_near(output_exclusive, expected_exclusive, 0.01); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_inclusive_output)); - 
HIP_CHECK(hipFree(device_exclusive_output)); + // Writing to device memory + T* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + T* device_inclusive_output; + HIP_CHECK( + hipMalloc( + &device_inclusive_output, + output_inclusive.size() * sizeof(typename decltype(output_inclusive)::value_type) + ) + ); + T* device_exclusive_output; + HIP_CHECK( + hipMalloc( + &device_exclusive_output, + output_exclusive.size() * sizeof(typename decltype(output_exclusive)::value_type) + ) + ); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_scan_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, device_inclusive_output, device_exclusive_output, init + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output_inclusive.data(), device_inclusive_output, + output_inclusive.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK( + hipMemcpy( + output_exclusive.data(), device_exclusive_output, + output_exclusive.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + test_utils::assert_near(output_inclusive, expected_inclusive, 0.01); + test_utils::assert_near(output_exclusive, expected_exclusive, 0.01); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_inclusive_output)); + HIP_CHECK(hipFree(device_exclusive_output)); + } + } template< @@ -681,112 +716,120 @@ TYPED_TEST(RocprimWarpScanTests, ScanReduce) return; } - // Generate data - std::vector input = test_utils::get_random_data(size, 2, 50); - std::vector output_inclusive(size); - std::vector output_exclusive(size); - std::vector output_reductions(size / logical_warp_size); - std::vector expected_inclusive(output_inclusive.size(), 0); - std::vector 
expected_exclusive(output_exclusive.size(), 0); - std::vector expected_reductions(output_reductions.size(), 0); - const T init = test_utils::get_random_value(0, 100); - - // Calculate expected results on host - binary_op_type binary_op; - for(size_t i = 0; i < input.size() / logical_warp_size; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - expected_exclusive[i * logical_warp_size] = init; - for(size_t j = 0; j < logical_warp_size; j++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input = test_utils::get_random_data(size, 2, 50, seed_value); + std::vector output_inclusive(size); + std::vector output_exclusive(size); + std::vector output_reductions(size / logical_warp_size); + std::vector expected_inclusive(output_inclusive.size(), 0); + std::vector expected_exclusive(output_exclusive.size(), 0); + std::vector expected_reductions(output_reductions.size(), 0); + const T init = test_utils::get_random_value(0, 100, seed_value); + + // Calculate expected results on host + binary_op_type binary_op; + for(size_t i = 0; i < input.size() / logical_warp_size; i++) { - auto idx = i * logical_warp_size + j; - expected_inclusive[idx] = apply(binary_op, input[idx], expected_inclusive[j > 0 ? idx-1 : idx]); - if(j > 0) + expected_exclusive[i * logical_warp_size] = init; + for(size_t j = 0; j < logical_warp_size; j++) { - expected_exclusive[idx] = apply(binary_op, input[idx-1], expected_exclusive[idx-1]); + auto idx = i * logical_warp_size + j; + expected_inclusive[idx] = apply(binary_op, input[idx], expected_inclusive[j > 0 ? 
idx-1 : idx]); + if(j > 0) + { + expected_exclusive[idx] = apply(binary_op, input[idx-1], expected_exclusive[idx-1]); + } } + expected_reductions[i] = expected_inclusive[(i+1) * logical_warp_size - 1]; } - expected_reductions[i] = expected_inclusive[(i+1) * logical_warp_size - 1]; - } - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_inclusive_output; - HIP_CHECK( - hipMalloc( - &device_inclusive_output, - output_inclusive.size() * sizeof(typename decltype(output_inclusive)::value_type) - ) - ); - T* device_exclusive_output; - HIP_CHECK( - hipMalloc( - &device_exclusive_output, - output_exclusive.size() * sizeof(typename decltype(output_exclusive)::value_type) - ) - ); - T* device_output_reductions; - HIP_CHECK( - hipMalloc( - &device_output_reductions, - output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) - ) - ); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_scan_reduce_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, - device_inclusive_output, device_exclusive_output, device_output_reductions, init - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output_inclusive.data(), device_inclusive_output, - output_inclusive.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_exclusive.data(), device_exclusive_output, - output_exclusive.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_reductions.data(), device_output_reductions, - output_reductions.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - test_utils::assert_near(output_inclusive, expected_inclusive, 0.01); - 
test_utils::assert_near(output_exclusive, expected_exclusive, 0.01); - test_utils::assert_near(output_reductions, expected_reductions, 0.01); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_inclusive_output)); - HIP_CHECK(hipFree(device_exclusive_output)); + // Writing to device memory + T* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + T* device_inclusive_output; + HIP_CHECK( + hipMalloc( + &device_inclusive_output, + output_inclusive.size() * sizeof(typename decltype(output_inclusive)::value_type) + ) + ); + T* device_exclusive_output; + HIP_CHECK( + hipMalloc( + &device_exclusive_output, + output_exclusive.size() * sizeof(typename decltype(output_exclusive)::value_type) + ) + ); + T* device_output_reductions; + HIP_CHECK( + hipMalloc( + &device_output_reductions, + output_reductions.size() * sizeof(typename decltype(output_reductions)::value_type) + ) + ); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_scan_reduce_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, + device_inclusive_output, device_exclusive_output, device_output_reductions, init + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output_inclusive.data(), device_inclusive_output, + output_inclusive.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK( + hipMemcpy( + output_exclusive.data(), device_exclusive_output, + output_exclusive.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + HIP_CHECK( + hipMemcpy( + output_reductions.data(), device_output_reductions, + output_reductions.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + test_utils::assert_near(output_inclusive, expected_inclusive, 0.01); + 
test_utils::assert_near(output_exclusive, expected_exclusive, 0.01); + test_utils::assert_near(output_reductions, expected_reductions, 0.01); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_inclusive_output)); + HIP_CHECK(hipFree(device_exclusive_output)); + } + } + TYPED_TEST(RocprimWarpScanTests, InclusiveScanCustomType) { using base_type = typename TestFixture::params::type; @@ -806,68 +849,73 @@ TYPED_TEST(RocprimWarpScanTests, InclusiveScanCustomType) return; } - // Generate data - std::vector input(size); - std::vector output(size); - std::vector expected(output.size(), T(0)); - - // Initializing input data + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - auto random_values = - test_utils::get_random_data(2 * input.size(), 0, 100); - for(size_t i = 0; i < input.size(); i++) + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input(size); + std::vector output(size); + std::vector expected(output.size(), T(0)); + // Initializing input data { - input[i].x = random_values[i]; - input[i].y = random_values[i + input.size()]; + auto random_values = + test_utils::get_random_data(2 * input.size(), 0, 100, seed_value); + for(size_t i = 0; i < input.size(); i++) + { + input[i].x = random_values[i]; + input[i].y = random_values[i + input.size()]; + } } - } - // Calculate expected results on host - for(size_t i = 0; i < input.size() / logical_warp_size; i++) - { - for(size_t j = 0; j < logical_warp_size; j++) + // Calculate expected results on host + for(size_t i = 0; i < input.size() / logical_warp_size; i++) { - auto idx = i * logical_warp_size + j; - expected[idx] = input[idx] + expected[j > 0 ? idx-1 : idx]; + for(size_t j = 0; j < logical_warp_size; j++) + { + auto idx = i * logical_warp_size + j; + expected[idx] = input[idx] + expected[j > 0 ? 
idx-1 : idx]; + } } - } - // Writing to device memory - T* device_input; - HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); - T* device_output; - HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); - - HIP_CHECK( - hipMemcpy( - device_input, input.data(), - input.size() * sizeof(T), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(warp_inclusive_scan_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - device_input, device_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), device_output, - output.size() * sizeof(T), - hipMemcpyDeviceToHost - ) - ); - - // Validating results - test_utils::assert_near(output, expected, 0.01); - - HIP_CHECK(hipFree(device_input)); - HIP_CHECK(hipFree(device_output)); + // Writing to device memory + T* device_input; + HIP_CHECK(hipMalloc(&device_input, input.size() * sizeof(typename decltype(input)::value_type))); + T* device_output; + HIP_CHECK(hipMalloc(&device_output, output.size() * sizeof(typename decltype(output)::value_type))); + + HIP_CHECK( + hipMemcpy( + device_input, input.data(), + input.size() * sizeof(T), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(warp_inclusive_scan_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + device_input, device_output + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), device_output, + output.size() * sizeof(T), + hipMemcpyDeviceToHost + ) + ); + + // Validating results + test_utils::assert_near(output, expected, 0.01); + + HIP_CHECK(hipFree(device_input)); + HIP_CHECK(hipFree(device_output)); + } } diff --git a/test/rocprim/test_warp_sort.cpp b/test/rocprim/test_warp_sort.cpp index 
de455f454..02f45e14f 100644 --- a/test/rocprim/test_warp_sort.cpp +++ b/test/rocprim/test_warp_sort.cpp @@ -92,51 +92,58 @@ TYPED_TEST(RocprimWarpSortShuffleBasedTests, Sort) return; } - // Generate data - std::vector output = test_utils::get_random_data(size, 0, 100); - - // Calculate expected results on host - std::vector expected(output); - binary_op_type binary_op; - for(size_t i = 0; i < output.size() / logical_warp_size; i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - std::sort(expected.begin() + (i * logical_warp_size), expected.begin() + ((i + 1) * logical_warp_size), binary_op); - } + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output = test_utils::get_random_data(size, 0, 100, seed_value); + + // Calculate expected results on host + std::vector expected(output); + binary_op_type binary_op; + for(size_t i = 0; i < output.size() / logical_warp_size; i++) + { + std::sort(expected.begin() + (i * logical_warp_size), expected.begin() + ((i + 1) * logical_warp_size), binary_op); + } + + // Writing to device memory + T* d_output; + HIP_CHECK( + hipMalloc(&d_output, output.size() * sizeof(typename decltype(output)::value_type)) + ); + + HIP_CHECK( + hipMemcpy( + d_output, output.data(), + output.size() * sizeof(typename decltype(output)::value_type), + hipMemcpyHostToDevice + ) + ); + + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(test_hip_warp_sort), + dim3(grid_size), dim3(block_size), 0, 0, + d_output + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); - // Writing to device memory - T* d_output; - HIP_CHECK( - hipMalloc(&d_output, output.size() * sizeof(typename decltype(output)::value_type)) - ); - - HIP_CHECK( - hipMemcpy( - d_output, output.data(), - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Launching 
kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(test_hip_warp_sort), - dim3(grid_size), dim3(block_size), 0, 0, - d_output - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(typename decltype(output)::value_type), - hipMemcpyDeviceToHost - ) - ); - - test_utils::assert_near(output, expected, 0.01); + // Read from device memory + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(typename decltype(output)::value_type), + hipMemcpyDeviceToHost + ) + ); + + test_utils::assert_near(output, expected, 0.01); + } + } template< @@ -174,101 +181,108 @@ TYPED_TEST(RocprimWarpSortShuffleBasedTests, SortKeyInt) return; } - // Generate data - std::vector output_key = test_utils::get_random_data(size, 0, 100); - std::vector output_value = test_utils::get_random_data(size, 0, 100); - - // Combine vectors to form pairs with key and value - std::vector target(size); - for(unsigned i = 0; i < target.size(); i++) + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) { - target[i].x = output_key[i]; - target[i].y = output_value[i]; - } + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector output_key = test_utils::get_random_data(size, 0, 100, seed_value); + std::vector output_value = test_utils::get_random_data(size, 0, 100, seed_value); + + // Combine vectors to form pairs with key and value + std::vector target(size); + for(unsigned i = 0; i < target.size(); i++) + { + target[i].x = output_key[i]; + target[i].y = output_value[i]; + } + + // Calculate expected results on host + std::vector expected(target); + for(size_t i = 0; i < expected.size() / logical_warp_size; i++) + { + std::sort(expected.begin() + (i * logical_warp_size), + expected.begin() + ((i + 1) * logical_warp_size) + ); + } + + // Writing to device memory + T* d_output_key; + T* d_output_value; + HIP_CHECK( + hipMalloc(&d_output_key, output_key.size() * sizeof(typename decltype(output_key)::value_type)) + ); + HIP_CHECK( + hipMalloc(&d_output_value, output_value.size() * sizeof(typename decltype(output_value)::value_type)) + ); - // Calculate expected results on host - std::vector expected(target); - for(size_t i = 0; i < expected.size() / logical_warp_size; i++) - { - std::sort(expected.begin() + (i * logical_warp_size), - expected.begin() + ((i + 1) * logical_warp_size) + HIP_CHECK( + hipMemcpy( + d_output_key, output_key.data(), + output_key.size() * sizeof(typename decltype(output_key)::value_type), + hipMemcpyHostToDevice + ) ); - } - // Writing to device memory - T* d_output_key; - T* d_output_value; - HIP_CHECK( - hipMalloc(&d_output_key, output_key.size() * sizeof(typename decltype(output_key)::value_type)) - ); - HIP_CHECK( - hipMalloc(&d_output_value, output_value.size() * sizeof(typename decltype(output_value)::value_type)) - ); - - HIP_CHECK( - hipMemcpy( - d_output_key, output_key.data(), - output_key.size() * sizeof(typename decltype(output_key)::value_type), - hipMemcpyHostToDevice - ) - ); - - HIP_CHECK( - hipMemcpy( - d_output_value, 
output_value.data(), - output_value.size() * sizeof(typename decltype(output_value)::value_type), - hipMemcpyHostToDevice - ) - ); - - // Launching kernel - hipLaunchKernelGGL( - HIP_KERNEL_NAME(test_hip_sort_key_value_kernel), - dim3(grid_size), dim3(block_size), 0, 0, - d_output_key, d_output_value - ); - - HIP_CHECK(hipPeekAtLastError()); - HIP_CHECK(hipDeviceSynchronize()); - - // Read from device memory - HIP_CHECK( - hipMemcpy( - output_key.data(), d_output_key, - output_key.size() * sizeof(typename decltype(output_key)::value_type), - hipMemcpyDeviceToHost - ) - ); - - HIP_CHECK( - hipMemcpy( - output_value.data(), d_output_value, - output_value.size() * sizeof(typename decltype(output_value)::value_type), - hipMemcpyDeviceToHost - ) - ); - - std::vector expected_key(expected.size()); - std::vector expected_value(expected.size()); - for(size_t i = 0; i < expected.size(); i++) - { - expected_key[i] = expected[i].x; - expected_value[i] = expected[i].y; - } + HIP_CHECK( + hipMemcpy( + d_output_value, output_value.data(), + output_value.size() * sizeof(typename decltype(output_value)::value_type), + hipMemcpyHostToDevice + ) + ); - // Keys are sorted, Values order not guaranteed - // Sort subsets where key was the same to make sure all values are still present - value_op_type value_op; - eq_op_type eq_op; - for (size_t i = 0; i < output_key.size();) - { - auto j = i; - for (; j < output_key.size() && eq_op(output_key[j], output_key[i]); ++j) { } - std::sort(output_value.begin() + i, output_value.begin() + j, value_op); - std::sort(expected_value.begin() + i, expected_value.begin() + j, value_op); - i = j; - } + // Launching kernel + hipLaunchKernelGGL( + HIP_KERNEL_NAME(test_hip_sort_key_value_kernel), + dim3(grid_size), dim3(block_size), 0, 0, + d_output_key, d_output_value + ); + + HIP_CHECK(hipPeekAtLastError()); + HIP_CHECK(hipDeviceSynchronize()); + + // Read from device memory + HIP_CHECK( + hipMemcpy( + output_key.data(), d_output_key, + output_key.size() 
* sizeof(typename decltype(output_key)::value_type), + hipMemcpyDeviceToHost + ) + ); - test_utils::assert_near(output_key, expected_key, 0.01); - test_utils::assert_near(output_value, expected_value, 0.01); + HIP_CHECK( + hipMemcpy( + output_value.data(), d_output_value, + output_value.size() * sizeof(typename decltype(output_value)::value_type), + hipMemcpyDeviceToHost + ) + ); + + std::vector expected_key(expected.size()); + std::vector expected_value(expected.size()); + for(size_t i = 0; i < expected.size(); i++) + { + expected_key[i] = expected[i].x; + expected_value[i] = expected[i].y; + } + + // Keys are sorted, Values order not guaranteed + // Sort subsets where key was the same to make sure all values are still present + value_op_type value_op; + eq_op_type eq_op; + for (size_t i = 0; i < output_key.size();) + { + auto j = i; + for (; j < output_key.size() && eq_op(output_key[j], output_key[i]); ++j) { } + std::sort(output_value.begin() + i, output_value.begin() + j, value_op); + std::sort(expected_value.begin() + i, expected_value.begin() + j, value_op); + i = j; + } + + test_utils::assert_near(output_key, expected_key, 0.01); + test_utils::assert_near(output_value, expected_value, 0.01); + } + } diff --git a/test/rocprim/test_zip_iterator.cpp b/test/rocprim/test_zip_iterator.cpp index 79fca0416..51fb87c2d 100644 --- a/test/rocprim/test_zip_iterator.cpp +++ b/test/rocprim/test_zip_iterator.cpp @@ -159,95 +159,102 @@ TEST(RocprimZipIteratorTests, Transform) // using default stream hipStream_t stream = 0; - // Generate data - std::vector input1 = test_utils::get_random_data(size, 1, 100); - std::vector input2 = test_utils::get_random_data(size, 1, 100); - std::vector input3 = test_utils::get_random_data(size, 1, 100); - std::vector output(input1.size()); - - T1 * d_input1; - T2 * d_input2; - T3 * d_input3; - U * d_output; - HIP_CHECK(hipMalloc(&d_input1, input1.size() * sizeof(T1))); - HIP_CHECK(hipMalloc(&d_input2, input2.size() * sizeof(T2))); - 
HIP_CHECK(hipMalloc(&d_input3, input3.size() * sizeof(T3))); - HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); - HIP_CHECK( - hipMemcpy( - d_input1, input1.data(), - input1.size() * sizeof(T1), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_input2, input2.data(), - input2.size() * sizeof(T2), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_input3, input3.data(), - input3.size() * sizeof(T3), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - std::vector expected(input1.size()); - std::transform( - rocprim::make_zip_iterator( - rocprim::make_tuple(input1.begin(), input2.begin(), input3.begin()) - ), - rocprim::make_zip_iterator( - rocprim::make_tuple(input1.end(), input2.end(), input3.end()) - ), - expected.begin(), - tuple3_transform_op() - ); - - // Run - HIP_CHECK( - rocprim::transform( + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + { + unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input1 = test_utils::get_random_data(size, 1, 100, seed_value); + std::vector input2 = test_utils::get_random_data(size, 1, 100, seed_value); + std::vector input3 = test_utils::get_random_data(size, 1, 100, seed_value); + std::vector output(input1.size()); + + T1 * d_input1; + T2 * d_input2; + T3 * d_input3; + U * d_output; + HIP_CHECK(hipMalloc(&d_input1, input1.size() * sizeof(T1))); + HIP_CHECK(hipMalloc(&d_input2, input2.size() * sizeof(T2))); + HIP_CHECK(hipMalloc(&d_input3, input3.size() * sizeof(T3))); + HIP_CHECK(hipMalloc(&d_output, output.size() * sizeof(U))); + HIP_CHECK( + hipMemcpy( + d_input1, input1.data(), + input1.size() * sizeof(T1), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_input2, input2.data(), + input2.size() * sizeof(T2), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_input3, input3.data(), + input3.size() * sizeof(T3), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Calculate expected results on host + std::vector expected(input1.size()); + std::transform( rocprim::make_zip_iterator( - rocprim::make_tuple( - d_input1, d_input2, d_input3 - ) + rocprim::make_tuple(input1.begin(), input2.begin(), input3.begin()) ), - d_output, - input1.size(), - tuple3_transform_op(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output.data(), d_output, - output.size() * sizeof(U), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - for(size_t i = 0; i < output.size(); i++) - { - auto diff = std::max(std::abs(0.01f * expected[i]), U(0.01f)); - if(std::is_integral::value) diff = 0; - ASSERT_NEAR(output[i], expected[i], diff) << "where index = " << i; - } + rocprim::make_zip_iterator( + rocprim::make_tuple(input1.end(), 
input2.end(), input3.end()) + ), + expected.begin(), + tuple3_transform_op() + ); - hipFree(d_input1); - hipFree(d_input2); - hipFree(d_input3); - hipFree(d_output); + // Run + HIP_CHECK( + rocprim::transform( + rocprim::make_zip_iterator( + rocprim::make_tuple( + d_input1, d_input2, d_input3 + ) + ), + d_output, + input1.size(), + tuple3_transform_op(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output.data(), d_output, + output.size() * sizeof(U), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + for(size_t i = 0; i < output.size(); i++) + { + auto diff = std::max(std::abs(0.01f * expected[i]), U(0.01f)); + if(std::is_integral::value) diff = 0; + ASSERT_NEAR(output[i], expected[i], diff) << "where index = " << i; + } + + hipFree(d_input1); + hipFree(d_input2); + hipFree(d_input3); + hipFree(d_output); + } + } template @@ -289,138 +296,145 @@ TEST(RocprimZipIteratorTests, TransformReduce) // using default stream hipStream_t stream = 0; - // Generate data - std::vector input1 = test_utils::get_random_data(size, 1, 100); - std::vector input2 = test_utils::get_random_data(size, 1, 50); - std::vector input3 = test_utils::get_random_data(size, 1, 10); - std::vector output1(1, 0); - std::vector output2(1, 0); - - T1* d_input1; - T2* d_input2; - T3* d_input3; - U1* d_output1; - U2* d_output2; - HIP_CHECK(hipMalloc(&d_input1, input1.size() * sizeof(T1))); - HIP_CHECK(hipMalloc(&d_input2, input2.size() * sizeof(T2))); - HIP_CHECK(hipMalloc(&d_input3, input3.size() * sizeof(T3))); - HIP_CHECK(hipMalloc(&d_output1, output1.size() * sizeof(U1))); - HIP_CHECK(hipMalloc(&d_output2, output2.size() * sizeof(U2))); - - // Copy input data to device - HIP_CHECK( - hipMemcpy( - d_input1, input1.data(), - input1.size() * sizeof(T1), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_input2, input2.data(), - 
input2.size() * sizeof(T2), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK( - hipMemcpy( - d_input3, input3.data(), - input3.size() * sizeof(T3), - hipMemcpyHostToDevice - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Calculate expected results on host - U1 expected1 = std::accumulate(input1.begin(), input1.end(), T1(0)); - U2 expected2 = std::accumulate(input2.begin(), input2.end(), T2(0)) - + std::accumulate(input3.begin(), input3.end(), T2(0)); - - // temp storage - size_t temp_storage_size_bytes; - // Get size of d_temp_storage - HIP_CHECK( - rocprim::reduce( - nullptr, - temp_storage_size_bytes, - rocprim::make_transform_iterator( + for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + { + unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); + + // Generate data + std::vector input1 = test_utils::get_random_data(size, 1, 100, seed_value); + std::vector input2 = test_utils::get_random_data(size, 1, 50, seed_value); + std::vector input3 = test_utils::get_random_data(size, 1, 10, seed_value); + std::vector output1(1, 0); + std::vector output2(1, 0); + + T1* d_input1; + T2* d_input2; + T3* d_input3; + U1* d_output1; + U2* d_output2; + HIP_CHECK(hipMalloc(&d_input1, input1.size() * sizeof(T1))); + HIP_CHECK(hipMalloc(&d_input2, input2.size() * sizeof(T2))); + HIP_CHECK(hipMalloc(&d_input3, input3.size() * sizeof(T3))); + HIP_CHECK(hipMalloc(&d_output1, output1.size() * sizeof(U1))); + HIP_CHECK(hipMalloc(&d_output2, output2.size() * sizeof(U2))); + + // Copy input data to device + HIP_CHECK( + hipMemcpy( + d_input1, input1.data(), + input1.size() * sizeof(T1), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_input2, input2.data(), + input2.size() * sizeof(T2), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK( + hipMemcpy( + d_input3, input3.data(), + input3.size() * sizeof(T3), + hipMemcpyHostToDevice + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // 
Calculate expected results on host + U1 expected1 = std::accumulate(input1.begin(), input1.end(), T1(0)); + U2 expected2 = std::accumulate(input2.begin(), input2.end(), T2(0)) + + std::accumulate(input3.begin(), input3.end(), T2(0)); + + // temp storage + size_t temp_storage_size_bytes; + // Get size of d_temp_storage + HIP_CHECK( + rocprim::reduce( + nullptr, + temp_storage_size_bytes, + rocprim::make_transform_iterator( + rocprim::make_zip_iterator( + rocprim::make_tuple(d_input1, d_input2, d_input3) + ), + tuple3to2_transform_op() + ), rocprim::make_zip_iterator( - rocprim::make_tuple(d_input1, d_input2, d_input3) + rocprim::make_tuple(d_output1, d_output2) + ), + input1.size(), + tuple2_reduce_op(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // temp_storage_size_bytes must be >0 + ASSERT_GT(temp_storage_size_bytes, 0); + + // allocate temporary storage + void * d_temp_storage = nullptr; + HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); + HIP_CHECK(hipDeviceSynchronize()); + ASSERT_NE(d_temp_storage, nullptr); + + // Run + HIP_CHECK( + rocprim::reduce( + d_temp_storage, + temp_storage_size_bytes, + rocprim::make_transform_iterator( + rocprim::make_zip_iterator( + rocprim::make_tuple(d_input1, d_input2, d_input3) + ), + tuple3to2_transform_op() ), - tuple3to2_transform_op() - ), - rocprim::make_zip_iterator( - rocprim::make_tuple(d_output1, d_output2) - ), - input1.size(), - tuple2_reduce_op(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // temp_storage_size_bytes must be >0 - ASSERT_GT(temp_storage_size_bytes, 0); - - // allocate temporary storage - void * d_temp_storage = nullptr; - HIP_CHECK(hipMalloc(&d_temp_storage, temp_storage_size_bytes)); - HIP_CHECK(hipDeviceSynchronize()); - ASSERT_NE(d_temp_storage, nullptr); - - // Run - HIP_CHECK( - rocprim::reduce( - d_temp_storage, - temp_storage_size_bytes, - rocprim::make_transform_iterator( rocprim::make_zip_iterator( - 
rocprim::make_tuple(d_input1, d_input2, d_input3) + rocprim::make_tuple(d_output1, d_output2) ), - tuple3to2_transform_op() - ), - rocprim::make_zip_iterator( - rocprim::make_tuple(d_output1, d_output2) - ), - input1.size(), - tuple2_reduce_op(), - stream, - debug_synchronous - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Copy output to host - HIP_CHECK( - hipMemcpy( - output1.data(), d_output1, - output1.size() * sizeof(U1), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK( - hipMemcpy( - output2.data(), d_output2, - output2.size() * sizeof(U2), - hipMemcpyDeviceToHost - ) - ); - HIP_CHECK(hipDeviceSynchronize()); - - // Check if output values are as expected - auto diff1 = std::max(std::abs(0.01f * expected1), U1(0.01f)); - if(std::is_integral::value) diff1 = 0; - ASSERT_NEAR(output1[0], expected1, diff1); - - auto diff2 = std::max(std::abs(0.01f * expected2), U2(0.01f)); - if(std::is_integral::value) diff2 = 0; - ASSERT_NEAR(output2[0], expected2, diff2); - - hipFree(d_input1); - hipFree(d_input2); - hipFree(d_input3); - hipFree(d_output1); - hipFree(d_output2); - hipFree(d_temp_storage); + input1.size(), + tuple2_reduce_op(), + stream, + debug_synchronous + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Copy output to host + HIP_CHECK( + hipMemcpy( + output1.data(), d_output1, + output1.size() * sizeof(U1), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK( + hipMemcpy( + output2.data(), d_output2, + output2.size() * sizeof(U2), + hipMemcpyDeviceToHost + ) + ); + HIP_CHECK(hipDeviceSynchronize()); + + // Check if output values are as expected + auto diff1 = std::max(std::abs(0.01f * expected1), U1(0.01f)); + if(std::is_integral::value) diff1 = 0; + ASSERT_NEAR(output1[0], expected1, diff1); + + auto diff2 = std::max(std::abs(0.01f * expected2), U2(0.01f)); + if(std::is_integral::value) diff2 = 0; + ASSERT_NEAR(output2[0], expected2, diff2); + + hipFree(d_input1); + hipFree(d_input2); + hipFree(d_input3); + hipFree(d_output1); + hipFree(d_output2); + 
hipFree(d_temp_storage); + } + } From e56e20c8302972a94c2111b7355f3ead00525d3a Mon Sep 17 00:00:00 2001 From: Andres Arpi Date: Tue, 28 Jan 2020 20:05:50 +0100 Subject: [PATCH 17/25] Change the seed_value selection mechanism. --- README.md | 24 ++++++++++ test/rocprim/test_arg_index_iterator.cpp | 8 ++-- test/rocprim/test_block_discontinuity.cpp | 12 ++--- test/rocprim/test_block_histogram.cpp | 4 +- test/rocprim/test_block_load_store.cpp | 12 ++--- test/rocprim/test_block_radix_sort.cpp | 8 ++-- test/rocprim/test_block_reduce.cpp | 16 +++---- test/rocprim/test_block_scan.cpp | 48 +++++++++---------- test/rocprim/test_block_sort.cpp | 12 ++--- test/rocprim/test_constant_iterator.cpp | 4 +- test/rocprim/test_counting_iterator.cpp | 4 +- test/rocprim/test_device_binary_search.cpp | 12 ++--- test/rocprim/test_device_histogram.cpp | 16 +++---- test/rocprim/test_device_merge.cpp | 8 ++-- test/rocprim/test_device_merge_sort.cpp | 8 ++-- test/rocprim/test_device_partition.cpp | 8 ++-- test/rocprim/test_device_radix_sort.cpp | 16 +++---- test/rocprim/test_device_reduce.cpp | 12 ++--- test/rocprim/test_device_reduce_by_key.cpp | 4 +- .../rocprim/test_device_run_length_encode.cpp | 8 ++-- test/rocprim/test_device_scan.cpp | 16 +++---- .../test_device_segmented_radix_sort.cpp | 16 +++---- test/rocprim/test_device_segmented_reduce.cpp | 4 +- test/rocprim/test_device_segmented_scan.cpp | 16 +++---- test/rocprim/test_device_select.cpp | 12 ++--- test/rocprim/test_device_transform.cpp | 8 ++-- test/rocprim/test_discard_iterator.cpp | 8 ++-- test/rocprim/test_intrinsics.cpp | 20 ++++---- test/rocprim/test_seed.hpp | 2 +- test/rocprim/test_texture_cache_iterator.cpp | 4 +- test/rocprim/test_transform_iterator.cpp | 4 +- test/rocprim/test_warp_reduce.cpp | 28 +++++------ test/rocprim/test_warp_scan.cpp | 28 +++++------ test/rocprim/test_warp_sort.cpp | 8 ++-- test/rocprim/test_zip_iterator.cpp | 8 ++-- 35 files changed, 225 insertions(+), 201 deletions(-) diff --git a/README.md 
b/README.md index ccc3a3805..914208b82 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,30 @@ ctest ./test/rocprim/ ``` +## Using custom seeds for the tests + +Go to the `rocPRIM/test/rocprim/test_seed.hpp` file. +```cpp +//(1) +static constexpr int random_seeds_count = 10; + +//(2) +static constexpr unsigned int seeds [] = {0, 2, 10, 1000}; + +//(3) +static constexpr size_t seed_size = sizeof(seeds) / sizeof(seeds[0]); +``` + +(1) defines a constant that sets how many passes over the tests will be done with runtime-generated seeds. Modify at will. + +(2) defines the user generated seeds. Each of the elements of the array will be used as seed for all tests. Modify at will. If no static seeds are desired, the array should be left empty. + +```cpp +static constexpr unsigned int seeds [] = {}; +``` + +(3) this line should never be modified. + ## Running Benchmarks ```shell diff --git a/test/rocprim/test_arg_index_iterator.cpp b/test/rocprim/test_arg_index_iterator.cpp index aec08929c..770258698 100644 --- a/test/rocprim/test_arg_index_iterator.cpp +++ b/test/rocprim/test_arg_index_iterator.cpp @@ -66,9 +66,9 @@ TYPED_TEST(RocprimArgIndexIteratorTests, Equal) using T = typename TestFixture::input_type; using Iterator = typename rocprim::arg_index_iterator; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); std::vector input = test_utils::get_random_data(5, 1, 200, seed_value); @@ -118,9 +118,9 @@ TYPED_TEST(RocprimArgIndexIteratorTests, ReduceArgMinimum) hipStream_t stream = 0; // default - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_block_discontinuity.cpp b/test/rocprim/test_block_discontinuity.cpp index 36ad8f8c3..5682347b2 100644 --- a/test/rocprim/test_block_discontinuity.cpp +++ b/test/rocprim/test_block_discontinuity.cpp @@ -254,9 +254,9 @@ auto test_block_discontinuity() return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -366,9 +366,9 @@ auto test_block_discontinuity() return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -478,9 +478,9 @@ auto test_block_discontinuity() return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_block_histogram.cpp b/test/rocprim/test_block_histogram.cpp index d30a68d51..13cdccea1 100644 --- a/test/rocprim/test_block_histogram.cpp +++ b/test/rocprim/test_block_histogram.cpp @@ -138,9 +138,9 @@ void test_block_histogram_input_arrays() const size_t bin_sizes = bin * 37; const size_t grid_size = size / items_per_block; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_block_load_store.cpp b/test/rocprim/test_block_load_store.cpp index 5eb7fc19d..fb453a480 100644 --- a/test/rocprim/test_block_load_store.cpp +++ b/test/rocprim/test_block_load_store.cpp @@ -280,9 +280,9 @@ TYPED_TEST(RocprimBlockLoadStoreClassTests, LoadStoreClass) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -380,9 +380,9 @@ TYPED_TEST(RocprimBlockLoadStoreClassTests, LoadStoreClassValid) const size_t valid = items_per_block - 32; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -493,9 +493,9 @@ TYPED_TEST(RocprimBlockLoadStoreClassTests, LoadStoreClassDefault) const size_t valid = items_per_thread + 1; int _default = -1; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_block_radix_sort.cpp b/test/rocprim/test_block_radix_sort.cpp index 7f69420fb..b61738daa 100644 --- a/test/rocprim/test_block_radix_sort.cpp +++ b/test/rocprim/test_block_radix_sort.cpp @@ -228,9 +228,9 @@ auto test_block_radix_sort() const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -329,9 +329,9 @@ auto test_block_radix_sort() const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_block_reduce.cpp b/test/rocprim/test_block_reduce.cpp index cde4cc03e..d0fe25e62 100644 --- a/test/rocprim/test_block_reduce.cpp +++ b/test/rocprim/test_block_reduce.cpp @@ -134,9 +134,9 @@ TYPED_TEST(RocprimBlockReduceSingleValueTests, Reduce) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -193,9 +193,9 @@ TYPED_TEST(RocprimBlockReduceSingleValueTests, ReduceMultiplies) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -312,9 +312,9 @@ TYPED_TEST(RocprimBlockReduceSingleValueTests, ReduceValid) using binary_op_type = typename std::conditional::value, test_utils::half_plus, rp::plus>::type; constexpr size_t block_size = TestFixture::block_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const unsigned int valid_items = test_utils::get_random_value(block_size - 10, block_size, seed_value); @@ -429,9 +429,9 @@ void test_block_reduce_input_arrays() const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_block_scan.cpp b/test/rocprim/test_block_scan.cpp index 622d2b48b..e1aa41bd4 100644 --- a/test/rocprim/test_block_scan.cpp +++ b/test/rocprim/test_block_scan.cpp @@ -271,9 +271,9 @@ TYPED_TEST(RocprimBlockScanSingleValueTests, InclusiveScan) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -326,9 +326,9 @@ TYPED_TEST(RocprimBlockScanSingleValueTests, InclusiveScanReduce) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -392,9 +392,9 @@ TYPED_TEST(RocprimBlockScanSingleValueTests, InclusiveScanPrefixCallback) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -460,9 +460,9 @@ TYPED_TEST(RocprimBlockScanSingleValueTests, ExclusiveScan) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -516,9 +516,9 @@ TYPED_TEST(RocprimBlockScanSingleValueTests, ExclusiveScanReduce) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -592,9 +592,9 @@ TYPED_TEST(RocprimBlockScanSingleValueTests, ExclusiveScanPrefixCallback) const size_t size = block_size * 58; const size_t grid_size = size / block_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -905,9 +905,9 @@ auto test_block_scan_input_arrays() const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -988,9 +988,9 @@ auto test_block_scan_input_arrays() const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -1102,9 +1102,9 @@ auto test_block_scan_input_arrays() const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -1218,9 +1218,9 @@ auto test_block_scan_input_arrays() const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -1304,9 +1304,9 @@ auto test_block_scan_input_arrays() const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -1415,9 +1415,9 @@ auto test_block_scan_input_arrays() const size_t size = items_per_block * 19; const size_t grid_size = size / items_per_block; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_block_sort.cpp b/test/rocprim/test_block_sort.cpp index f7f54003a..5c366b26e 100644 --- a/test/rocprim/test_block_sort.cpp +++ b/test/rocprim/test_block_sort.cpp @@ -72,9 +72,9 @@ TYPED_TEST(RocprimBlockSortTests, SortKey) const size_t size = block_size * 1134; const size_t grid_size = size / block_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -167,9 +167,9 @@ TYPED_TEST(RocprimBlockSortTests, SortKeyValue) const size_t size = block_size * 1134; const size_t grid_size = size / block_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -303,9 +303,9 @@ TYPED_TEST(RocprimBlockSortTests, CustomSortKeyValue) const size_t size = block_size * 1134; const size_t grid_size = size / block_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_constant_iterator.cpp b/test/rocprim/test_constant_iterator.cpp index 21e4ad732..58b532a4a 100644 --- a/test/rocprim/test_constant_iterator.cpp +++ b/test/rocprim/test_constant_iterator.cpp @@ -83,9 +83,9 @@ TYPED_TEST(RocprimConstantIteratorTests, Transform) hipStream_t stream = 0; // default - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Create constant_iterator with random starting point diff --git a/test/rocprim/test_counting_iterator.cpp b/test/rocprim/test_counting_iterator.cpp index cd108ef79..3ad6f92b8 100644 --- a/test/rocprim/test_counting_iterator.cpp +++ b/test/rocprim/test_counting_iterator.cpp @@ -83,9 +83,9 @@ TYPED_TEST(RocprimCountingIteratorTests, Transform) hipStream_t stream = 0; // default - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Create counting_iterator with random starting point diff --git a/test/rocprim/test_device_binary_search.cpp b/test/rocprim/test_device_binary_search.cpp index 763a8b407..8ce308a95 100644 --- a/test/rocprim/test_device_binary_search.cpp +++ b/test/rocprim/test_device_binary_search.cpp @@ -97,9 +97,9 @@ TYPED_TEST(RocprimDeviceBinarySearch, LowerBound) compare_op_type compare_op; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); for(size_t size : get_sizes(seed_value)) @@ -211,9 +211,9 @@ TYPED_TEST(RocprimDeviceBinarySearch, UpperBound) compare_op_type compare_op; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); for(size_t size : get_sizes(seed_value)) @@ -325,9 +325,9 @@ TYPED_TEST(RocprimDeviceBinarySearch, BinarySearch) compare_op_type compare_op; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); for(size_t size : get_sizes(seed_value)) diff --git a/test/rocprim/test_device_histogram.cpp b/test/rocprim/test_device_histogram.cpp index c0035de57..f3e8f571d 100644 --- a/test/rocprim/test_device_histogram.cpp +++ b/test/rocprim/test_device_histogram.cpp @@ -195,9 +195,9 @@ TYPED_TEST(RocprimDeviceHistogramEven, Even) const size_t row_stride_bytes = row_stride * sizeof(sample_type); const size_t size = std::max(1, rows * row_stride); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -411,9 +411,9 @@ TYPED_TEST(RocprimDeviceHistogramRange, Range) } levels.push_back(level); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); std::vector input = get_random_samples(size, levels[0], levels[bins], seed_value); @@ -627,9 +627,9 @@ TYPED_TEST(RocprimDeviceHistogramMultiEven, MultiEven) const size_t size = std::max(1, rows * row_stride); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -877,9 +877,9 @@ TYPED_TEST(RocprimDeviceHistogramMultiRange, MultiRange) const size_t row_stride_bytes = row_stride * sizeof(sample_type); const size_t size = std::max(1, rows * row_stride); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_device_merge.cpp b/test/rocprim/test_device_merge.cpp index 3e9b494e0..5dae9947f 100644 --- a/test/rocprim/test_device_merge.cpp +++ b/test/rocprim/test_device_merge.cpp @@ -117,9 +117,9 @@ TYPED_TEST(RocprimDeviceMergeTests, MergeKey) // compare function compare_op_type compare_op; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -250,9 +250,9 @@ TYPED_TEST(RocprimDeviceMergeTests, MergeKeyValue) // compare function compare_op_type compare_op; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_device_merge_sort.cpp b/test/rocprim/test_device_merge_sort.cpp index ea4483533..fe72d1ed6 100644 --- a/test/rocprim/test_device_merge_sort.cpp +++ b/test/rocprim/test_device_merge_sort.cpp @@ -105,9 +105,9 @@ TYPED_TEST(RocprimDeviceSortTests, SortKey) bool in_place = false; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); for(size_t size : get_sizes(seed_value)) @@ -216,9 +216,9 @@ TYPED_TEST(RocprimDeviceSortTests, SortKeyValue) bool in_place = false; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); for(size_t size : get_sizes(seed_value)) diff --git a/test/rocprim/test_device_partition.cpp b/test/rocprim/test_device_partition.cpp index aea4d70c1..83309aeb4 100644 --- a/test/rocprim/test_device_partition.cpp +++ b/test/rocprim/test_device_partition.cpp @@ -99,9 +99,9 @@ TYPED_TEST(RocprimDevicePartitionTests, Flagged) hipStream_t stream = 0; // default stream - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -344,9 +344,9 @@ TYPED_TEST(RocprimDevicePartitionTests, Predicate) return false; }; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); diff --git a/test/rocprim/test_device_radix_sort.cpp b/test/rocprim/test_device_radix_sort.cpp index fdc168e23..b890febdb 100644 --- a/test/rocprim/test_device_radix_sort.cpp +++ b/test/rocprim/test_device_radix_sort.cpp @@ -161,9 +161,9 @@ TYPED_TEST(RocprimDeviceRadixSort, SortKeys) bool in_place = false; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); for(size_t size : get_sizes(seed_value)) @@ -291,9 +291,9 @@ TYPED_TEST(RocprimDeviceRadixSort, SortPairs) bool in_place = false; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); for(size_t size : get_sizes(seed_value)) @@ -465,9 +465,9 @@ TYPED_TEST(RocprimDeviceRadixSort, SortKeysDoubleBuffer) const bool debug_synchronous = false; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -581,9 +581,9 @@ TYPED_TEST(RocprimDeviceRadixSort, SortPairsDoubleBuffer) const bool debug_synchronous = false; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); diff --git a/test/rocprim/test_device_reduce.cpp b/test/rocprim/test_device_reduce.cpp index dc460c999..5177b2984 100644 --- a/test/rocprim/test_device_reduce.cpp +++ b/test/rocprim/test_device_reduce.cpp @@ -156,9 +156,9 @@ TYPED_TEST(RocprimDeviceReduceTests, Reduce) const bool debug_synchronous = TestFixture::debug_synchronous; static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -262,9 +262,9 @@ TYPED_TEST(RocprimDeviceReduceTests, ReduceMinimum) const bool debug_synchronous = TestFixture::debug_synchronous; static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -393,9 +393,9 @@ TYPED_TEST(RocprimDeviceReduceTests, ReduceArgMinimum) const bool debug_synchronous = TestFixture::debug_synchronous; static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); diff --git a/test/rocprim/test_device_reduce_by_key.cpp b/test/rocprim/test_device_reduce_by_key.cpp index 782ccb68a..62342d0ce 100644 --- a/test/rocprim/test_device_reduce_by_key.cpp +++ b/test/rocprim/test_device_reduce_by_key.cpp @@ -153,9 +153,9 @@ TYPED_TEST(RocprimDeviceReduceByKey, ReduceByKey) const unsigned int seed = 123; std::default_random_engine gen(seed); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); for(size_t size : get_sizes(seed_value)) diff --git a/test/rocprim/test_device_run_length_encode.cpp b/test/rocprim/test_device_run_length_encode.cpp index af9dd3f04..48f4a3756 100644 --- a/test/rocprim/test_device_run_length_encode.cpp +++ b/test/rocprim/test_device_run_length_encode.cpp @@ -120,9 +120,9 @@ TYPED_TEST(RocprimDeviceRunLengthEncode, Encode) const unsigned int seed = 123; std::default_random_engine gen(seed); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); for(size_t size : get_sizes(seed_value)) @@ -274,9 +274,9 @@ TYPED_TEST(RocprimDeviceRunLengthEncode, NonTrivialRuns) const unsigned int seed = 123; std::default_random_engine gen(seed); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); for(size_t size : get_sizes(seed_value)) diff --git a/test/rocprim/test_device_scan.cpp b/test/rocprim/test_device_scan.cpp index d42976c9d..7bd8af1bd 100644 --- a/test/rocprim/test_device_scan.cpp +++ b/test/rocprim/test_device_scan.cpp @@ -181,9 +181,9 @@ TYPED_TEST(RocprimDeviceScanTests, InclusiveScan) const bool debug_synchronous = TestFixture::debug_synchronous; static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -281,9 +281,9 @@ TYPED_TEST(RocprimDeviceScanTests, ExclusiveScan) const bool debug_synchronous = TestFixture::debug_synchronous; static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -384,9 +384,9 @@ TYPED_TEST(RocprimDeviceScanTests, InclusiveScanByKey) using scan_op_type = typename TestFixture::scan_op_type; const bool debug_synchronous = TestFixture::debug_synchronous; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -527,9 +527,9 @@ TYPED_TEST(RocprimDeviceScanTests, ExclusiveScanByKey) using scan_op_type = typename TestFixture::scan_op_type; const bool debug_synchronous = TestFixture::debug_synchronous; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); diff --git a/test/rocprim/test_device_segmented_radix_sort.cpp b/test/rocprim/test_device_segmented_radix_sort.cpp index 6dc2d607b..663c647b9 100644 --- a/test/rocprim/test_device_segmented_radix_sort.cpp +++ b/test/rocprim/test_device_segmented_radix_sort.cpp @@ -174,9 +174,9 @@ TYPED_TEST(RocprimDeviceSegmentedRadixSort, SortKeys) TestFixture::params::max_segment_length ); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -327,9 +327,9 @@ TYPED_TEST(RocprimDeviceSegmentedRadixSort, SortPairs) TestFixture::params::max_segment_length ); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -519,9 +519,9 @@ TYPED_TEST(RocprimDeviceSegmentedRadixSort, SortKeysDoubleBuffer) TestFixture::params::max_segment_length ); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -677,9 +677,9 @@ TYPED_TEST(RocprimDeviceSegmentedRadixSort, SortPairsDoubleBuffer) TestFixture::params::max_segment_length ); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); diff --git a/test/rocprim/test_device_segmented_reduce.cpp b/test/rocprim/test_device_segmented_reduce.cpp index 0abffa25a..a97f67f99 100644 --- a/test/rocprim/test_device_segmented_reduce.cpp +++ b/test/rocprim/test_device_segmented_reduce.cpp @@ -130,9 +130,9 @@ TYPED_TEST(RocprimDeviceSegmentedReduce, Reduce) TestFixture::params::max_segment_length ); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); for(size_t size : get_sizes(seed_value)) diff --git a/test/rocprim/test_device_segmented_scan.cpp b/test/rocprim/test_device_segmented_scan.cpp index fcffb3854..a8d19d389 100644 --- a/test/rocprim/test_device_segmented_scan.cpp +++ b/test/rocprim/test_device_segmented_scan.cpp @@ -128,9 +128,9 @@ TYPED_TEST(RocprimDeviceSegmentedScan, InclusiveScan) hipStream_t stream = 0; // default stream - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -261,9 +261,9 @@ TYPED_TEST(RocprimDeviceSegmentedScan, ExclusiveScan) hipStream_t stream = 0; // default stream - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -382,9 +382,9 @@ TYPED_TEST(RocprimDeviceSegmentedScan, InclusiveScanUsingHeadFlags) hipStream_t stream = 0; // default stream - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -519,9 +519,9 @@ TYPED_TEST(RocprimDeviceSegmentedScan, ExclusiveScanUsingHeadFlags) hipStream_t stream = 0; // default stream - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); diff --git a/test/rocprim/test_device_select.cpp b/test/rocprim/test_device_select.cpp index bf3e68b9c..b01e52e27 100644 --- a/test/rocprim/test_device_select.cpp +++ b/test/rocprim/test_device_select.cpp @@ -98,9 +98,9 @@ TYPED_TEST(RocprimDeviceSelectTests, Flagged) hipStream_t stream = 0; // default stream - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -259,9 +259,9 @@ TYPED_TEST(RocprimDeviceSelectTests, SelectOp) hipStream_t stream = 0; // default stream - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -454,9 +454,9 @@ TYPED_TEST(RocprimDeviceSelectTests, Unique) hipStream_t stream = 0; // default stream - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const auto sizes = get_sizes(seed_value); diff --git a/test/rocprim/test_device_transform.cpp b/test/rocprim/test_device_transform.cpp index 2775a0839..6b151f1f4 100644 --- a/test/rocprim/test_device_transform.cpp +++ b/test/rocprim/test_device_transform.cpp @@ -128,9 +128,9 @@ TYPED_TEST(RocprimDeviceTransformTests, Transform) static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; const bool debug_synchronous = TestFixture::debug_synchronous; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); @@ -224,9 +224,9 @@ TYPED_TEST(RocprimDeviceTransformTests, BinaryTransform) static constexpr bool use_identity_iterator = TestFixture::use_identity_iterator; const bool debug_synchronous = TestFixture::debug_synchronous; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const std::vector sizes = get_sizes(seed_value); diff --git a/test/rocprim/test_discard_iterator.cpp b/test/rocprim/test_discard_iterator.cpp index 86733908d..595dc0705 100644 --- a/test/rocprim/test_discard_iterator.cpp +++ b/test/rocprim/test_discard_iterator.cpp @@ -40,9 +40,9 @@ TEST(RocprimDiscardIteratorTests, Equal) { using Iterator = typename rocprim::discard_iterator; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); Iterator x(test_utils::get_random_value(0, 200, seed_value)); @@ -65,9 +65,9 @@ TEST(RocprimDiscardIteratorTests, Less) { using Iterator = typename rocprim::discard_iterator; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); Iterator x(test_utils::get_random_value(0, 200, seed_value)); diff --git a/test/rocprim/test_intrinsics.cpp b/test/rocprim/test_intrinsics.cpp index b04638099..3aa960aae 100644 --- a/test/rocprim/test_intrinsics.cpp +++ b/test/rocprim/test_intrinsics.cpp @@ -113,9 +113,9 @@ TYPED_TEST(RocprimIntrinsicsTests, ShuffleUp) const size_t hardware_warp_size = ::rocprim::warp_size(); const size_t size = hardware_warp_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate input @@ -211,9 +211,9 @@ TYPED_TEST(RocprimIntrinsicsTests, ShuffleDown) const size_t hardware_warp_size = ::rocprim::warp_size(); const size_t size = hardware_warp_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate input @@ -311,9 +311,9 @@ TYPED_TEST(RocprimIntrinsicsTests, ShuffleIndex) const size_t hardware_warp_size = ::rocprim::warp_size(); const size_t size = hardware_warp_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate input @@ -410,9 +410,9 @@ TEST(RocprimIntrinsicsTests, ShuffleUpCustomStruct) const size_t hardware_warp_size = ::rocprim::warp_size(); const size_t size = hardware_warp_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate input @@ -506,9 +506,9 @@ TEST(RocprimIntrinsicsTests, ShuffleUpCustomAlignedStruct) const size_t hardware_warp_size = ::rocprim::warp_size(); const size_t size = hardware_warp_size; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate input diff --git a/test/rocprim/test_seed.hpp b/test/rocprim/test_seed.hpp index aa25a44b5..5f5b092f4 100644 --- a/test/rocprim/test_seed.hpp +++ b/test/rocprim/test_seed.hpp @@ -21,7 +21,7 @@ #ifndef TEST_SEED_HPP_ #define TEST_SEED_HPP_ -static constexpr bool use_seed = true; +static constexpr int random_seeds_count = 10; static constexpr unsigned int seeds [] = {0, 2, 10, 1000}; static constexpr size_t seed_size = sizeof(seeds) / sizeof(seeds[0]); diff --git a/test/rocprim/test_texture_cache_iterator.cpp b/test/rocprim/test_texture_cache_iterator.cpp index d3fa1b671..fed178672 100644 --- a/test/rocprim/test_texture_cache_iterator.cpp +++ b/test/rocprim/test_texture_cache_iterator.cpp @@ -86,9 +86,9 @@ TYPED_TEST(RocprimTextureCacheIteratorTests, Transform) std::vector input(size); - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); for(size_t i = 0; i < size; i++) diff --git a/test/rocprim/test_transform_iterator.cpp b/test/rocprim/test_transform_iterator.cpp index eab954bf8..6bd227dfc 100644 --- a/test/rocprim/test_transform_iterator.cpp +++ b/test/rocprim/test_transform_iterator.cpp @@ -101,9 +101,9 @@ TYPED_TEST(RocprimTransformIteratorTests, TransformReduce) const size_t size = 1024; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_warp_reduce.cpp b/test/rocprim/test_warp_reduce.cpp index 5449a478b..9726d86ef 100644 --- a/test/rocprim/test_warp_reduce.cpp +++ b/test/rocprim/test_warp_reduce.cpp @@ -87,9 +87,9 @@ TYPED_TEST(RocprimWarpReduceTests, ReduceSum) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -189,9 +189,9 @@ TYPED_TEST(RocprimWarpReduceTests, AllReduceSum) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -299,9 +299,9 @@ TYPED_TEST(RocprimWarpReduceTests, ReduceSumValid) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -402,9 +402,9 @@ TYPED_TEST(RocprimWarpReduceTests, AllReduceSumValid) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -488,9 +488,9 @@ TYPED_TEST(RocprimWarpReduceTests, ReduceSumCustomStruct) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -601,9 +601,9 @@ TYPED_TEST(RocprimWarpReduceTests, HeadSegmentedReduceSum) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -739,9 +739,9 @@ TYPED_TEST(RocprimWarpReduceTests, TailSegmentedReduceSum) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? 
seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_warp_scan.cpp b/test/rocprim/test_warp_scan.cpp index 9689a9ba8..11dcf04c7 100644 --- a/test/rocprim/test_warp_scan.cpp +++ b/test/rocprim/test_warp_scan.cpp @@ -87,9 +87,9 @@ TYPED_TEST(RocprimWarpScanTests, InclusiveScan) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -198,9 +198,9 @@ TYPED_TEST(RocprimWarpScanTests, InclusiveScanReduce) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -321,9 +321,9 @@ TYPED_TEST(RocprimWarpScanTests, ExclusiveScan) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -435,9 +435,9 @@ TYPED_TEST(RocprimWarpScanTests, ExclusiveReduceScan) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -572,9 +572,9 @@ TYPED_TEST(RocprimWarpScanTests, Scan) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -716,9 +716,9 @@ TYPED_TEST(RocprimWarpScanTests, ScanReduce) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -849,9 +849,9 @@ TYPED_TEST(RocprimWarpScanTests, InclusiveScanCustomType) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_warp_sort.cpp b/test/rocprim/test_warp_sort.cpp index 02f45e14f..5723e73fd 100644 --- a/test/rocprim/test_warp_sort.cpp +++ b/test/rocprim/test_warp_sort.cpp @@ -92,9 +92,9 @@ TYPED_TEST(RocprimWarpSortShuffleBasedTests, Sort) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -181,9 +181,9 @@ TYPED_TEST(RocprimWarpSortShuffleBasedTests, SortKeyInt) return; } - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data diff --git a/test/rocprim/test_zip_iterator.cpp b/test/rocprim/test_zip_iterator.cpp index 51fb87c2d..eeb52ca06 100644 --- a/test/rocprim/test_zip_iterator.cpp +++ b/test/rocprim/test_zip_iterator.cpp @@ -159,9 +159,9 @@ TEST(RocprimZipIteratorTests, Transform) // using default stream hipStream_t stream = 0; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? 
rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data @@ -296,9 +296,9 @@ TEST(RocprimZipIteratorTests, TransformReduce) // using default stream hipStream_t stream = 0; - for (size_t seed_index = 0; seed_index < seed_size; seed_index++) + for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - unsigned int seed_value = use_seed ? seeds[seed_index] : rand(); + unsigned int seed_value = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); // Generate data From dd02a52ff55b61e1bdef63e08fbb413ccc34a476 Mon Sep 17 00:00:00 2001 From: Eiden Yoshida <47196116+eidenyoshida@users.noreply.github.com> Date: Thu, 6 Feb 2020 16:33:32 -0700 Subject: [PATCH 18/25] Refactor Jenkinsfiles to use new docker containers (#120) --- .jenkins/common.groovy | 84 ++++++++++++++++++++++++++ .jenkins/precheckin.groovy | 82 +++++++++++++++++++++++++ Jenkinsfile | 105 --------------------------------- docker/dockerfile-build-centos | 2 +- docker/dockerfile-build-sles | 3 +- 5 files changed, 169 insertions(+), 107 deletions(-) create mode 100644 .jenkins/common.groovy create mode 100644 .jenkins/precheckin.groovy delete mode 100644 Jenkinsfile diff --git a/.jenkins/common.groovy b/.jenkins/common.groovy new file mode 100644 index 000000000..30c893508 --- /dev/null +++ b/.jenkins/common.groovy @@ -0,0 +1,84 @@ +// This file is for internal AMD use. +// If you are interested in running your own Jenkins, please raise a github issue for assistance. 
+ +def runCompileCommand(platform, project, jobName) +{ + project.paths.construct_build_prefix() + + def command + + if(jobName.contains('hipclang')) + { + command = """#!/usr/bin/env bash + set -x + cd ${project.paths.project_build_prefix} + LD_LIBRARY_PATH=/opt/rocm/hcc/lib CXX=/opt/rocm/bin/hipcc ${project.paths.build_command} --hip-clang + """ + } + else + { + command = """#!/usr/bin/env bash + set -x + cd ${project.paths.project_build_prefix} + LD_LIBRARY_PATH=/opt/rocm/hcc/lib CXX=/opt/rocm/bin/hcc ${project.paths.build_command} + """ + } + + platform.runCommand(this, command) +} + +def runTestCommand (platform, project) +{ + String sudo = auxiliary.sudo(platform.jenkinsLabel) + def testCommand = 'ctest --output-on-failure' + + def command = """#!/usr/bin/env bash + set -x + cd ${project.paths.project_build_prefix} + cd ${project.testDirectory} + ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib ${testCommand} + """ + + platform.runCommand(this, command) +} + +def runPackageCommand(platform, project, jobName) +{ + def command + + if(platform.jenkinsLabel.contains('centos')) + { + command = """ + set -x + cd ${project.paths.project_build_prefix}/${project.testDirectory} + make package + rm -rf package && mkdir -p package + mv *.rpm package/ + rpm -qlp package/*.rpm + """ + + platform.runCommand(this, command) + platform.archiveArtifacts(this, """${project.paths.project_build_prefix}/${project.testDirectory}/package/*.rpm""") + } + else if(jobName.contains('hipclang')) + { + packageCommand = null + } + else + { + command = """ + set -x + cd ${project.paths.project_build_prefix}/${project.testDirectory} + make package + rm -rf package && mkdir -p package + mv *.deb package/ + dpkg -c package/*.deb + """ + + platform.runCommand(this, command) + platform.archiveArtifacts(this, """${project.paths.project_build_prefix}/${project.testDirectory}/package/*.deb""") + } +} + +return this + diff --git a/.jenkins/precheckin.groovy b/.jenkins/precheckin.groovy new file mode 100644 
index 000000000..ce2156b93 --- /dev/null +++ b/.jenkins/precheckin.groovy @@ -0,0 +1,82 @@ +#!/usr/bin/env groovy +@Library('rocJenkins@pong') _ +import com.amd.project.* +import com.amd.docker.* +import java.nio.file.Path; + +def runCI = +{ + nodeDetails, jobName-> + + def prj = new rocProject('rocPRIM', 'PreCheckin') + + def nodes = new dockerNodes(nodeDetails, jobName, prj) + + def commonGroovy + + boolean formatCheck = false + + def compileCommand = + { + platform, project-> + + commonGroovy = load "${project.paths.project_src_prefix}/.jenkins/common.groovy" + commonGroovy.runCompileCommand(platform, project, jobName) + } + + def testCommand = + { + platform, project-> + + commonGroovy.runTestCommand(platform, project) + } + + def packageCommand = + { + platform, project-> + + commonGroovy.runPackageCommand(platform, project, jobName) + } + + buildProject(prj, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) +} + +ci: { + String urlJobName = auxiliary.getTopJobName(env.BUILD_URL) + + def propertyList = ["compute-rocm-dkms-no-npi":[pipelineTriggers([cron('0 1 * * 0')])], + "compute-rocm-dkms-no-npi-hipclang":[pipelineTriggers([cron('0 1 * * 0')])], + "rocm-docker":[]] + propertyList = auxiliary.appendPropertyList(propertyList) + + def jobNameList = ["compute-rocm-dkms-no-npi":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), + "compute-rocm-dkms-no-npi-hipclang":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']]), + "rocm-docker":([ubuntu16:['gfx900'],centos7:['gfx906'],sles15sp1:['gfx908']])] + jobNameList = auxiliary.appendJobNameList(jobNameList) + + propertyList.each + { + jobName, property-> + if (urlJobName == jobName) + properties(auxiliary.addCommonProperties(property)) + } + + jobNameList.each + { + jobName, nodeDetails-> + if (urlJobName == jobName) + stage(jobName) { + runCI(nodeDetails, jobName) + } + } + + // For url job names that are outside of the standardJobNameSet i.e. 
compute-rocm-dkms-no-npi-1901 + Set standardJobNameSet = ["compute-rocm-dkms-no-npi", "compute-rocm-dkms-no-npi-hipclang", "rocm-docker"] + if(!standardJobNameSet.contains(urlJobName)) + { + properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) + stage(jobName) { + runCI([ubuntu16:['gfx906']], urlJobName) + } + } +} diff --git a/Jenkinsfile b/Jenkinsfile deleted file mode 100644 index a536d2b65..000000000 --- a/Jenkinsfile +++ /dev/null @@ -1,105 +0,0 @@ -#!/usr/bin/env groovy -@Library('rocJenkins') _ -import com.amd.project.* -import com.amd.docker.* - -//////////////////////////////////////////////////////////////////////// -import java.nio.file.Path; - -rocprimCI: -{ - - def rocprim = new rocProject('rocPRIM') - - def nodes = new dockerNodes(['gfx803 && centos7', 'ubuntu && gfx908', 'gfx900 && centos7', 'gfx906 && centos7', 'sles'], rocprim) - - boolean formatCheck = false - - def compileCommand = - { - platform, project-> - - project.paths.construct_build_prefix() - - def command - - if(platform.jenkinsLabel.contains('hip-clang')) - { - command = """#!/usr/bin/env bash - set -x - cd ${project.paths.project_build_prefix} - LD_LIBRARY_PATH=/opt/rocm/hcc/lib CXX=/opt/rocm/bin/hipcc ${project.paths.build_command} --hip-clang - """ - } - else - { - command = """#!/usr/bin/env bash - set -x - cd ${project.paths.project_build_prefix} - LD_LIBRARY_PATH=/opt/rocm/hcc/lib CXX=/opt/rocm/bin/hcc ${project.paths.build_command} - """ - } - - platform.runCommand(this, command) - } - - def testCommand = - { - platform, project-> - - String sudo = auxiliary.sudo(platform.jenkinsLabel) - def testCommand = 'ctest --output-on-failure' - - def command = """#!/usr/bin/env bash - set -x - cd ${project.paths.project_build_prefix} - cd ${project.testDirectory} - ${sudo} LD_LIBRARY_PATH=/opt/rocm/lib ${testCommand} - """ - - platform.runCommand(this, command) - } - - def packageCommand = - { - platform, project-> - - def command - - 
if(platform.jenkinsLabel.contains('centos')) - { - command = """ - set -x - cd ${project.paths.project_build_prefix}/${project.testDirectory} - make package - rm -rf package && mkdir -p package - mv *.rpm package/ - rpm -qlp package/*.rpm - """ - - platform.runCommand(this, command) - platform.archiveArtifacts(this, """${project.paths.project_build_prefix}/${project.testDirectory}/package/*.rpm""") - } - else if(platform.jenkinsLabel.contains('hip-clang')) - { - packageCommand = null - } - else - { - command = """ - set -x - cd ${project.paths.project_build_prefix}/${project.testDirectory} - make package - rm -rf package && mkdir -p package - mv *.deb package/ - dpkg -c package/*.deb - """ - - platform.runCommand(this, command) - platform.archiveArtifacts(this, """${project.paths.project_build_prefix}/${project.testDirectory}/package/*.deb""") - } - } - - buildProject(rocprim, formatCheck, nodes.dockerArray, compileCommand, testCommand, packageCommand) - -} diff --git a/docker/dockerfile-build-centos b/docker/dockerfile-build-centos index 6d7ff4618..a58722db8 100644 --- a/docker/dockerfile-build-centos +++ b/docker/dockerfile-build-centos @@ -7,7 +7,7 @@ LABEL maintainer="rocprim-maintainer@amd.com" USER root ARG user_uid -RUN yum install -y \ +RUN yum install -y --nogpgcheck\ sudo \ rock-dkms \ centos-release-scl \ diff --git a/docker/dockerfile-build-sles b/docker/dockerfile-build-sles index 00afab40e..bef918c9b 100644 --- a/docker/dockerfile-build-sles +++ b/docker/dockerfile-build-sles @@ -27,7 +27,8 @@ RUN zypper refresh && zypper -n install\ python2-PyYAML \ libboost_program_options1_66_0-devel\ libcxxtools9 \ - libnuma1 + libnuma1 \ + dkms # docker pipeline runs containers with particular uid # create a jenkins user with this specific uid so it can use sudo priviledges From 1b25b122e17caa92e3d95a3b22b13aa4070224de Mon Sep 17 00:00:00 2001 From: Eiden Yoshida <47196116+eidenyoshida@users.noreply.github.com> Date: Tue, 11 Feb 2020 12:21:59 -0700 Subject: 
[PATCH 19/25] Check against jobnamelist instead of standardjobnameset (#121) --- .jenkins/precheckin.groovy | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/.jenkins/precheckin.groovy b/.jenkins/precheckin.groovy index ce2156b93..4c73972b3 100644 --- a/.jenkins/precheckin.groovy +++ b/.jenkins/precheckin.groovy @@ -60,7 +60,7 @@ ci: { if (urlJobName == jobName) properties(auxiliary.addCommonProperties(property)) } - + jobNameList.each { jobName, nodeDetails-> @@ -70,12 +70,11 @@ ci: { } } - // For url job names that are outside of the standardJobNameSet i.e. compute-rocm-dkms-no-npi-1901 - Set standardJobNameSet = ["compute-rocm-dkms-no-npi", "compute-rocm-dkms-no-npi-hipclang", "rocm-docker"] - if(!standardJobNameSet.contains(urlJobName)) + // For url job names that are not listed by the jobNameList i.e. compute-rocm-dkms-no-npi-1901 + if(!jobNameList.keySet().contains(urlJobName)) { properties(auxiliary.addCommonProperties([pipelineTriggers([cron('0 1 * * *')])])) - stage(jobName) { + stage(urlJobName) { runCI([ubuntu16:['gfx906']], urlJobName) } } From 4eb7bb248aa922aa7ca50c1045b500a32ee33d80 Mon Sep 17 00:00:00 2001 From: saadrahim <44449863+saadrahim@users.noreply.github.com> Date: Wed, 12 Feb 2020 15:57:42 -0700 Subject: [PATCH 20/25] Changing emails to distribution list (#122) --- docker/dockerfile-install-centos | 2 +- docker/dockerfile-install-ubuntu | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/dockerfile-install-centos b/docker/dockerfile-install-centos index ea10ef507..f8ed1fa19 100644 --- a/docker/dockerfile-install-centos +++ b/docker/dockerfile-install-centos @@ -2,7 +2,7 @@ ARG base_image FROM ${base_image} -LABEL maintainer="saad.rahim@amd.com" +LABEL maintainer="rocprim-maintainer@amd.com" # Copy the rpm package of rocprim into the container from host COPY *.rpm /tmp/ diff --git a/docker/dockerfile-install-ubuntu b/docker/dockerfile-install-ubuntu index 9f7920b35..cc29ee204 100755 ---
a/docker/dockerfile-install-ubuntu +++ b/docker/dockerfile-install-ubuntu @@ -2,7 +2,7 @@ ARG base_image FROM ${base_image} -LABEL maintainer="saad.rahim@amd.com" +LABEL maintainer="rocprim-maintainer@amd.com" # Copy the deb package of rocprim into the container from host COPY *.deb /tmp/ From df6c698c2a840e523849ee98787331bddce20edc Mon Sep 17 00:00:00 2001 From: Jason Burmark Date: Fri, 28 Feb 2020 09:49:19 -0800 Subject: [PATCH 21/25] Allow including device_radix_sort separately (#124) Include the device_transform.hpp header in device_radix_sort.hpp to fix a compilation issue that occurs when including device_radix_sort alone. --- rocprim/include/rocprim/device/device_radix_sort.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/rocprim/include/rocprim/device/device_radix_sort.hpp b/rocprim/include/rocprim/device/device_radix_sort.hpp index 83561263f..e40910d05 100644 --- a/rocprim/include/rocprim/device/device_radix_sort.hpp +++ b/rocprim/include/rocprim/device/device_radix_sort.hpp @@ -35,6 +35,7 @@ #include "../types.hpp" #include "device_radix_sort_config.hpp" +#include "device_transform.hpp" #include "detail/device_radix_sort.hpp" /// \addtogroup devicemodule From 5af3f3ab80ef6643cf35cf054dae0fadbce8091b Mon Sep 17 00:00:00 2001 From: saadrahim <44449863+saadrahim@users.noreply.github.com> Date: Fri, 28 Feb 2020 14:54:24 -0700 Subject: [PATCH 22/25] Revert "Add half-overload for load/store-volatile" (#127) This reverts commit f1ab775c716973eaff5fbee70fe2c4403d6ab5b6. 
--- rocprim/include/rocprim/detail/various.hpp | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/rocprim/include/rocprim/detail/various.hpp b/rocprim/include/rocprim/detail/various.hpp index c646b062a..65973d37c 100644 --- a/rocprim/include/rocprim/detail/various.hpp +++ b/rocprim/include/rocprim/detail/various.hpp @@ -180,12 +180,6 @@ auto store_volatile(T * output, T value) } } -ROCPRIM_DEVICE inline -void store_volatile(half * output, half value) -{ - *reinterpret_cast(output) = value; -} - template ROCPRIM_DEVICE inline auto load_volatile(T * input) @@ -215,13 +209,6 @@ auto load_volatile(T * input) return retval; } -ROCPRIM_DEVICE inline -half load_volatile(half * input) -{ - half retval = *reinterpret_cast(input); - return retval; -} - // A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions template struct raw_storage From d24f119a10948c66d2280efa66c00a7518a60aa7 Mon Sep 17 00:00:00 2001 From: iotamudelta Date: Tue, 3 Mar 2020 18:54:08 -0600 Subject: [PATCH 23/25] Switch to hipMemcpyWithStream (#123) --- rocprim/include/rocprim/device/device_run_length_encode.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rocprim/include/rocprim/device/device_run_length_encode.hpp b/rocprim/include/rocprim/device/device_run_length_encode.hpp index 2ffb297f2..6b63f6a3d 100644 --- a/rocprim/include/rocprim/device/device_run_length_encode.hpp +++ b/rocprim/include/rocprim/device/device_run_length_encode.hpp @@ -378,9 +378,7 @@ hipError_t run_length_encode_non_trivial_runs(void * temporary_storage, // Read count of all runs (including trivial runs) count_type all_runs_count; - error = hipMemcpyAsync(&all_runs_count, all_runs_count_tmp, sizeof(count_type), hipMemcpyDeviceToHost, stream); - if(error != hipSuccess) return error; - error = hipStreamSynchronize(stream); + error = hipMemcpyWithStream(&all_runs_count, all_runs_count_tmp, sizeof(count_type), hipMemcpyDeviceToHost, stream); if(error != 
hipSuccess) return error; // Select non-trivial runs From a910e87765327ce6d378453bbe3ffea8482fbf20 Mon Sep 17 00:00:00 2001 From: saadrahim <44449863+saadrahim@users.noreply.github.com> Date: Wed, 4 Mar 2020 14:27:44 -0700 Subject: [PATCH 24/25] Updating version for rocm 3.3 release (#129) --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 02e846e75..2987facd8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ # MIT License # -# Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. +# Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -73,7 +73,7 @@ include(cmake/Dependencies.cmake) set(AMDGPU_TARGETS gfx803;gfx900;gfx906;gfx908 CACHE STRING "List of specific machine types for library to target") # Setup VERSION -set(VERSION_STRING "2.9.0") +set(VERSION_STRING "2.10.0") rocm_setup_version(VERSION ${VERSION_STRING}) # Print configuration summary From 87e7c25ddfb5fd9a71f65a5d9680bb4f28916f50 Mon Sep 17 00:00:00 2001 From: amdkila <47991923+amdkila@users.noreply.github.com> Date: Fri, 29 Nov 2019 15:58:42 -0700 Subject: [PATCH 25/25] Remove rock-dkms from dockerfile (#113) --- docker/dockerfile-build-sles | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/dockerfile-build-sles b/docker/dockerfile-build-sles index bef918c9b..00fd64a62 100644 --- a/docker/dockerfile-build-sles +++ b/docker/dockerfile-build-sles @@ -13,7 +13,7 @@ ARG user_uid # * rocfft-test: gfortran, googletest # * rocfft-bench: libboost-program-options-dev # * libhsakmt.so: libnuma1 -RUN zypper refresh && zypper -n install\ +RUN zypper -n update && zypper -n install\ sudo \ ca-certificates \ git \