Skip to content

Commit

Permalink
initial HIP support
Browse files Browse the repository at this point in the history
  • Loading branch information
rabauke committed Oct 18, 2023
1 parent 9b06232 commit b2fedc0
Show file tree
Hide file tree
Showing 40 changed files with 281 additions and 97 deletions.
6 changes: 6 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ find_package(TBB)

include(CheckLanguage)
check_language(CUDA)
check_language(HIP)

include_directories(..)
link_libraries(trng4::trng4)
Expand Down Expand Up @@ -61,3 +62,8 @@ if(CMAKE_CUDA_COMPILER)
add_executable_and_copy_dlls(pi_leap_cuda pi_leap_cuda.cu)
set_property(TARGET pi_leap_cuda PROPERTY CUDA_STANDARD 11)
endif()
if(CMAKE_HIP_COMPILER)
enable_language(HIP)
add_executable_and_copy_dlls(pi_block_hip pi_block_hip.hip)
set_source_files_properties(pi_block_hip.hip PROPERTIES LANGUAGE HIP)
endif()
72 changes: 72 additions & 0 deletions examples/pi_block_hip.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
// Copyright (c) 2000-2022, Heiko Bauke
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
//
// * Neither the name of the copyright holder nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.

#include <cstdlib>
#include <iostream>
#include <vector>
#include <trng/yarn5s.hpp>
#include <trng/uniform01_dist.hpp>
#include <hip/hip_runtime.h>


__global__ void parallel_pi(long samples, long *in, trng::yarn5s r) {
long rank = hipThreadIdx_x;
long size = hipBlockDim_x;
r.jump(2 * (rank * samples / size)); // jump ahead
trng::uniform01_dist<float> u; // random number distribution
in[rank] = 0; // local number of points in circle
for (long i = rank * samples / size; i < (rank + 1) * samples / size; ++i) {
const float x = u(r), y = u(r); // choose random x- and y-coordinates
if (x * x + y * y <= 1) // is point in circle?
++in[rank]; // increase thread-local counter
}
}

int main(int argc, char *argv[]) {
const long samples{1000000l}; // total number of points in square
const int size{128}; // number of threads
long *in_device;
hipMalloc(&in_device, size * sizeof(*in_device));
trng::yarn5s r;
// start parallel Monte Carlo
parallel_pi<<<1, size>>>(samples, in_device, r);
// gather results
std::vector<long> in(size);
hipMemcpy(in.data(), in_device, size * sizeof(*in_device), hipMemcpyDeviceToHost);
hipFree(in_device);
long sum{0};
for (int rank{0}; rank < size; ++rank)
sum += in[rank];
// print result
std::cout << "pi = " << 4.0 * sum / samples << std::endl;
return EXIT_SUCCESS;
}
4 changes: 2 additions & 2 deletions trng/beta_dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ namespace trng {
if (x < 0 or x > 1)
return 0;
if ((x == 0 and P.alpha() - 1 < 0) or (x == 1 and P.beta() - 1 < 0)) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand All @@ -185,7 +185,7 @@ namespace trng {
TRNG_CUDA_ENABLE
result_type icdf(result_type x) const {
if (x < 0 or x > 1) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand Down
2 changes: 1 addition & 1 deletion trng/cauchy_dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ namespace trng {
TRNG_CUDA_ENABLE
result_type icdf(result_type x) const {
if (x <= 0 or x >= 1) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand Down
2 changes: 1 addition & 1 deletion trng/chi_square_dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ namespace trng {
TRNG_CUDA_ENABLE
result_type icdf(result_type x) const {
if (x <= 0 or x >= 1) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand Down
7 changes: 6 additions & 1 deletion trng/cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,18 @@

#define TRNG_CUDA_HPP

#if defined __CUDACC__
#if defined __CUDACC__ && !(defined __HIPCC__)

#define TRNG_CUDA 1
#define TRNG_CUDA_ENABLE __device__ __host__

#include <cuda.h>

#elif defined __HIPCC__

#define TRNG_CUDA 1
#define TRNG_CUDA_ENABLE __device__ __host__

#else

#define TRNG_CUDA_ENABLE
Expand Down
2 changes: 1 addition & 1 deletion trng/exponential_dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ namespace trng {
TRNG_CUDA_ENABLE
result_type icdf(result_type x) const {
if (x < 0 or x > 1) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand Down
2 changes: 1 addition & 1 deletion trng/extreme_value_dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ namespace trng {
TRNG_CUDA_ENABLE
result_type icdf(result_type x) const {
if (x <= 0 or x >= 1) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand Down
2 changes: 1 addition & 1 deletion trng/gamma_dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ namespace trng {
TRNG_CUDA_ENABLE
result_type icdf(result_type x) const {
if (x <= 0 or x >= 1) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand Down
6 changes: 3 additions & 3 deletions trng/int_math.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ namespace trng {

TRNG_CUDA_ENABLE
inline int32_t modulo_inverse(int32_t a, int32_t m) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
if (a <= 0 or m <= 1)
utility::throw_this(
std::invalid_argument("invalid argument in trng::int_math::modulo_inverse"));
Expand All @@ -130,7 +130,7 @@ namespace trng {
f = flast - q * f;
flast = temp;
}
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
if (a == 0)
utility::throw_this(std::runtime_error("no inverse in trng::int_math::modulo_inverse"));
#endif
Expand Down Expand Up @@ -189,7 +189,7 @@ namespace trng {
}
}
// test if a solution exists
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
for (int i{rank}; i < n; ++i)
if (b[p[i]] != 0)
utility::throw_this(
Expand Down
2 changes: 1 addition & 1 deletion trng/lcg64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ namespace trng {

TRNG_CUDA_ENABLE
inline void lcg64::split(unsigned int s, unsigned int n) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
if (s < 1 or n >= s)
utility::throw_this(std::invalid_argument("invalid argument for trng::lcg64::split"));
#endif
Expand Down
2 changes: 1 addition & 1 deletion trng/lcg64_count_shift.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ namespace trng {

TRNG_CUDA_ENABLE
inline void lcg64_count_shift::split(unsigned int s, unsigned int n) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
if (s < 1 or n >= s)
utility::throw_this(
std::invalid_argument("invalid argument for trng::lcg64_count_shift::split"));
Expand Down
2 changes: 1 addition & 1 deletion trng/lcg64_shift.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ namespace trng {

TRNG_CUDA_ENABLE
inline void lcg64_shift::split(unsigned int s, unsigned int n) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
if (s < 1 or n >= s)
utility::throw_this(
std::invalid_argument("invalid argument for trng::lcg64_shift::split"));
Expand Down
47 changes: 45 additions & 2 deletions trng/limits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#include <cfloat>
#include <trng/cuda.hpp>

#if defined TRNG_CUDA
#if defined __CUDACC__ && !(defined __HIPCC__)
#include <math_constants.h>
#include <cuda/std/limits>
#endif
Expand All @@ -47,8 +47,51 @@ namespace trng {

namespace math {

#if defined TRNG_CUDA
#if defined __CUDACC__ && !(defined __HIPCC__)
using cuda::std::numeric_limits;
#elif defined __HIPCC__
template<typename T>
class numeric_limits {
public:
static constexpr bool is_specialized = ::std::numeric_limits<T>::is_specialized;
static constexpr T min() noexcept { return ::std::numeric_limits<T>::min(); }
static constexpr T max() noexcept { return ::std::numeric_limits<T>::max(); }
static constexpr int digits = ::std::numeric_limits<T>::digits;
static constexpr int digits10 = ::std::numeric_limits<T>::digits10;
static constexpr bool is_signed = ::std::numeric_limits<T>::is_signed;
static constexpr bool is_integer = ::std::numeric_limits<T>::is_integer;
static constexpr bool is_exact = ::std::numeric_limits<T>::is_exact;
static constexpr int radix = ::std::numeric_limits<T>::radix;
static constexpr T epsilon() noexcept { return ::std::numeric_limits<T>::epsilon(); }
static constexpr T round_error() noexcept {
return ::std::numeric_limits<T>::round_error();
}
static constexpr int min_exponent = ::std::numeric_limits<T>::min_exponent;
static constexpr int min_exponent10 = ::std::numeric_limits<T>::min_exponent10;
static constexpr int max_exponent = ::std::numeric_limits<T>::max_exponent;
static constexpr int max_exponent10 = ::std::numeric_limits<T>::max_exponent10;
static constexpr bool has_infinity = ::std::numeric_limits<T>::has_infinity;
static constexpr bool has_quiet_NaN = ::std::numeric_limits<T>::has_quiet_NaN;
static constexpr bool has_signaling_NaN = ::std::numeric_limits<T>::has_signaling_NaN;
static constexpr ::std::float_denorm_style has_denorm =
::std::numeric_limits<T>::has_denorm;
static constexpr bool has_denorm_loss = ::std::numeric_limits<T>::has_denorm_loss;
static constexpr T infinity() noexcept { return ::std::numeric_limits<T>::infinity(); }
static constexpr T quiet_NaN() noexcept { return ::std::numeric_limits<T>::quiet_NaN(); }
static constexpr T signaling_NaN() noexcept {
return ::std::numeric_limits<T>::signaling_NaN();
}
static constexpr T denorm_min() noexcept {
return ::std::numeric_limits<T>::denorm_min();
}
static constexpr bool is_iec559 = ::std::numeric_limits<T>::is_iec559;
static constexpr bool is_bounded = ::std::numeric_limits<T>::is_bounded;
static constexpr bool is_modulo = ::std::numeric_limits<T>::is_modulo;
static constexpr bool traps = ::std::numeric_limits<T>::traps;
static constexpr bool tinyness_before = ::std::numeric_limits<T>::tinyness_before;
static constexpr ::std::float_round_style round_style =
::std::numeric_limits<T>::round_style;
};
#else
using std::numeric_limits;
#endif
Expand Down
2 changes: 1 addition & 1 deletion trng/logistic_dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ namespace trng {
TRNG_CUDA_ENABLE
result_type icdf(result_type x) const {
if (x < 0 or x > 1) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand Down
2 changes: 1 addition & 1 deletion trng/lognormal_dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ namespace trng {
TRNG_CUDA_ENABLE
result_type icdf(result_type x) const {
if (x < 0 or x > 1) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand Down
2 changes: 1 addition & 1 deletion trng/maxwell_dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ namespace trng {
TRNG_CUDA_ENABLE
result_type icdf(result_type x) const {
if (x < 0 or x > 1) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand Down
2 changes: 1 addition & 1 deletion trng/mrg2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ namespace trng {
// Parallel random number generator concept
TRNG_CUDA_ENABLE
inline void mrg2::split(unsigned int s, unsigned int n) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
if (s < 1 or n >= s)
utility::throw_this(std::invalid_argument("invalid argument for trng::mrg2::split"));
#endif
Expand Down
2 changes: 1 addition & 1 deletion trng/mrg3.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ namespace trng {
// Parallel random number generator concept
TRNG_CUDA_ENABLE
inline void mrg3::split(unsigned int s, unsigned int n) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
if (s < 1 or n >= s)
utility::throw_this(std::invalid_argument("invalid argument for trng::mrg3::split"));
#endif
Expand Down
2 changes: 1 addition & 1 deletion trng/mrg3s.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ namespace trng {
// Parallel random number generator concept
TRNG_CUDA_ENABLE
inline void mrg3s::split(unsigned int s, unsigned int n) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
if (s < 1 or n >= s)
utility::throw_this(std::invalid_argument("invalid argument for trng::mrg3s::split"));
#endif
Expand Down
2 changes: 1 addition & 1 deletion trng/mrg4.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ namespace trng {
// Parallel random number generator concept
TRNG_CUDA_ENABLE
inline void mrg4::split(unsigned int s, unsigned int n) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
if (s < 1 or n >= s)
utility::throw_this(std::invalid_argument("invalid argument for trng::mrg4::split"));
#endif
Expand Down
2 changes: 1 addition & 1 deletion trng/mrg5.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ namespace trng {
// Parallel random number generator concept
TRNG_CUDA_ENABLE
inline void mrg5::split(unsigned int s, unsigned int n) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
if (s < 1 or n >= s)
utility::throw_this(std::invalid_argument("invalid argument for trng::mrg5::split"));
#endif
Expand Down
2 changes: 1 addition & 1 deletion trng/mrg5s.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ namespace trng {
// Parallel random number generator concept
TRNG_CUDA_ENABLE
inline void mrg5s::split(unsigned int s, unsigned int n) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
if (s < 1 or n >= s)
utility::throw_this(std::invalid_argument("invalid argument for trng::mrg5s::split"));
#endif
Expand Down
2 changes: 1 addition & 1 deletion trng/pareto_dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ namespace trng {
// inverse cumulative density function
result_type icdf(result_type x) const {
if (x <= 0 or x >= 1) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand Down
2 changes: 1 addition & 1 deletion trng/powerlaw_dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ namespace trng {
TRNG_CUDA_ENABLE
result_type icdf(result_type x) const {
if (x <= 0 or x >= 1) {
#if !(defined __CUDA_ARCH__)
#if !(defined TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand Down
2 changes: 1 addition & 1 deletion trng/rayleigh_dist.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ namespace trng {
TRNG_CUDA_ENABLE
result_type icdf(result_type x) const {
if (x < 0 or x > 1) {
#if !(defined __CUDA_ARCH__)
#if !(TRNG_CUDA)
errno = EDOM;
#endif
return math::numeric_limits<result_type>::quiet_NaN();
Expand Down
Loading

0 comments on commit b2fedc0

Please sign in to comment.