forked from NVIDIA/CUDALibrarySamples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcurand_philox_lognormal_example.cpp
executable file
·131 lines (94 loc) · 3.62 KB
/
curand_philox_lognormal_example.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
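/*
 * This program uses the host cuRAND API with the Philox4_32_10 generator to
 * produce 12 log-normally distributed pseudorandom floats, first with a
 * host-side generator and then with a device-side generator, printing both
 * result sets.
 */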
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <stdexcept>
#include <vector>
#include <cuda_runtime.h>
#include <curand.h>
#include "curand_utils.h"
/* Element type of the generated samples; used for both the host vector and
 * the device buffer. */
using data_type = float;
void run_on_device(const int &n, const data_type &mean, const data_type &stddev,
const unsigned long long &offset,
const unsigned long long &seed,
const curandOrdering_t &order, const curandRngType_t &rng,
const cudaStream_t &stream, curandGenerator_t &gen,
std::vector<data_type> &h_data) {
data_type *d_data = nullptr;
/* C data to device */
CUDA_CHECK(cudaMalloc(reinterpret_cast<void **>(&d_data),
sizeof(data_type) * h_data.size()));
/* Create pseudo-random number generator */
CURAND_CHECK(curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_PHILOX4_32_10));
/* Set cuRAND to stream */
CURAND_CHECK(curandSetStream(gen, stream));
/* Set offset */
CURAND_CHECK(curandSetGeneratorOffset(gen, offset));
/* Set ordering */
CURAND_CHECK(curandSetGeneratorOrdering(gen, order));
/* Set seed */
CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(gen, seed));
/* Generate n floats on device */
CURAND_CHECK(
curandGenerateLogNormal(gen, d_data, h_data.size(), mean, stddev));
/* Copy data to host */
CUDA_CHECK(cudaMemcpyAsync(h_data.data(), d_data,
sizeof(data_type) * h_data.size(),
cudaMemcpyDeviceToHost, stream));
/* Sync stream */
CUDA_CHECK(cudaStreamSynchronize(stream));
/* Cleanup */
CUDA_CHECK(cudaFree(d_data));
}
void run_on_host(const int &n, const data_type &mean, const data_type &stddev,
const unsigned long long &offset,
const unsigned long long &seed, const curandOrdering_t &order,
const curandRngType_t &rng, const cudaStream_t &stream,
curandGenerator_t &gen, std::vector<data_type> &h_data) {
/* Create pseudo-random number generator */
CURAND_CHECK(
curandCreateGeneratorHost(&gen, CURAND_RNG_PSEUDO_PHILOX4_32_10));
/* Set cuRAND to stream */
CURAND_CHECK(curandSetStream(gen, stream));
/* Set offset */
CURAND_CHECK(curandSetGeneratorOffset(gen, offset));
/* Set ordering */
CURAND_CHECK(curandSetGeneratorOrdering(gen, order));
/* Set seed */
CURAND_CHECK(curandSetPseudoRandomGeneratorSeed(gen, seed));
/* Generate n floats on host */
CURAND_CHECK(
curandGenerateLogNormal(gen, h_data.data(), h_data.size(), mean, stddev));
/* Cleanup */
CURAND_CHECK(curandDestroyGenerator(gen));
}
int main(int argc, char *argv[]) {
cudaStream_t stream = NULL;
curandGenerator_t gen = NULL;
curandRngType_t rng = CURAND_RNG_PSEUDO_PHILOX4_32_10;
curandOrdering_t order = CURAND_ORDERING_PSEUDO_BEST;
const int n = 12;
const unsigned long long offset = 0ULL;
const unsigned long long seed = 1234ULL;
const data_type mean = 1.0f;
const data_type stddev = 2.0f;
/* Create stream */
CUDA_CHECK(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
/* Allocate n floats on host */
std::vector<data_type> h_data(n, 0);
run_on_host(n, mean, stddev, offset, seed, order, rng, stream, gen, h_data);
printf("Host\n");
print_vector(h_data);
printf("=====\n");
run_on_device(n, mean, stddev, offset, seed, order, rng, stream, gen, h_data);
printf("Device\n");
print_vector(h_data);
printf("=====\n");
/* Cleanup */
CUDA_CHECK(cudaStreamDestroy(stream));
CUDA_CHECK(cudaDeviceReset());
return EXIT_SUCCESS;
}