-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpyCUREVERSE.cuh
42 lines (33 loc) · 1.43 KB
/
pyCUREVERSE.cuh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#ifndef PYCUREVERSE_CUH
#define PYCUREVERSE_CUH
#include <iostream>
// CUERR: place directly after a CUDA runtime call or kernel launch (no
// trailing semicolon needed). It fetches the pending runtime error with
// cudaGetLastError(); if that is anything other than cudaSuccess, it prints the
// error string together with the file and line of the check to stdout and
// terminates the process with exit status 1.
#define CUERR {                                                              \
    const cudaError_t cuerr_status = cudaGetLastError();                     \
    if (cuerr_status != cudaSuccess) {                                       \
        std::cout << "CUDA error: " << cudaGetErrorString(cuerr_status)      \
                  << " : " << __FILE__ << ", line " << __LINE__              \
                  << std::endl;                                              \
        exit(1);                                                             \
    }}
// Number of blocks in the grid used by reverse() when launching reverse_kernel.
#define GRIDDIM (1024)
// Number of threads per block used by reverse() when launching reverse_kernel.
#define BLOCKDIM (1024)
#ifndef SWIG
/**
 * Reverses the N elements of x_d in place by swapping element i with element
 * N-1-i for every i in [0, N/2).
 *
 * Launch layout: only the .x components of the grid and block are used, so a
 * 1D launch is expected. Each thread starts at its flat global index and
 * advances by the total number of launched threads, so any 1D grid/block
 * size covers the whole range. Every swap pair is handled by exactly one
 * thread, so no synchronization is required. No shared memory is used.
 *
 * @param x_d  Array dereferenced on the device; must hold at least N elements.
 * @param N    Number of elements. Values below 2 (including negative values
 *             of a signed index_t) leave the array untouched.
 */
template <class index_t, class value_t> __global__
void reverse_kernel(value_t * x_d, index_t N) {
    // Clamp before converting: a negative signed N would otherwise turn into
    // a huge unsigned bound and cause out-of-range swaps.
    const size_t count = N > 0 ? static_cast<size_t>(N) : 0;
    const size_t half = count / 2;

    // Widen to size_t before multiplying so large grids cannot wrap the
    // 32-bit built-in index variables.
    const size_t stride = static_cast<size_t>(gridDim.x) * blockDim.x;
    size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;

    for (; i < half; i += stride) {
        const size_t mirror = count - i - 1;
        value_t temporary = x_d[i];
        x_d[i] = x_d[mirror];
        x_d[mirror] = temporary;
    }
}
#endif
/**
 * Reverses the N elements of the host array x in place using the GPU.
 *
 * The data is copied into a temporary device buffer, reversed there by
 * reverse_kernel, copied back into x, and the device buffer is released.
 * A CUERR check follows every CUDA call and the kernel launch.
 *
 * @param x  Host array of at least N elements; overwritten with the reversed
 *           sequence.
 * @param N  Number of elements. For N < 2 (including negative values of a
 *           signed index_t) the function returns immediately without touching
 *           the GPU, since there is nothing to swap.
 */
template <class index_t, class value_t>
void reverse(value_t * x, index_t N) {
    // Nothing to reverse; also avoids a zero-byte allocation and prevents a
    // negative N from being converted into an enormous byte count.
    if (N < 2) return;

    const size_t count = static_cast<size_t>(N);
    const size_t bytes = sizeof(value_t) * count;

    // One thread per swap pair, capped at GRIDDIM blocks. The kernel strides
    // by the total thread count, so a capped grid still covers every pair.
    const size_t pairs = count / 2;
    const size_t wanted = (pairs + BLOCKDIM - 1) / BLOCKDIM;
    const size_t cap = static_cast<size_t>(GRIDDIM);
    const unsigned int blocks =
        static_cast<unsigned int>(wanted < cap ? wanted : cap);

    value_t * x_d = 0;
    cudaMalloc((void**) &x_d, bytes);                               CUERR
    cudaMemcpy(x_d, x, bytes, cudaMemcpyHostToDevice);              CUERR
    reverse_kernel<<<blocks, BLOCKDIM>>>(x_d, N);                   CUERR
    // The blocking copy back waits for the kernel to finish, so the check
    // after it also reports errors raised while the kernel was running.
    cudaMemcpy(x, x_d, bytes, cudaMemcpyDeviceToHost);              CUERR
    cudaFree(x_d);                                                  CUERR
}
#endif