|
| 1 | +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2, cuda-11.0 |
| 2 | +// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2, v11.0 |
| 3 | +// RUN: dpct --format-range=none -out-root %T/cooperative_groups_reduce %s --cuda-include-path="%cuda-path/include" --extra-arg="-std=c++14" |
| 4 | +// RUN: FileCheck %s --match-full-lines --input-file %T/cooperative_groups_reduce/cooperative_groups_reduce.dp.cpp |
| 5 | + |
| 6 | + |
| 7 | +#include <cooperative_groups.h> |
| 8 | +#include <cooperative_groups/reduce.h> |
| 9 | + |
| 10 | +namespace cg = cooperative_groups; |
| 11 | + |
/// Migration fixture covering cg::reduce on a 32-thread tile.
///
/// Partitions `cta` into 32-thread tiles and issues one cg::reduce call per
/// operator (plus, less, greater, bit_and, bit_xor, bit_or), then a
/// block-wide cg::sync. The value returned by each cg::reduce call is
/// discarded. The comment line directly above each reduce call is a FileCheck
/// expectation for the migrated output: it is consumed by the test harness, so
/// its text must stay exactly in step with the call beneath it.
///
/// \param sdata  Array of doubles, indexed by the caller's rank within `cta`.
/// \param cta    Thread block that is partitioned and later synchronized.
__device__ void testReduce(double *sdata, const cg::thread_block &cta) {
  // Operand source for the bitwise reductions.
  // NOTE(review): never assigned in this function, so idata[tid] reads through
  // an indeterminate pointer. Presumably harmless because this file is only
  // fed to the migration tool and never executed -- confirm.
  int *idata;

  // Rank of the calling thread within the block; indexes both operand arrays.
  const unsigned tid = cta.thread_rank();

  // The 32-thread tile that every reduction below operates on.
  cg::thread_block_tile<32> warpTile = cg::tiled_partition<32>(cta);

  // ---- Reductions over double operands -------------------------------------
  // CHECK: sycl::reduce_over_group(item_ct1.get_sub_group(), sdata[tid], sycl::plus<double>());
  cg::reduce(warpTile, sdata[tid], cg::plus<double>());
  // CHECK: sycl::reduce_over_group(item_ct1.get_sub_group(), sdata[tid], sycl::minimum<double>());
  cg::reduce(warpTile, sdata[tid], cg::less<double>());
  // CHECK: sycl::reduce_over_group(item_ct1.get_sub_group(), sdata[tid], sycl::maximum<double>());
  cg::reduce(warpTile, sdata[tid], cg::greater<double>());

  // ---- Reductions over int operands (bitwise operators) --------------------
  // CHECK: sycl::reduce_over_group(item_ct1.get_sub_group(), idata[tid], sycl::bit_and<int>());
  cg::reduce(warpTile, idata[tid], cg::bit_and<int>());
  // CHECK: sycl::reduce_over_group(item_ct1.get_sub_group(), idata[tid], sycl::bit_xor<int>());
  cg::reduce(warpTile, idata[tid], cg::bit_xor<int>());
  // CHECK: sycl::reduce_over_group(item_ct1.get_sub_group(), idata[tid], sycl::bit_or<int>());
  cg::reduce(warpTile, idata[tid], cg::bit_or<int>());

  // Block-wide synchronization; no expectation line is attached to this call.
  cg::sync(cta);
}
0 commit comments