Skip to content

Commit

Permalink
Resolve CI issues on block_scan_api test
Browse files Browse the repository at this point in the history
  • Loading branch information
gonidelis committed Jun 3, 2024
1 parent 19620d0 commit f38095f
Showing 1 changed file with 14 additions and 14 deletions.
28 changes: 14 additions & 14 deletions cub/test/catch2_test_block_scan_api.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@

#include "catch2_test_helper.h"

constexpr int items_per_thread = 2;
constexpr int block_threads = 64;
// Number of items each thread contributes to the block-wide scan; the kernels
// in this file build a two-element `thread_data` array per thread (item 0 and
// item 1) and the tests size the output as block_num_threads * num_items_per_thread.
constexpr int num_items_per_thread = 2;
// Number of threads in the single block launched by each test
// (used as the block dimension in `<<<1, block_num_threads>>>`).
constexpr int block_num_threads = 64;

// example-begin inclusive-scan-array-init-value
__global__ void InclusiveScanKernel(int* output)
Expand All @@ -50,8 +50,8 @@ __global__ void InclusiveScanKernel(int* output)

int initial_value = 1;
int thread_data[] = {
+1 * ((int) threadIdx.x * items_per_thread), // item 0
-1 * ((int) threadIdx.x * items_per_thread + 1) // item 1
+1 * ((int) threadIdx.x * num_items_per_thread), // item 0
-1 * ((int) threadIdx.x * num_items_per_thread + 1) // item 1
};
// input: {[0, -1], [2, -3], [4, -5], ... [126, -127]}

Expand All @@ -67,17 +67,17 @@ __global__ void InclusiveScanKernel(int* output)

CUB_TEST("Block array-based inclusive scan works with initial value", "[block]")
{
thrust::device_vector<int> d_out(block_threads * items_per_thread);
thrust::device_vector<int> d_out(block_num_threads * num_items_per_thread);

InclusiveScanKernel<<<1, block_threads>>>(thrust::raw_pointer_cast(d_out.data()));
InclusiveScanKernel<<<1, block_num_threads>>>(thrust::raw_pointer_cast(d_out.data()));
REQUIRE(cudaSuccess == cudaPeekAtLastError());
REQUIRE(cudaSuccess == cudaDeviceSynchronize());

thrust::host_vector<int> expected(d_out.size());
for (size_t i = 0; i < expected.size() - 1; i += 2)
{
expected[i] = i;
expected[i + 1] = i;
expected[i] = static_cast<int>(i);
expected[i + 1] = static_cast<int>(i);
}

// When initial value = 1 for the given input the first two
Expand All @@ -100,8 +100,8 @@ __global__ void InclusiveScanKernelAggregate(int* output, int* block_aggregate)

int initial_value = 1;
int thread_data[] = {
+1 * ((int) threadIdx.x * items_per_thread), // item 0
-1 * ((int) threadIdx.x * items_per_thread + 1) // item 1
+1 * ((int) threadIdx.x * num_items_per_thread), // item 0
-1 * ((int) threadIdx.x * num_items_per_thread + 1) // item 1
};
// input: {[0, -1], [2, -3], [4, -5], ... [126, -127]}

Expand All @@ -118,19 +118,19 @@ __global__ void InclusiveScanKernelAggregate(int* output, int* block_aggregate)

CUB_TEST("Block array-based inclusive scan with block aggregate works with initial value", "[block]")
{
thrust::device_vector<int> d_out(block_threads * items_per_thread);
thrust::device_vector<int> d_out(block_num_threads * num_items_per_thread);

c2h::device_vector<int> d_block_aggregate(1);
InclusiveScanKernelAggregate<<<1, block_threads>>>(
InclusiveScanKernelAggregate<<<1, block_num_threads>>>(
thrust::raw_pointer_cast(d_out.data()), thrust::raw_pointer_cast(d_block_aggregate.data()));
REQUIRE(cudaSuccess == cudaPeekAtLastError());
REQUIRE(cudaSuccess == cudaDeviceSynchronize());

thrust::host_vector<int> expected(d_out.size());
for (size_t i = 0; i < expected.size() - 1; i += 2)
{
expected[i] = i;
expected[i + 1] = i;
expected[i] = static_cast<int>(i);
expected[i + 1] = static_cast<int>(i);
}

// When initial value = 1 for the given input the first two
Expand Down

0 comments on commit f38095f

Please sign in to comment.