-
Notifications
You must be signed in to change notification settings - Fork 322
Fix extracting CUDA stream in cub::DeviceTransform
#7239
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,6 +13,16 @@ namespace stdexec = cuda::std::execution; | |
|
|
||
| using namespace thrust::placeholders; | ||
|
|
||
// Test helper: a user-defined type that holds a cudaStream_t and converts
// implicitly back to it. The "custom stream" test sections pass an instance of
// this type where a raw stream would otherwise be passed.
| struct custom_stream | ||
| { | ||
// The wrapped stream handle; this struct does not create or destroy it.
| cudaStream_t stream; | ||
|
|
||
// Implicit, non-throwing conversion that returns the wrapped handle unchanged.
| operator cudaStream_t() const noexcept | ||
| { | ||
| return stream; | ||
| } | ||
| }; | ||
|
|
||
| auto make_stream_env(cudaStream_t stream) | ||
| { | ||
| // MSVC has trouble nesting two aggregate initializations with CTAD | ||
|
|
@@ -31,14 +41,20 @@ C2H_TEST("DeviceTransform::Transform custom stream", "[device][transform]") | |
| REQUIRE(cudaStreamCreate(&stream) == cudaSuccess); | ||
|
|
||
| c2h::device_vector<type> result(num_items, thrust::no_init); | ||
| auto run = [&](auto streamish) { | ||
| cub::DeviceTransform::Transform(cuda::std::make_tuple(a, b), result.begin(), num_items, _1 + _2, streamish); | ||
| }; | ||
| SECTION("raw stream") | ||
| { | ||
| cub::DeviceTransform::Transform(cuda::std::make_tuple(a, b), result.begin(), num_items, _1 + _2, stream); | ||
| run(stream); | ||
| } | ||
| SECTION("custom stream") | ||
| { | ||
| run(custom_stream{stream}); | ||
|
Comment on lines
+44
to
+53
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I verified in the debugger that the stream is extracted, but I wonder whether I could write the unit test in a way that detects if the default stream was used anywhere. Does anybody know if I can query the … [comment truncated in source]
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. One solution would be to start a graph capture on a stream and check whether anything was captured, but that might have some limitations; I'm not sure if it's applicable here. |
||
| } | ||
| SECTION("environment") | ||
| { | ||
| auto env = make_stream_env(stream); | ||
| cub::DeviceTransform::Transform(cuda::std::make_tuple(a, b), result.begin(), num_items, _1 + _2, env); | ||
| run(make_stream_env(stream)); | ||
| } | ||
|
|
||
| REQUIRE(cudaStreamDestroy(stream) == cudaSuccess); | ||
|
|
@@ -55,14 +71,20 @@ C2H_TEST("DeviceTransform::Transform (single argument) custom stream", "[device] | |
| REQUIRE(cudaStreamCreate(&stream) == cudaSuccess); | ||
|
|
||
| c2h::device_vector<type> result(num_items, thrust::no_init); | ||
| auto run = [&](auto streamish) { | ||
| cub::DeviceTransform::Transform(a, result.begin(), num_items, _1 + 13, streamish); | ||
| }; | ||
| SECTION("raw stream") | ||
| { | ||
| cub::DeviceTransform::Transform(a, result.begin(), num_items, _1 + 13, stream); | ||
| run(stream); | ||
| } | ||
| SECTION("custom stream") | ||
| { | ||
| run(custom_stream{stream}); | ||
| } | ||
| SECTION("environment") | ||
| { | ||
| auto env = make_stream_env(stream); | ||
| cub::DeviceTransform::Transform(a, result.begin(), num_items, _1 + 13, env); | ||
| run(make_stream_env(stream)); | ||
| } | ||
|
|
||
| REQUIRE(cudaStreamDestroy(stream) == cudaSuccess); | ||
|
|
@@ -79,14 +101,20 @@ C2H_TEST("DeviceTransform::Generate custom stream", "[device][transform]") | |
| REQUIRE(cudaStreamCreate(&stream) == cudaSuccess); | ||
|
|
||
| c2h::device_vector<type> result(num_items, thrust::no_init); | ||
| auto run = [&](auto streamish) { | ||
| cub::DeviceTransform::Generate(result.begin(), num_items, generator, streamish); | ||
| }; | ||
| SECTION("raw stream") | ||
| { | ||
| cub::DeviceTransform::Generate(result.begin(), num_items, generator, stream); | ||
| run(stream); | ||
| } | ||
| SECTION("custom stream") | ||
| { | ||
| run(custom_stream{stream}); | ||
| } | ||
| SECTION("environment") | ||
| { | ||
| auto env = make_stream_env(stream); | ||
| cub::DeviceTransform::Generate(result.begin(), num_items, generator, env); | ||
| run(make_stream_env(stream)); | ||
| } | ||
|
|
||
| REQUIRE(cudaStreamDestroy(stream) == cudaSuccess); | ||
|
|
@@ -102,14 +130,20 @@ C2H_TEST("DeviceTransform::Fill custom stream", "[device][transform]") | |
| REQUIRE(cudaStreamCreate(&stream) == cudaSuccess); | ||
|
|
||
| c2h::device_vector<type> result(num_items, thrust::no_init); | ||
| auto run = [&](auto streamish) { | ||
| cub::DeviceTransform::Fill(result.begin(), num_items, 0xBAD, streamish); | ||
| }; | ||
| SECTION("raw stream") | ||
| { | ||
| cub::DeviceTransform::Fill(result.begin(), num_items, 0xBAD, stream); | ||
| run(stream); | ||
| } | ||
| SECTION("custom stream") | ||
| { | ||
| run(custom_stream{stream}); | ||
| } | ||
| SECTION("environment") | ||
| { | ||
| auto env = make_stream_env(stream); | ||
| cub::DeviceTransform::Fill(result.begin(), num_items, 0xBAD, env); | ||
| run(make_stream_env(stream)); | ||
| } | ||
|
|
||
| REQUIRE(cudaStreamDestroy(stream) == cudaSuccess); | ||
|
|
@@ -136,16 +170,21 @@ C2H_TEST("DeviceTransform::TransformIf custom stream", "[device][transform]") | |
| REQUIRE(cudaStreamCreate(&stream) == cudaSuccess); | ||
|
|
||
| c2h::device_vector<type> result(num_items, 1337); | ||
| auto run = [&](auto streamish) { | ||
| cub::DeviceTransform::TransformIf( | ||
| cuda::std::make_tuple(a, b), result.begin(), num_items, (_1 + _2) > 1000, _1 + _2, streamish); | ||
| }; | ||
| SECTION("raw stream") | ||
| { | ||
| cub::DeviceTransform::TransformIf( | ||
| cuda::std::make_tuple(a, b), result.begin(), num_items, (_1 + _2) > 1000, _1 + _2, stream); | ||
| run(stream); | ||
| } | ||
| SECTION("custom stream") | ||
| { | ||
| run(custom_stream{stream}); | ||
| } | ||
| SECTION("environment") | ||
| { | ||
| auto env = make_stream_env(stream); | ||
| cub::DeviceTransform::TransformIf( | ||
| cuda::std::make_tuple(a, b), result.begin(), num_items, (_1 + _2) > 1000, _1 + _2, env); | ||
| run(make_stream_env(stream)); | ||
| } | ||
|
|
||
| auto reference_it = cuda::transform_iterator{cuda::counting_iterator{42}, reference_func{}}; | ||
|
|
@@ -164,14 +203,20 @@ C2H_TEST("DeviceTransform::TransformIf (single argument) custom stream", "[devic | |
| REQUIRE(cudaStreamCreate(&stream) == cudaSuccess); | ||
|
|
||
| c2h::device_vector<type> result(num_items, 1337); | ||
| auto run = [&](auto streamish) { | ||
| cub::DeviceTransform::TransformIf(a, result.begin(), num_items, (_1 + 13) > 1000, _1 + 13, streamish); | ||
| }; | ||
| SECTION("raw stream") | ||
| { | ||
| cub::DeviceTransform::TransformIf(a, result.begin(), num_items, (_1 + 13) > 1000, _1 + 13, stream); | ||
| run(stream); | ||
| } | ||
| SECTION("custom stream") | ||
| { | ||
| run(custom_stream{stream}); | ||
| } | ||
| SECTION("environment") | ||
| { | ||
| auto env = make_stream_env(stream); | ||
| cub::DeviceTransform::TransformIf(a, result.begin(), num_items, (_1 + 13) > 1000, _1 + 13, env); | ||
| run(make_stream_env(stream)); | ||
| } | ||
|
|
||
| auto reference_it = cuda::transform_iterator{cuda::counting_iterator{42}, reference_func{}}; | ||
|
|
@@ -191,16 +236,21 @@ C2H_TEST("DeviceTransform::TransformStableArgumentAddresses custom stream", "[de | |
| REQUIRE(cudaStreamCreate(&stream) == cudaSuccess); | ||
|
|
||
| c2h::device_vector<type> result(num_items, thrust::no_init); | ||
| auto run = [&](auto streamish) { | ||
| cub::DeviceTransform::TransformStableArgumentAddresses( | ||
| cuda::std::make_tuple(a, b), result.begin(), num_items, _1 + _2, streamish); | ||
| }; | ||
| SECTION("raw stream") | ||
| { | ||
| cub::DeviceTransform::TransformStableArgumentAddresses( | ||
| cuda::std::make_tuple(a, b), result.begin(), num_items, _1 + _2, stream); | ||
| run(stream); | ||
| } | ||
| SECTION("custom stream") | ||
| { | ||
| run(custom_stream{stream}); | ||
| } | ||
| SECTION("environment") | ||
| { | ||
| auto env = make_stream_env(stream); | ||
| cub::DeviceTransform::TransformStableArgumentAddresses( | ||
| cuda::std::make_tuple(a, b), result.begin(), num_items, _1 + _2, env); | ||
| run(make_stream_env(stream)); | ||
| } | ||
|
|
||
| REQUIRE(cudaStreamDestroy(stream) == cudaSuccess); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Drive-by fix