From 732dd83079bb4a7a51473949feec2335ff045af5 Mon Sep 17 00:00:00 2001 From: Nghia Truong Date: Thu, 3 Oct 2024 15:52:41 -0700 Subject: [PATCH] Use one warp per row to improve performance Signed-off-by: Nghia Truong --- src/main/cpp/src/json_utils.cu | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/main/cpp/src/json_utils.cu b/src/main/cpp/src/json_utils.cu index 46940a760..6940cae7a 100644 --- a/src/main/cpp/src/json_utils.cu +++ b/src/main/cpp/src/json_utils.cu @@ -15,6 +15,7 @@ */ #include +#include #include #include #include @@ -82,13 +83,18 @@ std::tuple, std::unique_ptr, c // This will be returned to the caller. rmm::device_uvector is_null_or_empty(input.size(), stream, mr); - thrust::transform( + thrust::for_each( rmm::exec_policy_nosync(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(input.size()), - thrust::make_zip_iterator(thrust::make_tuple(is_valid_input.begin(), is_null_or_empty.begin())), - [input = *d_input_ptr] __device__(cudf::size_type idx) -> thrust::tuple { - if (input.is_null(idx)) { return {false, true}; } + thrust::make_counting_iterator(0L), + thrust::make_counting_iterator(input.size() * static_cast(cudf::detail::warp_size)), + [input = *d_input_ptr, + output = thrust::make_zip_iterator(thrust::make_tuple( + is_valid_input.begin(), is_null_or_empty.begin()))] __device__(int64_t tidx) { + // Execute one warp per row to minimize thread divergence. + if ((tidx % cudf::detail::warp_size) != 0) { return; } + auto const idx = tidx / cudf::detail::warp_size; + + if (input.is_null(idx)) { output[idx] = thrust::make_tuple(false, true); } auto const d_str = input.element(idx); auto const size = d_str.size_bytes(); @@ -121,7 +127,7 @@ std::tuple, std::unique_ptr, c // The current row contains only `null` string literal and not any other non-empty characters. // Such rows need to be masked out as null when doing concatenation. - if (is_null_literal) { return {false, false}; } + if (is_null_literal) { output[idx] = thrust::make_tuple(false, false); } auto const not_eol = i < size; @@ -129,9 +135,9 @@ std::tuple, std::unique_ptr, c // replace it by a null. This is necessary for libcudf's JSON reader to work. // Note that if we want to support ARRAY schema, we need to check for `[` instead. auto constexpr start_character = '{'; - if (not_eol && ch != start_character) { return {false, false}; } + if (not_eol && ch != start_character) { output[idx] = thrust::make_tuple(false, false); } - return {not_eol, !not_eol}; + return thrust::make_tuple(not_eol, !not_eol); }); auto constexpr num_levels = 256;