Skip to content

Commit

Permalink
Expose streams in public null mask APIs (#14263)
Browse files Browse the repository at this point in the history
Contributes to #925

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)
  - David Wendt (https://github.com/davidwendt)

URL: #14263
  • Loading branch information
vyasr authored Oct 20, 2023
1 parent f7ad66f commit 0341bb7
Show file tree
Hide file tree
Showing 18 changed files with 191 additions and 44 deletions.
6 changes: 3 additions & 3 deletions cpp/include/cudf/lists/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/lists/detail/scatter_helper.cuh>
#include <cudf/lists/list_device_view.cuh>
#include <cudf/null_mask.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand Down Expand Up @@ -130,8 +130,8 @@ std::unique_ptr<column> scatter_impl(rmm::device_uvector<unbound_list_view> cons
std::vector<std::unique_ptr<column>> children;
children.emplace_back(std::move(offsets_column));
children.emplace_back(std::move(child_column));
auto null_mask =
target.has_nulls() ? copy_bitmask(target, stream, mr) : rmm::device_buffer{0, stream, mr};
auto null_mask = target.has_nulls() ? cudf::detail::copy_bitmask(target, stream, mr)
: rmm::device_buffer{0, stream, mr};

// The output column from this function only has null masks copied from the target columns.
// That is still not a correct final null mask for the scatter result.
Expand Down
24 changes: 22 additions & 2 deletions cpp/include/cudf/null_mask.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/device_buffer.hpp>
Expand Down Expand Up @@ -80,13 +81,15 @@ size_type num_bitmask_words(size_type number_of_bits);
*
* @param size The number of elements to be represented by the mask
* @param state The desired state of the mask
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A `device_buffer` for use as a null bitmask
* satisfying the desired size and state
*/
rmm::device_buffer create_null_mask(
size_type size,
mask_state state,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -100,8 +103,13 @@ rmm::device_buffer create_null_mask(
* @param begin_bit Index of the first bit to set (inclusive)
* @param end_bit Index of the last bit to set (exclusive)
* @param valid If true set all entries to valid; otherwise, set all to null
* @param stream CUDA stream used for device memory operations and kernel launches
*/
void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid);
void set_null_mask(bitmask_type* bitmask,
size_type begin_bit,
size_type end_bit,
bool valid,
rmm::cuda_stream_view stream = cudf::get_default_stream());

/**
* @brief Creates a `device_buffer` from a slice of bitmask defined by a range
Expand All @@ -115,6 +123,7 @@ void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit
* @param mask Bitmask residing in device memory whose bits will be copied
* @param begin_bit Index of the first bit to be copied (inclusive)
* @param end_bit Index of the last bit to be copied (exclusive)
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A `device_buffer` containing the bits
* `[begin_bit, end_bit)` from `mask`.
Expand All @@ -123,6 +132,7 @@ rmm::device_buffer copy_bitmask(
bitmask_type const* mask,
size_type begin_bit,
size_type end_bit,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -132,12 +142,14 @@ rmm::device_buffer copy_bitmask(
* Returns an empty `device_buffer` if the column is not nullable
*
* @param view Column view whose bitmask needs to be copied
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A `device_buffer` containing the bits
* `[view.offset(), view.offset() + view.size())` from `view`'s bitmask.
*/
rmm::device_buffer copy_bitmask(
column_view const& view,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -148,11 +160,13 @@ rmm::device_buffer copy_bitmask(
* If no column in the table is nullable, an empty bitmask is returned.
*
* @param view The table of columns
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A pair of resulting bitmask and count of unset bits
*/
std::pair<rmm::device_buffer, size_type> bitmask_and(
table_view const& view,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -163,11 +177,13 @@ std::pair<rmm::device_buffer, size_type> bitmask_and(
* If no column in the table is nullable, an empty bitmask is returned.
*
* @param view The table of columns
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned device_buffer
* @return A pair of resulting bitmask and count of unset bits
*/
std::pair<rmm::device_buffer, size_type> bitmask_or(
table_view const& view,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand All @@ -183,8 +199,12 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(
* @param bitmask Validity bitmask residing in device memory.
* @param start Index of the first bit to count (inclusive).
* @param stop Index of the last bit to count (exclusive).
* @param stream CUDA stream used for device memory operations and kernel launches
* @return The number of null elements in the specified range.
*/
cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop);
cudf::size_type null_count(bitmask_type const* bitmask,
size_type start,
size_type stop,
rmm::cuda_stream_view stream = cudf::get_default_stream());
/** @} */ // end of group
} // namespace cudf
2 changes: 1 addition & 1 deletion cpp/src/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ std::unique_ptr<column> binary_operation(column_view const& lhs,

CUDF_EXPECTS((lhs.size() == rhs.size()), "Column sizes don't match");

auto [new_mask, null_count] = bitmask_and(table_view({lhs, rhs}), stream, mr);
auto [new_mask, null_count] = cudf::detail::bitmask_and(table_view({lhs, rhs}), stream, mr);
auto out =
make_fixed_width_column(output_type, lhs.size(), std::move(new_mask), null_count, stream, mr);

Expand Down
38 changes: 28 additions & 10 deletions cpp/src/bitmask/null_mask.cu
Original file line number Diff line number Diff line change
Expand Up @@ -157,16 +157,21 @@ void set_null_mask(bitmask_type* bitmask,
// Create a device_buffer for a null mask
rmm::device_buffer create_null_mask(size_type size,
mask_state state,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::create_null_mask(size, state, cudf::get_default_stream(), mr);
return detail::create_null_mask(size, state, stream, mr);
}

// Set the bit range [begin_bit, end_bit) of a pre-allocated null mask to valid if valid==true,
// or to null otherwise.
void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid)
void set_null_mask(bitmask_type* bitmask,
size_type begin_bit,
size_type end_bit,
bool valid,
rmm::cuda_stream_view stream)
{
return detail::set_null_mask(bitmask, begin_bit, end_bit, valid, cudf::get_default_stream());
return detail::set_null_mask(bitmask, begin_bit, end_bit, valid, stream);
}

namespace detail {
Expand Down Expand Up @@ -511,33 +516,46 @@ std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
rmm::device_buffer copy_bitmask(bitmask_type const* mask,
size_type begin_bit,
size_type end_bit,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::copy_bitmask(mask, begin_bit, end_bit, cudf::get_default_stream(), mr);
CUDF_FUNC_RANGE();
return detail::copy_bitmask(mask, begin_bit, end_bit, stream, mr);
}

// Create a bitmask from a column view
rmm::device_buffer copy_bitmask(column_view const& view, rmm::mr::device_memory_resource* mr)
rmm::device_buffer copy_bitmask(column_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::copy_bitmask(view, cudf::get_default_stream(), mr);
CUDF_FUNC_RANGE();
return detail::copy_bitmask(view, stream, mr);
}

std::pair<rmm::device_buffer, size_type> bitmask_and(table_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::bitmask_and(view, cudf::get_default_stream(), mr);
CUDF_FUNC_RANGE();
return detail::bitmask_and(view, stream, mr);
}

std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return detail::bitmask_or(view, cudf::get_default_stream(), mr);
CUDF_FUNC_RANGE();
return detail::bitmask_or(view, stream, mr);
}

// Count null elements (unset bits) in the specified range
cudf::size_type null_count(bitmask_type const* bitmask, size_type start, size_type stop)
cudf::size_type null_count(bitmask_type const* bitmask,
size_type start,
size_type stop,
rmm::cuda_stream_view stream)
{
return detail::null_count(bitmask, start, stop, cudf::get_default_stream());
CUDF_FUNC_RANGE();
return detail::null_count(bitmask, start, stop, stream);
}

} // namespace cudf
2 changes: 1 addition & 1 deletion cpp/src/copying/concatenate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ rmm::device_buffer concatenate_masks(host_span<column_view const> views,
});

rmm::device_buffer null_mask =
create_null_mask(total_element_count, mask_state::UNINITIALIZED, mr);
cudf::detail::create_null_mask(total_element_count, mask_state::UNINITIALIZED, stream, mr);

detail::concatenate_masks(views, static_cast<bitmask_type*>(null_mask.data()), stream);

Expand Down
5 changes: 3 additions & 2 deletions cpp/src/copying/scatter.cu
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,9 @@ struct column_scalar_scatterer_impl<struct_view, MapIterator> {

// Compute null mask
rmm::device_buffer null_mask =
target.nullable() ? copy_bitmask(target, stream, mr)
: create_null_mask(target.size(), mask_state::UNALLOCATED, stream, mr);
target.nullable()
? detail::copy_bitmask(target, stream, mr)
: detail::create_null_mask(target.size(), mask_state::UNALLOCATED, stream, mr);
column null_mask_stub(data_type{type_id::STRUCT},
target.size(),
rmm::device_buffer{},
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/groupby/hash/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,8 @@ void sparse_to_dense_results(table_view const& keys,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto row_bitmask = bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).first;
auto row_bitmask =
cudf::detail::bitmask_and(keys, stream, rmm::mr::get_current_device_resource()).first;
bool skip_key_rows_with_nulls = keys_have_nulls and include_null_keys == null_policy::EXCLUDE;
bitmask_type const* row_bitmask_ptr =
skip_key_rows_with_nulls ? static_cast<bitmask_type*>(row_bitmask.data()) : nullptr;
Expand Down
16 changes: 9 additions & 7 deletions cpp/src/lists/contains.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <cudf/column/column_factories.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/valid_if.cuh>
#include <cudf/lists/detail/contains.hpp>
#include <cudf/lists/detail/lists_column_factories.hpp>
Expand Down Expand Up @@ -274,12 +275,13 @@ std::unique_ptr<column> index_of(lists_column_view const& lists,
rmm::mr::device_memory_resource* mr)
{
if (!search_key.is_valid(stream)) {
return make_numeric_column(data_type{cudf::type_to_id<size_type>()},
lists.size(),
cudf::create_null_mask(lists.size(), mask_state::ALL_NULL, mr),
lists.size(),
stream,
mr);
return make_numeric_column(
data_type{cudf::type_to_id<size_type>()},
lists.size(),
cudf::detail::create_null_mask(lists.size(), mask_state::ALL_NULL, stream, mr),
lists.size(),
stream,
mr);
}
if (lists.size() == 0) {
return make_numeric_column(
Expand Down Expand Up @@ -337,7 +339,7 @@ std::unique_ptr<column> contains_nulls(lists_column_view const& lists,
auto const lists_cv = lists.parent();
auto output = make_numeric_column(data_type{type_to_id<bool>()},
lists.size(),
copy_bitmask(lists_cv, stream, mr),
cudf::detail::copy_bitmask(lists_cv, stream, mr),
lists_cv.null_count(),
stream,
mr);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/merge/merge.cu
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ std::unique_ptr<column> column_merger::operator()<cudf::struct_view>(
// materialize the output buffer
rmm::device_buffer validity =
lcol.has_nulls() || rcol.has_nulls()
? create_null_mask(merged_size, mask_state::UNINITIALIZED, stream, mr)
? detail::create_null_mask(merged_size, mask_state::UNINITIALIZED, stream, mr)
: rmm::device_buffer{};
if (lcol.has_nulls() || rcol.has_nulls()) {
materialize_bitmask(lcol,
Expand Down
16 changes: 12 additions & 4 deletions cpp/src/round/round.cu
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,12 @@ std::unique_ptr<column> round_with(column_view const& input,
if (decimal_places >= 0 && std::is_integral_v<T>)
return std::make_unique<cudf::column>(input, stream, mr);

auto result = cudf::make_fixed_width_column(
input.type(), input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr);
auto result = cudf::make_fixed_width_column(input.type(),
input.size(),
detail::copy_bitmask(input, stream, mr),
input.null_count(),
stream,
mr);

auto out_view = result->mutable_view();
T const n = std::pow(10, std::abs(decimal_places));
Expand Down Expand Up @@ -256,8 +260,12 @@ std::unique_ptr<column> round_with(column_view const& input,
if (input.type().scale() > -decimal_places)
return cudf::detail::cast(input, result_type, stream, mr);

auto result = cudf::make_fixed_width_column(
result_type, input.size(), copy_bitmask(input, stream, mr), input.null_count(), stream, mr);
auto result = cudf::make_fixed_width_column(result_type,
input.size(),
detail::copy_bitmask(input, stream, mr),
input.null_count(),
stream,
mr);

auto out_view = result->mutable_view();

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/search/contains_column.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct contains_column_dispatch {
stream,
mr);
return std::make_unique<column>(
std::move(result_v), copy_bitmask(needles, stream, mr), needles.null_count());
std::move(result_v), detail::copy_bitmask(needles, stream, mr), needles.null_count());
}
};

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/replace/multi.cu
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ std::unique_ptr<column> replace_character_parallel(strings_column_view const& in
std::move(offsets),
std::move(chars->release().children.back()),
input.null_count(),
copy_bitmask(input.parent(), stream, mr));
cudf::detail::copy_bitmask(input.parent(), stream, mr));
}

/**
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/strings/split/split_re.cu
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ std::unique_ptr<column> split_record_re(strings_column_view const& input,
std::move(offsets),
std::move(strings_output),
input.null_count(),
copy_bitmask(input.parent(), stream, mr),
cudf::detail::copy_bitmask(input.parent(), stream, mr),
stream,
mr);
}
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/strings/split/split_record.cu
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ std::unique_ptr<column> split_record_fn(strings_column_view const& input,
std::move(offsets),
std::move(results),
input.null_count(),
copy_bitmask(input.parent(), stream, mr),
cudf::detail::copy_bitmask(input.parent(), stream, mr),
stream,
mr);
}
Expand All @@ -72,7 +72,7 @@ std::unique_ptr<column> split_record_fn(strings_column_view const& input,
std::move(offsets),
std::move(strings_child),
input.null_count(),
copy_bitmask(input.parent(), stream, mr),
cudf::detail::copy_bitmask(input.parent(), stream, mr),
stream,
mr);
}
Expand Down Expand Up @@ -160,7 +160,7 @@ std::unique_ptr<column> whitespace_split_record_fn(strings_column_view const& in
std::move(offsets),
std::move(strings_output),
input.null_count(),
copy_bitmask(input.parent(), stream, mr),
cudf::detail::copy_bitmask(input.parent(), stream, mr),
stream,
mr);
}
Expand Down
Loading

0 comments on commit 0341bb7

Please sign in to comment.