Skip to content

Commit ffb417a

Browse files
committed
Merge branch 'cccl-3.1.x' of github.com:bdice/cudf into cccl-3.1.x
2 parents ea1e63c + 7045bbd commit ffb417a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+1781
-1282
lines changed

conda/environments/all_cuda-129_arch-aarch64.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ dependencies:
7474
- pytest-benchmark
7575
- pytest-cases>=3.8.2
7676
- pytest-cov
77+
- pytest-httpserver
7778
- pytest-rerunfailures!=16.0.0
7879
- pytest-xdist
7980
- python-confluent-kafka>=2.8.0,<2.9.0a0

conda/environments/all_cuda-129_arch-x86_64.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ dependencies:
7575
- pytest-benchmark
7676
- pytest-cases>=3.8.2
7777
- pytest-cov
78+
- pytest-httpserver
7879
- pytest-rerunfailures!=16.0.0
7980
- pytest-xdist
8081
- python-confluent-kafka>=2.8.0,<2.9.0a0

conda/environments/all_cuda-130_arch-aarch64.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ dependencies:
7474
- pytest-benchmark
7575
- pytest-cases>=3.8.2
7676
- pytest-cov
77+
- pytest-httpserver
7778
- pytest-rerunfailures!=16.0.0
7879
- pytest-xdist
7980
- python-confluent-kafka>=2.8.0,<2.9.0a0

conda/environments/all_cuda-130_arch-x86_64.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ dependencies:
7575
- pytest-benchmark
7676
- pytest-cases>=3.8.2
7777
- pytest-cov
78+
- pytest-httpserver
7879
- pytest-rerunfailures!=16.0.0
7980
- pytest-xdist
8081
- python-confluent-kafka>=2.8.0,<2.9.0a0

cpp/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,9 @@ include(cmake/thirdparty/get_cccl.cmake)
292292
# find rmm
293293
include(cmake/thirdparty/get_rmm.cmake)
294294

295+
# find croaring
296+
include(cmake/thirdparty/get_croaring.cmake)
297+
295298
# find flatbuffers
296299
include(cmake/thirdparty/get_flatbuffers.cmake)
297300

@@ -536,6 +539,7 @@ add_library(
536539
src/io/parquet/compact_protocol_writer.cpp
537540
src/io/parquet/decode_preprocess.cu
538541
src/io/parquet/experimental/dictionary_page_filter.cu
542+
src/io/parquet/experimental/deletion_vectors.cu
539543
src/io/parquet/experimental/hybrid_scan.cpp
540544
src/io/parquet/experimental/hybrid_scan_chunking.cu
541545
src/io/parquet/experimental/hybrid_scan_helpers.cpp

cpp/benchmarks/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,19 @@ ConfigureNVBench(
329329
PARQUET_EXPERIMENTAL_READER_NVBENCH io/parquet/experimental/parquet_dictionary_page_filter.cpp
330330
)
331331

332+
# ##################################################################################################
333+
# * parquet deletion vector benchmark
334+
# ----------------------------------------------------------------------
335+
ConfigureNVBench(
336+
PARQUET_DELETION_VECTORS_NVBENCH io/parquet/experimental/parquet_deletion_vectors.cpp
337+
)
338+
target_compile_definitions(
339+
PARQUET_DELETION_VECTORS_NVBENCH
340+
PRIVATE DISABLENEON=1 ROARING_DISABLE_X64=1 ROARING_DISABLE_AVX=1
341+
CROARING_COMPILER_SUPPORTS_AVX512=0
342+
)
343+
target_link_libraries(PARQUET_DELETION_VECTORS_NVBENCH PRIVATE roaring)
344+
332345
# ##################################################################################################
333346
# * parquet multithread reader benchmark
334347
# ----------------------------------------------------------------------
Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
/*
2+
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <benchmarks/common/generate_input.hpp>
18+
#include <benchmarks/fixture/benchmark_fixture.hpp>
19+
#include <benchmarks/io/cuio_common.hpp>
20+
#include <benchmarks/io/nvbench_helpers.hpp>
21+
22+
#include <cudf/io/experimental/deletion_vectors.hpp>
23+
#include <cudf/io/parquet.hpp>
24+
#include <cudf/utilities/default_stream.hpp>
25+
26+
#include <nvbench/nvbench.cuh>
27+
#include <roaring/roaring64.h>
28+
29+
#include <random>
30+
31+
namespace {
32+
/**
33+
* @brief Serializes a roaring64 bitmap to a vector of cuda::std::byte
34+
*
35+
* @param deletion_vector Pointer to the roaring64 bitmap to serialize
36+
*
37+
* @return Host vector of bytes containing the serialized roaring64 bitmap
38+
*/
39+
auto serialize_roaring_bitmap(roaring64_bitmap_t const* roaring_bitmap)
40+
{
41+
auto const num_bytes = roaring64_bitmap_portable_size_in_bytes(roaring_bitmap);
42+
CUDF_EXPECTS(num_bytes > 0, "Roaring64 bitmap is empty");
43+
auto serialized_bitmap = thrust::host_vector<cuda::std::byte>(num_bytes);
44+
std::ignore = roaring64_bitmap_portable_serialize(
45+
roaring_bitmap, reinterpret_cast<char*>(serialized_bitmap.data()));
46+
return serialized_bitmap;
47+
}
48+
49+
/**
50+
* @brief Builds a host vector of expected row indices from the specified row group offsets and
51+
* row counts
52+
*
53+
* @param row_group_offsets Row group offsets
54+
* @param row_group_num_rows Number of rows in each row group
55+
* @param num_rows Total number of table rows
56+
*
57+
* @return Host vector of expected row indices
58+
*/
59+
auto build_row_indices(cudf::host_span<size_t const> row_group_offsets,
60+
cudf::host_span<cudf::size_type const> row_group_num_rows,
61+
cudf::size_type num_rows)
62+
{
63+
auto const num_row_groups = static_cast<cudf::size_type>(row_group_num_rows.size());
64+
65+
// Row group span offsets
66+
auto row_group_span_offsets = thrust::host_vector<cudf::size_type>(num_row_groups + 1);
67+
row_group_span_offsets[0] = 0;
68+
thrust::inclusive_scan(
69+
row_group_num_rows.begin(), row_group_num_rows.end(), row_group_span_offsets.begin() + 1);
70+
71+
// Expected row indices data
72+
auto expected_row_indices = thrust::host_vector<size_t>(num_rows);
73+
std::fill(expected_row_indices.begin(), expected_row_indices.end(), 1);
74+
75+
// Scatter row group row offsets to expected row indices
76+
thrust::scatter(row_group_offsets.begin(),
77+
row_group_offsets.end(),
78+
row_group_span_offsets.begin(),
79+
expected_row_indices.begin());
80+
81+
// Inclusive scan to compute the rest of the expected row indices
82+
std::for_each(
83+
thrust::counting_iterator(0), thrust::counting_iterator(num_row_groups), [&](auto i) {
84+
auto start_row_index = row_group_span_offsets[i];
85+
auto end_row_index = row_group_span_offsets[i + 1];
86+
thrust::inclusive_scan(expected_row_indices.begin() + start_row_index,
87+
expected_row_indices.begin() + end_row_index,
88+
expected_row_indices.begin() + start_row_index);
89+
});
90+
91+
return expected_row_indices;
92+
}
93+
94+
/**
95+
* @brief Builds a roaring64 deletion vector and a (host) row mask vector based on the specified
96+
* probability of a row being deleted
97+
*
98+
* @param row_group_offsets Row group row offsets
99+
* @param row_group_num_rows Number of rows in each row group
100+
* @param num_rows Number of rows in the table
101+
* @param deletion_probability The probability of a row being deleted
102+
*
103+
* @return Serialized roaring64 bitmap buffer
104+
*/
105+
auto build_deletion_vector(cudf::host_span<size_t const> row_group_offsets,
106+
cudf::host_span<cudf::size_type const> row_group_num_rows,
107+
cudf::size_type num_rows,
108+
float deletion_probability)
109+
{
110+
std::mt19937 engine{0xbaLL};
111+
std::bernoulli_distribution dist(deletion_probability);
112+
113+
auto row_indices = build_row_indices(row_group_offsets, row_group_num_rows, num_rows);
114+
115+
CUDF_EXPECTS(std::cmp_equal(row_indices.size(), num_rows),
116+
"Row indices vector must have the same number of rows as the table");
117+
118+
auto input_row_mask = thrust::host_vector<bool>(num_rows);
119+
std::generate(input_row_mask.begin(), input_row_mask.end(), [&]() { return dist(engine); });
120+
121+
auto deletion_vector = roaring64_bitmap_create();
122+
123+
// Context for the roaring64 bitmap for faster (bulk) add operations
124+
auto roaring64_context =
125+
roaring64_bulk_context_t{.high_bytes = {0, 0, 0, 0, 0, 0}, .leaf = nullptr};
126+
127+
std::for_each(thrust::counting_iterator<size_t>(0),
128+
thrust::counting_iterator<size_t>(num_rows),
129+
[&](auto row_idx) {
130+
// Insert provided host row index if the row is deleted in the row mask
131+
if (not input_row_mask[row_idx]) {
132+
roaring64_bitmap_add_bulk(
133+
deletion_vector, &roaring64_context, row_indices[row_idx]);
134+
}
135+
});
136+
137+
return serialize_roaring_bitmap(deletion_vector);
138+
}
139+
140+
auto setup_table_and_deletion_vector(nvbench::state& state)
141+
{
142+
auto const num_columns = static_cast<cudf::size_type>(state.get_int64("num_cols"));
143+
auto const rows_per_row_group =
144+
static_cast<cudf::size_type>(state.get_int64("rows_per_row_group"));
145+
auto const num_row_groups = static_cast<cudf::size_type>(state.get_int64("num_row_groups"));
146+
auto const deletion_probability = static_cast<float>(state.get_float64("deletion_probability"));
147+
auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
148+
auto const num_rows = rows_per_row_group * num_row_groups;
149+
150+
cuio_source_sink_pair source_sink(source_type);
151+
152+
// Create a table and write it to parquet sink
153+
{
154+
auto const d_types = std::vector<cudf::type_id>{
155+
cudf::type_id::FLOAT64,
156+
cudf::type_id::DURATION_MICROSECONDS,
157+
cudf::type_id::TIMESTAMP_MILLISECONDS,
158+
cudf::type_id::STRING,
159+
};
160+
161+
auto const table = create_random_table(cycle_dtypes(d_types, num_columns),
162+
row_count{num_rows},
163+
data_profile_builder().null_probability(0.10),
164+
0xbad);
165+
cudf::io::parquet_writer_options write_opts =
166+
cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), table->view())
167+
.row_group_size_rows(rows_per_row_group)
168+
.compression(cudf::io::compression_type::NONE);
169+
cudf::io::write_parquet(write_opts);
170+
}
171+
172+
// Row offsets for each row group - arbitrary, only used to build the index column
173+
auto row_group_offsets = thrust::host_vector<size_t>(num_row_groups);
174+
row_group_offsets[0] = static_cast<size_t>(std::llround(2e9));
175+
std::for_each(
176+
thrust::counting_iterator<size_t>(1),
177+
thrust::counting_iterator<size_t>(num_row_groups),
178+
[&](auto i) { row_group_offsets[i] = std::llround(row_group_offsets[i - 1] + 0.5e9); });
179+
180+
// Row group splits
181+
auto row_group_splits = thrust::host_vector<cudf::size_type>(num_row_groups - 1);
182+
{
183+
std::mt19937 engine{0xf00d};
184+
std::uniform_int_distribution<cudf::size_type> dist{1, num_rows};
185+
std::generate(row_group_splits.begin(), row_group_splits.end(), [&]() { return dist(engine); });
186+
std::sort(row_group_splits.begin(), row_group_splits.end());
187+
}
188+
189+
// Number of rows in each row group
190+
auto row_group_num_rows = thrust::host_vector<cudf::size_type>{};
191+
{
192+
row_group_num_rows.reserve(num_row_groups);
193+
auto previous_split = cudf::size_type{0};
194+
std::transform(row_group_splits.begin(),
195+
row_group_splits.end(),
196+
std::back_inserter(row_group_num_rows),
197+
[&](auto current_split) {
198+
auto current_split_size = current_split - previous_split;
199+
previous_split = current_split;
200+
return current_split_size;
201+
});
202+
row_group_num_rows.push_back(num_rows - row_group_splits.back());
203+
}
204+
205+
auto deletion_vector =
206+
build_deletion_vector(row_group_offsets, row_group_num_rows, num_rows, deletion_probability);
207+
208+
return std::tuple{std::move(source_sink),
209+
std::move(row_group_offsets),
210+
std::move(row_group_num_rows),
211+
std::move(deletion_vector)};
212+
}
213+
214+
} // namespace
215+
216+
void BM_parquet_deletion_vectors(nvbench::state& state)
217+
{
218+
auto const num_row_groups = static_cast<cudf::size_type>(state.get_int64("num_row_groups"));
219+
auto const rows_per_row_group =
220+
static_cast<cudf::size_type>(state.get_int64("rows_per_row_group"));
221+
auto const num_rows = rows_per_row_group * num_row_groups;
222+
223+
auto [source_sink, row_group_offsets, row_group_num_rows, deletion_vector] =
224+
setup_table_and_deletion_vector(state);
225+
226+
cudf::io::parquet_reader_options read_opts =
227+
cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
228+
229+
auto mem_stats_logger = cudf::memory_stats_logger();
230+
state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
231+
state.exec(
232+
nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
233+
try_drop_l3_cache();
234+
235+
timer.start();
236+
auto const result = cudf::io::parquet::experimental::read_parquet_and_apply_deletion_vector(
237+
read_opts, deletion_vector, row_group_offsets, row_group_num_rows);
238+
timer.stop();
239+
});
240+
241+
auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
242+
state.add_element_count(static_cast<double>(num_rows) / time, "rows_per_second");
243+
state.add_buffer_size(
244+
mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
245+
state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
246+
}
247+
248+
NVBENCH_BENCH(BM_parquet_deletion_vectors)
249+
.set_name("parquet_deletion_vectors")
250+
.set_min_samples(4)
251+
.add_int64_power_of_two_axis("num_row_groups", nvbench::range(4, 14, 2))
252+
.add_int64_axis("rows_per_row_group", {5'000, 10'000})
253+
.add_string_axis("io_type", {"DEVICE_BUFFER"})
254+
.add_float64_axis("deletion_probability", {0.15, 0.5, 0.75})
255+
.add_int64_axis("num_cols", {4});
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# =============================================================================
2+
# Copyright (c) 2025, NVIDIA CORPORATION.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
5+
# in compliance with the License. You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software distributed under the License
10+
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
11+
# or implied. See the License for the specific language governing permissions and limitations under
12+
# the License.
13+
# =============================================================================
14+
15+
# Use CPM to clone CRoaring and set up the necessary targets and include directories.
16+
function(find_and_configure_roaring VERSION)
17+
rapids_cpm_find(
18+
roaring ${VERSION}
19+
GLOBAL_TARGETS roaring
20+
CPM_ARGS
21+
GIT_REPOSITORY https://github.com/RoaringBitmap/CRoaring.git
22+
GIT_TAG v${VERSION}
23+
GIT_SHALLOW TRUE
24+
OPTIONS "ROARING_BUILD_STATIC ON"
25+
"BUILD_SHARED_LIBS OFF"
26+
"ENABLE_ROARING_TESTS OFF"
27+
"ENABLE_ROARING_MICROBENCHMARKS OFF"
28+
"ROARING_DISABLE_NEON ON"
29+
"ROARING_DISABLE_X64 ON"
30+
"ROARING_DISABLE_AVX2 ON"
31+
"ROARING_DISABLE_AVX512 ON"
32+
)
33+
if(roaring_ADDED)
34+
set_target_properties(roaring PROPERTIES POSITION_INDEPENDENT_CODE ON)
35+
endif()
36+
37+
if(DEFINED roaring_SOURCE_DIR)
38+
set(roaring_INCLUDE_DIR
39+
"${roaring_SOURCE_DIR}"
40+
PARENT_SCOPE
41+
)
42+
endif()
43+
44+
endfunction()
45+
46+
set(roaring_VERSION_cudf "4.3.11")
47+
find_and_configure_roaring(${roaring_VERSION_cudf})

0 commit comments

Comments
 (0)