Skip to content

Commit e89b80c

Browse files
committed
serde/parquet: fix UB in schema_element
Previously I consistently saw the following UB crash:

#0 0x798cfa38a419 in serde::parquet::(anonymous namespace)::record_shredder::process_group_node(serde::parquet::schema_element const*, serde::parquet::(anonymous namespace)::traversal_levels, std::__1::variant<serde::parquet::null_value, serde::parquet::boolean_value, serde::parquet::int32_value, serde::parquet::int64_value, serde::parquet::float32_value, serde::parquet::float64_value, serde::parquet::byte_array_value, serde::parquet::fixed_byte_array_value, fragmented_vector<serde::parquet::group_member, 18446744073709551615ul>, fragmented_vector<serde::parquet::repeated_element, 18446744073709551615ul> >) /home/awong/Repos/redpanda/src/v/serde/parquet/shredder.cc:145:9
#1 0x798cfa38a419 in serde::parquet::(anonymous namespace)::record_shredder::shred() (.resume) /home/awong/Repos/redpanda/src/v/serde/parquet/shredder.cc:60:26
#2 0x798d04e84f26 in std::__1::coroutine_handle<seastar::internal::coroutine_traits_base<void>::promise_type>::resume[abi:ne180100]() const /home/andrew/xfs/vbuild/llvm/3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff/install/bin/../include/c++/v1/__coroutine/coroutine_handle.h:143:5
#3 0x798d04e84f26 in seastar::internal::coroutine_traits_base<void>::promise_type::run_and_dispose() /home/andrew/xfs/vbuild/redpanda/dev/debug/clang/rp_deps_install/include/seastar/core/coroutine.hh:125:20
#4 0x798cd42e82c6 in seastar::reactor::run_tasks(seastar::reactor::task_queue&) /home/andrew/xfs/vbuild/redpanda/dev/debug/clang/v_deps_build/seastar-prefix/src/seastar-build/../seastar/src/core/reactor.cc:2800:14
#5 0x798cd42ee05e in seastar::reactor::run_some_tasks() /home/andrew/xfs/vbuild/redpanda/dev/debug/clang/v_deps_build/seastar-prefix/src/seastar-build/../seastar/src/core/reactor.cc:3262:9
#6 0x798cd42f04d2 in seastar::reactor::do_run() /home/andrew/xfs/vbuild/redpanda/dev/debug/clang/v_deps_build/seastar-prefix/src/seastar-build/../seastar/src/core/reactor.cc:3445:9
#7 0x798cd4353ff3 in seastar::smp::configure(seastar::smp_options const&, seastar::reactor_options const&)::$_2::operator()() const /home/andrew/xfs/vbuild/redpanda/dev/debug/clang/v_deps_build/seastar-prefix/src/seastar-build/../seastar/src/core/reactor.cc:4702:22
#8 0x798cd4353ff3 in decltype(std::declval<seastar::smp::configure(seastar::smp_options const&, seastar::reactor_options const&)::$_2&>()()) std::__1::__invoke[abi:ne180100]<seastar::smp::configure(seastar::smp_options const&, seastar::reactor_options const&)::$_2&>(seastar::smp::configure(seastar::smp_options const&, seastar::reactor_options const&)::$_2&) /home/andrew/xfs/vbuild/llvm/3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff/install/bin/../include/c++/v1/__type_traits/invoke.h:344:25
#9 0x798cd4353ff3 in void std::__1::__invoke_void_return_wrapper<void, true>::__call[abi:ne180100]<seastar::smp::configure(seastar::smp_options const&, seastar::reactor_options const&)::$_2&>(seastar::smp::configure(seastar::smp_options const&, seastar::reactor_options const&)::$_2&) /home/andrew/xfs/vbuild/llvm/3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff/install/bin/../include/c++/v1/__type_traits/invoke.h:419:5
#10 0x798cd4353ff3 in std::__1::__function::__alloc_func<seastar::smp::configure(seastar::smp_options const&, seastar::reactor_options const&)::$_2, std::__1::allocator<seastar::smp::configure(seastar::smp_options const&, seastar::reactor_options const&)::$_2>, void ()>::operator()[abi:ne180100]() /home/andrew/xfs/vbuild/llvm/3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff/install/bin/../include/c++/v1/__functional/function.h:169:12
#11 0x798cd4353ff3 in std::__1::__function::__func<seastar::smp::configure(seastar::smp_options const&, seastar::reactor_options const&)::$_2, std::__1::allocator<seastar::smp::configure(seastar::smp_options const&, seastar::reactor_options const&)::$_2>, void ()>::operator()() /home/andrew/xfs/vbuild/llvm/3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff/install/bin/../include/c++/v1/__functional/function.h:311:10
#12 0x798cd41fb2eb in std::__1::__function::__value_func<void ()>::operator()[abi:ne180100]() const /home/andrew/xfs/vbuild/llvm/3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff/install/bin/../include/c++/v1/__functional/function.h:428:12
#13 0x798cd41fb2eb in std::__1::function<void ()>::operator()() const /home/andrew/xfs/vbuild/llvm/3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff/install/bin/../include/c++/v1/__functional/function.h:981:10
#14 0x798cd41fb2eb in seastar::posix_thread::start_routine(void*) /home/andrew/xfs/vbuild/redpanda/dev/debug/clang/v_deps_build/seastar-prefix/src/seastar-build/../seastar/src/core/posix.cc:90:5
#15 0x600bf2dbed78 in asan_thread_start(void*) /home/andrew/xfs/vbuild/llvm/3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff/src/compiler-rt/lib/asan/asan_interceptors.cpp:239:28
#16 0x798cd2094ac2 (/home/andrew/xfs/vbuild/redpanda/dev/debug/clang/dist/local/redpanda/lib/libc.so.6+0x94ac2) (BuildId: 490fef8403240c91833978d494d39e537409b92e)
#17 0x798cd212684f (/home/andrew/xfs/vbuild/redpanda/dev/debug/clang/dist/local/redpanda/lib/libc.so.6+0x12684f) (BuildId: 490fef8403240c91833978d494d39e537409b92e)

SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /home/awong/Repos/redpanda/src/v/serde/parquet/shredder.cc:136:18

(Note: judging from the accompanying diff, the likely cause is that schema_element::repetition_type had no default initializer and was only assigned explicitly on the schema root, so schema elements built for nested struct types could carry an indeterminate value.)
1 parent d57236a commit e89b80c

File tree

2 files changed

+5
-3
lines changed

2 files changed

+5
-3
lines changed

src/v/datalake/schema_parquet.cc

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -130,7 +130,9 @@ struct type_converting_visitor {
130130

131131
serde::parquet::schema_element
132132
operator()(const iceberg::struct_type& type) {
133-
return struct_to_parquet(type);
133+
auto res = struct_to_parquet(type);
134+
res.repetition_type = map_repetition_type(required);
135+
return res;
134136
}
135137

136138
serde::parquet::schema_element operator()(const iceberg::list_type& type) {
@@ -210,6 +212,7 @@ struct_to_parquet(const iceberg::struct_type& schema) {
210212
field.path.emplace_back(f->name);
211213
res.children.push_back(std::move(field));
212214
}
215+
res.repetition_type = serde::parquet::field_repetition_type::required;
213216
return res;
214217
}
215218

@@ -218,7 +221,6 @@ struct_to_parquet(const iceberg::struct_type& schema) {
218221
serde::parquet::schema_element
219222
schema_to_parquet(const iceberg::struct_type& schema) {
220223
auto root = struct_to_parquet(schema);
221-
root.repetition_type = serde::parquet::field_repetition_type::required;
222224
root.path.emplace_back("root");
223225
return root;
224226
}

src/v/serde/parquet/schema.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -261,7 +261,7 @@ struct schema_element {
261261
*
262262
* This must be set to required on the schema root
263263
*/
264-
field_repetition_type repetition_type;
264+
field_repetition_type repetition_type{field_repetition_type::required};
265265

266266
/**
267267
* The full path of the node within the schema.

0 commit comments

Comments
 (0)