-
Notifications
You must be signed in to change notification settings - Fork 709
Implement bound truncation in parquet writer #29164
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from 3 commits
6791d83
81b6f4d
3abadda
6262bca
b004a33
af2da8f
6cccc78
96e9f3e
9ce1d1a
c3837e1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -14,12 +14,18 @@ | |
| #include "bytes/details/io_fragment.h" | ||
|
|
||
| #include <iterator> | ||
| #include <type_traits> | ||
|
|
||
| // See io_iterator_consumer for iterator validity notes. | ||
| namespace details { | ||
| class io_byte_iterator { | ||
|
|
||
| template<bool Forward> | ||
| class io_byte_iterator_base { | ||
| public: | ||
| using io_const_iterator = io_fragment_list::const_iterator; | ||
| using io_const_iterator = std::conditional_t< | ||
| Forward, | ||
| io_fragment_list::const_iterator, | ||
| io_fragment_list::const_reverse_iterator>; | ||
|
|
||
| // iterator_traits | ||
| using difference_type = void; | ||
|
|
@@ -28,24 +34,35 @@ class io_byte_iterator { | |
| using reference = const char&; | ||
| using iterator_category = std::forward_iterator_tag; | ||
|
|
||
| io_byte_iterator( | ||
| io_byte_iterator_base( | ||
| const io_const_iterator& begin, const io_const_iterator& end) noexcept | ||
| : _frag(begin) | ||
| , _frag_end(end) { | ||
| if (_frag != _frag_end) { | ||
| _frag_index = _frag->get(); | ||
| // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) | ||
| _frag_index_end = _frag->get() + _frag->size(); | ||
| // handle an empty fragment | ||
| if (_frag_index == _frag_index_end) { | ||
| next_fragment(); | ||
| if constexpr (Forward) { | ||
| _frag_index = _frag->get(); | ||
| // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) | ||
| _frag_index_end = _frag->get() + _frag->size(); | ||
| // handle an empty fragment | ||
| if (_frag_index == _frag_index_end) { | ||
| next_fragment(); | ||
| } | ||
| } else { | ||
| auto frag_size = _frag->size(); | ||
| if (frag_size == 0) { | ||
| next_fragment(); | ||
| return; | ||
| } | ||
| // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) | ||
| _frag_index = _frag->get() + (_frag->size() - 1); | ||
| _frag_index_end = _frag->get(); | ||
| } | ||
| } else { | ||
| _frag_index = nullptr; | ||
| _frag_index_end = nullptr; | ||
| } | ||
| } | ||
| io_byte_iterator( | ||
| io_byte_iterator_base( | ||
| const io_const_iterator& begin, | ||
| const io_const_iterator& end, | ||
| const char* frag_index, | ||
|
|
@@ -59,20 +76,27 @@ class io_byte_iterator { | |
| reference operator*() const noexcept { return *_frag_index; } | ||
| pointer operator->() const noexcept { return _frag_index; } | ||
| /// true if pointing to the byte-value (not necessarily the same address) | ||
| bool operator==(const io_byte_iterator& o) const noexcept { | ||
| bool operator==(const io_byte_iterator_base& o) const noexcept { | ||
| return _frag_index == o._frag_index; | ||
| } | ||
| bool operator!=(const io_byte_iterator& o) const noexcept { | ||
| bool operator!=(const io_byte_iterator_base& o) const noexcept { | ||
| return !(*this == o); | ||
| } | ||
| io_byte_iterator& operator++() { | ||
| // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) | ||
| if (++_frag_index == _frag_index_end) { | ||
| next_fragment(); | ||
| io_byte_iterator_base& operator++() { | ||
| if constexpr (Forward) { | ||
| // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) | ||
| if (++_frag_index == _frag_index_end) { | ||
| next_fragment(); | ||
| } | ||
| } else { | ||
| // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) | ||
| if (_frag_index-- == _frag_index_end) { | ||
|
Member
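There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This will form a "one before the start" pointer (when the condition is true), which is not allowed: pointer arithmetic that yields a pointer before the first element of an array is undefined behavior, even if the pointer is never dereferenced (yes, it's quite annoying). |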
||
| next_fragment(); | ||
| } | ||
| } | ||
| return *this; | ||
| } | ||
| io_byte_iterator operator++(int) { | ||
| io_byte_iterator_base operator++(int) { | ||
| auto tmp = *this; | ||
| ++*this; | ||
| return tmp; | ||
|
|
@@ -83,12 +107,22 @@ class io_byte_iterator { | |
| while (true) { | ||
| ++_frag; | ||
| if (_frag != _frag_end) { | ||
| _frag_index = _frag->get(); | ||
| // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) | ||
| _frag_index_end = _frag->get() + _frag->size(); | ||
| // handle an empty fragment | ||
| if (_frag_index == _frag_index_end) { | ||
| continue; | ||
| if constexpr (Forward) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This block of logic duplicates the identical block in the ctor and should be deduplicated (DRY): factor it into a helper, probably called "maybe_next_fragment", that is called in the ctor and after every increment/decrement-type operation. |
||
| _frag_index = _frag->get(); | ||
| // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) | ||
| _frag_index_end = _frag->get() + _frag->size(); | ||
| // handle an empty fragment | ||
| if (_frag_index == _frag_index_end) { | ||
| continue; | ||
| } | ||
| } else { | ||
| auto frag_size = _frag->size(); | ||
| if (frag_size == 0) { | ||
| continue; | ||
| } | ||
| // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic) | ||
| _frag_index = _frag->get() + (frag_size - 1); | ||
| _frag_index_end = _frag->get(); | ||
| } | ||
| return; | ||
| } | ||
|
|
@@ -104,4 +138,7 @@ class io_byte_iterator { | |
| const char* _frag_index_end = nullptr; | ||
| }; | ||
|
|
||
| using io_byte_iterator = io_byte_iterator_base<true>; | ||
| using reverse_io_byte_iterator = io_byte_iterator_base<false>; | ||
|
|
||
| } // namespace details | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,7 +27,10 @@ class io_placeholder { | |
| , _byte_index(initial_index) | ||
| , _remaining_size(max_size_to_write) {} | ||
|
|
||
| [[gnu::always_inline]] void write(const char* src, size_t len) { | ||
| template<typename T> | ||
| [[gnu::always_inline]] void write(const T* src, size_t len) | ||
| requires(sizeof(T) == 1) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This function needs a doc comment now that it's not entirely obvious what it is doing. It will do an element-wise copy from the src array to this placeholder, if T can be assigned to a char, right? |
||
| { | ||
| details::check_out_of_range(len, _remaining_size); | ||
| std::copy_n(src, len, mutable_index()); | ||
| _remaining_size -= len; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
iobuf_fuzz should probably be augmented with reverse iterator cases