From 8c6bab6c50bbc9fb1a4f1d888602768995ad8ea8 Mon Sep 17 00:00:00 2001 From: Gu Haiyan Date: Mon, 27 Apr 2026 20:39:15 +0800 Subject: [PATCH 1/2] fix bits::copyBits coredump caused by negative leafNullsSize_ --- bolt/common/base/BitUtil.h | 38 ++++++++++++++++ bolt/dwio/parquet/reader/PageReader.cpp | 45 ++++++++++++++----- bolt/dwio/parquet/reader/PageReader.h | 27 ++++++++--- .../parquet/reader/RepeatedColumnReader.cpp | 10 +++-- .../parquet/reader/StructColumnReader.cpp | 5 ++- 5 files changed, 101 insertions(+), 24 deletions(-) diff --git a/bolt/common/base/BitUtil.h b/bolt/common/base/BitUtil.h index 338bb451c..41a31b89d 100644 --- a/bolt/common/base/BitUtil.h +++ b/bolt/common/base/BitUtil.h @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef __BMI2__ #include @@ -125,10 +126,47 @@ constexpr inline uint64_t nbytes(int32_t bits) { return roundUp(bits, 8) / 8; } +constexpr inline uint64_t nbytes(int64_t bits) { + return bits <= 0 ? 0 : roundUp(static_cast(bits), 8) / 8; +} + +template , int> = 0> +constexpr inline uint64_t nbytes(T bits) { + if constexpr (std::is_signed_v) { + if (bits <= 0) { + return 0; + } + } else { + if (bits == 0) { + return 0; + } + } + return roundUp(static_cast(bits), 8) / 8; +} + constexpr inline uint64_t nwords(int32_t bits) { return roundUp(bits, 64) / 64; } +constexpr inline uint64_t nwords(int64_t bits) { + return bits <= 0 ? 0 + : roundUp(static_cast(bits), 64) / 64; +} + +template , int> = 0> +constexpr inline uint64_t nwords(T bits) { + if constexpr (std::is_signed_v) { + if (bits <= 0) { + return 0; + } + } else { + if (bits == 0) { + return 0; + } + } + return roundUp(static_cast(bits), 64) / 64; +} + inline int32_t getAndClearLastSetBit(uint16_t& bits) { int32_t trailingZeros = __builtin_ctz(bits); // erase last non-zero bit diff --git a/bolt/dwio/parquet/reader/PageReader.cpp b/bolt/dwio/parquet/reader/PageReader.cpp index 648a352c9..6022198ef 100644 --- a/bolt/dwio/parquet/reader/PageReader.cpp +++ b/bolt/dwio/parquet/reader/PageReader.cpp @@ -33,6 +33,8 @@ #include #include +#include + #include "bolt/dwio/common/BufferUtil.h" #include "bolt/dwio/common/ColumnVisitors.h" #include "bolt/dwio/parquet/reader/Decompression.h" @@ -290,13 +292,14 @@ void PageReader::readPageDefLevels() { nullptr, leafNulls_.data(), 0); - numRowsInPage_ = leafNullsSize_; + numRowsInPage_ = checkedInt64ToInt32(leafNullsSize_, "leafNullsSize_"); numLeafNullsConsumed_ = 0; } void PageReader::updateRowInfoAfterPageSkipped() { rowOfPage_ += numRowsInPage_; if (hasChunkRepDefs_) { + BOLT_CHECK_GE(rowOfPage_, 0); numLeafNullsConsumed_ = rowOfPage_; } } @@ -767,7 +770,7 @@ void PageReader::preloadPageRepDefs(const bool keepRepDefRawData) { leafNulls_.data(), leafNullsSize_); leafNullsSize_ += numLeaves; - numLeavesInPage_.push_back(numLeaves); + numLeavesInPage_.push_back(checkedInt64ToInt32(numLeaves, "numLeaves")); } return; } @@ -1025,20 +1028,20 @@ void PageReader::decodeRepDefsFromBuffer() { leafNulls_.data(), leafNullsSize_); leafNullsSize_ += numLeaves; - numLeavesInPage_.push_back(numLeaves); + numLeavesInPage_.push_back(checkedInt64ToInt32(numLeaves, "numLeaves")); } preloadedRepDefs_.pop_front(); } -int32_t PageReader::getLengthsAndNulls( +int64_t PageReader::getLengthsAndNulls( LevelMode mode, const arrow::LevelInfo& info, - int32_t begin, - int32_t end, - int32_t maxItems, + int64_t begin, + int64_t end, + int64_t maxItems, int32_t* lengths, uint64_t* nulls, - int32_t nullsStartIndex) const { + int64_t nullsStartIndex) const { arrow::ValidityBitmapInputOutput bits; bits.values_read_upper_bound = maxItems; bits.values_read = 0; @@ -1060,7 +1063,7 @@ int32_t PageReader::getLengthsAndNulls( &bits, lengths); // Convert offsets to lengths. - for (auto i = 0; i < bits.values_read; ++i) { + for (int64_t i = 0; i < bits.values_read; ++i) { lengths[i] = lengths[i + 1] - lengths[i]; } break; @@ -1153,6 +1156,7 @@ void PageReader::skip(int64_t numRows) { if (firstUnvisited_ + numRows >= rowOfPage_ + numRowsInPage_) { seekToPage(firstUnvisited_ + numRows); if (hasChunkRepDefs_) { + BOLT_CHECK_GE(rowOfPage_, 0); numLeafNullsConsumed_ = rowOfPage_; } toSkip -= rowOfPage_ - firstUnvisited_; @@ -1256,6 +1260,7 @@ PageReader::readNulls(int32_t numValues, BufferPtr& buffer) { buffer = nullptr; return nullptr; } + BOLT_CHECK_GE(numValues, 0); dwio::common::ensureCapacity(buffer, numValues, &pool_); if (isTopLevel_) { BOLT_CHECK_EQ(1, maxDefine_); @@ -1264,12 +1269,29 @@ PageReader::readNulls(int32_t numValues, BufferPtr& buffer) { numValues, buffer->asMutable(), &allOnes); return allOnes ? nullptr : buffer->as(); } + + const int64_t erasedBits = erasedLeafNullWords_ * 64; + const int64_t relativeConsumed = numLeafNullsConsumed_ - erasedBits; + BOLT_CHECK( + !leafNulls_.empty() && leafNullsSize_ >= 0 && + numLeafNullsConsumed_ >= erasedBits && relativeConsumed >= 0 && + relativeConsumed <= leafNullsSize_ && + relativeConsumed + numValues <= leafNullsSize_, + "invalid leafNulls range in readNulls(non-top): maxDefine_={} numValues={} numLeafNullsConsumed_={} erasedLeafNullWords_={} erasedBits={} relativeConsumed={} leafNullsSize_={} leafNullsWords={}", + maxDefine_, + numValues, + numLeafNullsConsumed_, + erasedLeafNullWords_, + erasedBits, + relativeConsumed, + leafNullsSize_, + leafNulls_.size()); bits::copyBits( leafNulls_.data(), - numLeafNullsConsumed_ - erasedLeafNullWords_ * 64, + static_cast(relativeConsumed), buffer->asMutable(), 0, - numValues); + static_cast(numValues)); numLeafNullsConsumed_ += numValues; return buffer->as(); } @@ -1298,6 +1320,7 @@ bool PageReader::rowsForPage( if (rowZero >= rowOfPage_ + numRowsInPage_) { seekToPage(rowZero); if (hasChunkRepDefs_) { + BOLT_CHECK_GE(rowOfPage_, 0); numLeafNullsConsumed_ = rowOfPage_; } } diff --git a/bolt/dwio/parquet/reader/PageReader.h b/bolt/dwio/parquet/reader/PageReader.h index 80e0a3e38..820c11881 100644 --- a/bolt/dwio/parquet/reader/PageReader.h +++ b/bolt/dwio/parquet/reader/PageReader.h @@ -30,6 +30,10 @@ #pragma once +#include +#include + +#include "bolt/common/base/Exceptions.h" #include "bolt/common/compression/Compression.h" #include "bolt/dwio/common/BitConcatenation.h" #include "bolt/dwio/common/DirectDecoder.h" @@ -47,10 +51,19 @@ #include namespace bytedance::bolt::parquet { - constexpr int16_t kNonPageOrdinal = static_cast(-1); constexpr uint32_t kDefaultMaxPageHeaderSize = 16 * 1024 * 1024; +inline int32_t checkedInt64ToInt32(int64_t value, const char* what) { + BOLT_CHECK( + value >= 0 && + value <= static_cast(std::numeric_limits::max()), + "{} out of int32_t range: {}", + what, + value); + return static_cast(value); +} + struct CryptoContext { CryptoContext( bool startWithDictionaryPage, @@ -131,15 +144,15 @@ class PageReader { /// to produce. 'lengths' is only filled for mode kList. 'nulls' is filled /// from bit position 'nullsStartIndex'. Returns the number of lengths/nulls /// filled. - int32_t getLengthsAndNulls( + int64_t getLengthsAndNulls( LevelMode mode, const arrow::LevelInfo& info, - int32_t begin, - int32_t end, - int32_t maxItems, + int64_t begin, + int64_t end, + int64_t maxItems, int32_t* FOLLY_NULLABLE lengths, uint64_t* FOLLY_NULLABLE nulls, - int32_t nullsStartIndex) const; + int64_t nullsStartIndex) const; /// Applies 'visitor' to values in the ColumnChunk of 'this'. The /// operation to perform and The operand rows are given by @@ -519,7 +532,7 @@ class PageReader { raw_vector repetitionLevels_; // Number of valid bits in 'leafNulls_' - int32_t leafNullsSize_{0}; + int64_t leafNullsSize_{0}; // Number of leaf nulls read. int64_t numLeafNullsConsumed_{0}; diff --git a/bolt/dwio/parquet/reader/RepeatedColumnReader.cpp b/bolt/dwio/parquet/reader/RepeatedColumnReader.cpp index 36163f998..e1619b30b 100644 --- a/bolt/dwio/parquet/reader/RepeatedColumnReader.cpp +++ b/bolt/dwio/parquet/reader/RepeatedColumnReader.cpp @@ -198,7 +198,7 @@ void MapColumnReader::setLengthsFromRepDefs(PageReader& pageReader) { memset(lengths->asMutable(), 0, lengths->size()); dwio::common::ensureCapacity( nullsInReadRange_, bits::nwords(numRepDefs), &memoryPool_); - auto numLists = pageReader.getLengthsAndNulls( + auto numLists64 = pageReader.getLengthsAndNulls( LevelMode::kList, levelInfo_, repDefRange.first, @@ -207,7 +207,8 @@ void MapColumnReader::setLengthsFromRepDefs(PageReader& pageReader) { lengths->asMutable(), nullsInReadRange()->asMutable(), 0); - lengths->setSize(numLists * sizeof(int32_t)); + const int32_t numLists = checkedInt64ToInt32(numLists64, "numLists"); + lengths->setSize(static_cast(numLists) * sizeof(int32_t)); formatData_->as().setNulls(nullsInReadRange(), numLists); setLengths(std::move(lengths)); } @@ -312,7 +313,7 @@ void ListColumnReader::setLengthsFromRepDefs(PageReader& pageReader) { memset(lengths->asMutable(), 0, lengths->size()); dwio::common::ensureCapacity( nullsInReadRange_, bits::nwords(numRepDefs), &memoryPool_); - auto numLists = pageReader.getLengthsAndNulls( + auto numLists64 = pageReader.getLengthsAndNulls( LevelMode::kList, levelInfo_, repDefRange.first, @@ -321,7 +322,8 @@ void ListColumnReader::setLengthsFromRepDefs(PageReader& pageReader) { lengths->asMutable(), nullsInReadRange()->asMutable(), 0); - lengths->setSize(numLists * sizeof(int32_t)); + const int32_t numLists = checkedInt64ToInt32(numLists64, "numLists"); + lengths->setSize(static_cast(numLists) * sizeof(int32_t)); formatData_->as().setNulls(nullsInReadRange(), numLists); setLengths(std::move(lengths)); } diff --git a/bolt/dwio/parquet/reader/StructColumnReader.cpp b/bolt/dwio/parquet/reader/StructColumnReader.cpp index c40424972..25adb296f 100644 --- a/bolt/dwio/parquet/reader/StructColumnReader.cpp +++ b/bolt/dwio/parquet/reader/StructColumnReader.cpp @@ -254,7 +254,7 @@ void StructColumnReader::setNullsFromRepDefs(PageReader& pageReader) { int32_t numRepDefs = repDefRange.second - repDefRange.first; dwio::common::ensureCapacity( nullsInReadRange_, bits::nwords(numRepDefs), &memoryPool_); - auto numStructs = pageReader.getLengthsAndNulls( + auto numStructs64 = pageReader.getLengthsAndNulls( levelMode_, levelInfo_, repDefRange.first, @@ -263,7 +263,8 @@ void StructColumnReader::setNullsFromRepDefs(PageReader& pageReader) { nullptr, nullsInReadRange()->asMutable(), 0); - formatData_->as().setNulls(nullsInReadRange(), numStructs); + formatData_->as().setNulls( + nullsInReadRange(), checkedInt64ToInt32(numStructs64, "numStructs")); } void StructColumnReader::filterRowGroups( From 10b62e58eac2c179bdbbca401c4e03a9c9d715c9 Mon Sep 17 00:00:00 2001 From: Gu Haiyan Date: Tue, 12 May 2026 14:45:27 +0800 Subject: [PATCH 2/2] fix review comments --- bolt/common/base/BitUtil.h | 35 +++-------------- bolt/common/base/tests/BitUtilTest.cpp | 1 + bolt/dwio/parquet/reader/PageReader.cpp | 39 ++++++++++--------- bolt/dwio/parquet/reader/PageReader.h | 22 ++--------- .../parquet/reader/RepeatedColumnReader.cpp | 10 ++--- .../parquet/reader/StructColumnReader.cpp | 5 +-- 6 files changed, 37 insertions(+), 75 deletions(-) diff --git a/bolt/common/base/BitUtil.h b/bolt/common/base/BitUtil.h index 41a31b89d..39a1d37e4 100644 --- a/bolt/common/base/BitUtil.h +++ b/bolt/common/base/BitUtil.h @@ -37,7 +37,6 @@ #include #include #include -#include #ifdef __BMI2__ #include @@ -126,25 +125,11 @@ constexpr inline uint64_t nbytes(int32_t bits) { return roundUp(bits, 8) / 8; } -constexpr inline uint64_t nbytes(int64_t bits) { - return bits <= 0 ? 0 : roundUp(static_cast(bits), 8) / 8; -} - -template , int> = 0> -constexpr inline uint64_t nbytes(T bits) { - if constexpr (std::is_signed_v) { - if (bits <= 0) { - return 0; - } - } else { - if (bits == 0) { - return 0; - } - } - return roundUp(static_cast(bits), 8) / 8; +constexpr inline uint64_t nwords(int32_t bits) { + return roundUp(bits, 64) / 64; } -constexpr inline uint64_t nwords(int32_t bits) { +constexpr inline uint64_t nwords(uint32_t bits) { return roundUp(bits, 64) / 64; } @@ -153,18 +138,8 @@ constexpr inline uint64_t nwords(int64_t bits) { : roundUp(static_cast(bits), 64) / 64; } -template , int> = 0> -constexpr inline uint64_t nwords(T bits) { - if constexpr (std::is_signed_v) { - if (bits <= 0) { - return 0; - } - } else { - if (bits == 0) { - return 0; - } - } - return roundUp(static_cast(bits), 64) / 64; +constexpr inline uint64_t nwords(uint64_t bits) { + return roundUp(bits, 64) / 64; } inline int32_t getAndClearLastSetBit(uint16_t& bits) { diff --git a/bolt/common/base/tests/BitUtilTest.cpp b/bolt/common/base/tests/BitUtilTest.cpp index 4c35280a5..4a8860e2f 100644 --- a/bolt/common/base/tests/BitUtilTest.cpp +++ b/bolt/common/base/tests/BitUtilTest.cpp @@ -228,6 +228,7 @@ TEST_F(BitUtilTest, nwords) { EXPECT_EQ(nwords(63), 1); EXPECT_EQ(nwords(64), 1); EXPECT_EQ(nwords(65), 2); + EXPECT_EQ(nwords(uint32_t{65}), 2); } TEST_F(BitUtilTest, setBits) { diff --git a/bolt/dwio/parquet/reader/PageReader.cpp b/bolt/dwio/parquet/reader/PageReader.cpp index 6022198ef..055e69c2e 100644 --- a/bolt/dwio/parquet/reader/PageReader.cpp +++ b/bolt/dwio/parquet/reader/PageReader.cpp @@ -33,8 +33,6 @@ #include #include -#include - #include "bolt/dwio/common/BufferUtil.h" #include "bolt/dwio/common/ColumnVisitors.h" #include "bolt/dwio/parquet/reader/Decompression.h" @@ -283,7 +281,7 @@ void PageReader::readPageDefLevels() { wideDefineDecoder_, "parquet read error with maxDefine = {}", maxDefine_); wideDefineDecoder_->GetBatch(definitionLevels_.data(), numRepDefsInPage_); leafNulls_.resize(bits::nwords(numRepDefsInPage_)); - leafNullsSize_ = getLengthsAndNulls( + numRowsInPage_ = getLengthsAndNulls( LevelMode::kNulls, leafInfo_, 0, @@ -292,7 +290,7 @@ void PageReader::readPageDefLevels() { nullptr, leafNulls_.data(), 0); - numRowsInPage_ = checkedInt64ToInt32(leafNullsSize_, "leafNullsSize_"); + leafNullsSize_ = numRowsInPage_; numLeafNullsConsumed_ = 0; } @@ -770,7 +768,7 @@ void PageReader::preloadPageRepDefs(const bool keepRepDefRawData) { leafNulls_.data(), leafNullsSize_); leafNullsSize_ += numLeaves; - numLeavesInPage_.push_back(checkedInt64ToInt32(numLeaves, "numLeaves")); + numLeavesInPage_.push_back(numLeaves); } return; } @@ -933,7 +931,7 @@ void PageReader::decodeRepDefsFromBuffer() { const auto& repDefData = preloadedRepDefs_.front(); const auto* rawData = repDefData.data(); constexpr int32_t WordBits = 64; - size_t erasedBits = erasedLeafNullWords_ * WordBits; + int64_t erasedBits = erasedLeafNullWords_ * WordBits; BOLT_CHECK_LE(numLeafNullsConsumed_, leafNullsSize_ + erasedBits); // clear consumed nulls if (numLeafNullsConsumed_ - erasedBits > WordBits) { @@ -1028,17 +1026,17 @@ void PageReader::decodeRepDefsFromBuffer() { leafNulls_.data(), leafNullsSize_); leafNullsSize_ += numLeaves; - numLeavesInPage_.push_back(checkedInt64ToInt32(numLeaves, "numLeaves")); + numLeavesInPage_.push_back(numLeaves); } preloadedRepDefs_.pop_front(); } -int64_t PageReader::getLengthsAndNulls( +int32_t PageReader::getLengthsAndNulls( LevelMode mode, const arrow::LevelInfo& info, - int64_t begin, - int64_t end, - int64_t maxItems, + int32_t begin, + int32_t end, + int32_t maxItems, int32_t* lengths, uint64_t* nulls, int64_t nullsStartIndex) const { @@ -1063,7 +1061,7 @@ int64_t PageReader::getLengthsAndNulls( &bits, lengths); // Convert offsets to lengths. - for (int64_t i = 0; i < bits.values_read; ++i) { + for (auto i = 0; i < bits.values_read; ++i) { lengths[i] = lengths[i + 1] - lengths[i]; } break; @@ -1078,7 +1076,12 @@ int64_t PageReader::getLengthsAndNulls( break; } } - return bits.values_read; + BOLT_CHECK( + bits.values_read >= 0 && bits.values_read <= maxItems, + "values_read out of range: {}, maxItems: {}", + bits.values_read, + maxItems); + return static_cast(bits.values_read); } void PageReader::makeDecoder() { @@ -1273,11 +1276,11 @@ PageReader::readNulls(int32_t numValues, BufferPtr& buffer) { const int64_t erasedBits = erasedLeafNullWords_ * 64; const int64_t relativeConsumed = numLeafNullsConsumed_ - erasedBits; BOLT_CHECK( - !leafNulls_.empty() && leafNullsSize_ >= 0 && - numLeafNullsConsumed_ >= erasedBits && relativeConsumed >= 0 && - relativeConsumed <= leafNullsSize_ && - relativeConsumed + numValues <= leafNullsSize_, - "invalid leafNulls range in readNulls(non-top): maxDefine_={} numValues={} numLeafNullsConsumed_={} erasedLeafNullWords_={} erasedBits={} relativeConsumed={} leafNullsSize_={} leafNullsWords={}", + relativeConsumed >= 0 && leafNullsSize_ >= numValues && + relativeConsumed <= leafNullsSize_ - numValues, + "invalid leafNulls range in readNulls(non-top): maxDefine_={} " + "numValues={} numLeafNullsConsumed_={} erasedLeafNullWords_={} " + "erasedBits={} relativeConsumed={} leafNullsSize_={} leafNullsWords={}", maxDefine_, numValues, numLeafNullsConsumed_, diff --git a/bolt/dwio/parquet/reader/PageReader.h b/bolt/dwio/parquet/reader/PageReader.h index 820c11881..7cca41acf 100644 --- a/bolt/dwio/parquet/reader/PageReader.h +++ b/bolt/dwio/parquet/reader/PageReader.h @@ -30,10 +30,6 @@ #pragma once -#include -#include - -#include "bolt/common/base/Exceptions.h" #include "bolt/common/compression/Compression.h" #include "bolt/dwio/common/BitConcatenation.h" #include "bolt/dwio/common/DirectDecoder.h" @@ -54,16 +50,6 @@ namespace bytedance::bolt::parquet { constexpr int16_t kNonPageOrdinal = static_cast(-1); constexpr uint32_t kDefaultMaxPageHeaderSize = 16 * 1024 * 1024; -inline int32_t checkedInt64ToInt32(int64_t value, const char* what) { - BOLT_CHECK( - value >= 0 && - value <= static_cast(std::numeric_limits::max()), - "{} out of int32_t range: {}", - what, - value); - return static_cast(value); -} - struct CryptoContext { CryptoContext( bool startWithDictionaryPage, @@ -144,12 +130,12 @@ class PageReader { /// to produce. 'lengths' is only filled for mode kList. 'nulls' is filled /// from bit position 'nullsStartIndex'. Returns the number of lengths/nulls /// filled. - int64_t getLengthsAndNulls( + int32_t getLengthsAndNulls( LevelMode mode, const arrow::LevelInfo& info, - int64_t begin, - int64_t end, - int64_t maxItems, + int32_t begin, + int32_t end, + int32_t maxItems, int32_t* FOLLY_NULLABLE lengths, uint64_t* FOLLY_NULLABLE nulls, int64_t nullsStartIndex) const; diff --git a/bolt/dwio/parquet/reader/RepeatedColumnReader.cpp b/bolt/dwio/parquet/reader/RepeatedColumnReader.cpp index e1619b30b..36163f998 100644 --- a/bolt/dwio/parquet/reader/RepeatedColumnReader.cpp +++ b/bolt/dwio/parquet/reader/RepeatedColumnReader.cpp @@ -198,7 +198,7 @@ void MapColumnReader::setLengthsFromRepDefs(PageReader& pageReader) { memset(lengths->asMutable(), 0, lengths->size()); dwio::common::ensureCapacity( nullsInReadRange_, bits::nwords(numRepDefs), &memoryPool_); - auto numLists64 = pageReader.getLengthsAndNulls( + auto numLists = pageReader.getLengthsAndNulls( LevelMode::kList, levelInfo_, repDefRange.first, @@ -207,8 +207,7 @@ void MapColumnReader::setLengthsFromRepDefs(PageReader& pageReader) { lengths->asMutable(), nullsInReadRange()->asMutable(), 0); - const int32_t numLists = checkedInt64ToInt32(numLists64, "numLists"); - lengths->setSize(static_cast(numLists) * sizeof(int32_t)); + lengths->setSize(numLists * sizeof(int32_t)); formatData_->as().setNulls(nullsInReadRange(), numLists); setLengths(std::move(lengths)); } @@ -313,7 +312,7 @@ void ListColumnReader::setLengthsFromRepDefs(PageReader& pageReader) { memset(lengths->asMutable(), 0, lengths->size()); dwio::common::ensureCapacity( nullsInReadRange_, bits::nwords(numRepDefs), &memoryPool_); - auto numLists64 = pageReader.getLengthsAndNulls( + auto numLists = pageReader.getLengthsAndNulls( LevelMode::kList, levelInfo_, repDefRange.first, @@ -322,8 +321,7 @@ void ListColumnReader::setLengthsFromRepDefs(PageReader& pageReader) { lengths->asMutable(), nullsInReadRange()->asMutable(), 0); - const int32_t numLists = checkedInt64ToInt32(numLists64, "numLists"); - lengths->setSize(static_cast(numLists) * sizeof(int32_t)); + lengths->setSize(numLists * sizeof(int32_t)); formatData_->as().setNulls(nullsInReadRange(), numLists); setLengths(std::move(lengths)); } diff --git a/bolt/dwio/parquet/reader/StructColumnReader.cpp b/bolt/dwio/parquet/reader/StructColumnReader.cpp index 25adb296f..c40424972 100644 --- a/bolt/dwio/parquet/reader/StructColumnReader.cpp +++ b/bolt/dwio/parquet/reader/StructColumnReader.cpp @@ -254,7 +254,7 @@ void StructColumnReader::setNullsFromRepDefs(PageReader& pageReader) { int32_t numRepDefs = repDefRange.second - repDefRange.first; dwio::common::ensureCapacity( nullsInReadRange_, bits::nwords(numRepDefs), &memoryPool_); - auto numStructs64 = pageReader.getLengthsAndNulls( + auto numStructs = pageReader.getLengthsAndNulls( levelMode_, levelInfo_, repDefRange.first, @@ -263,8 +263,7 @@ void StructColumnReader::setNullsFromRepDefs(PageReader& pageReader) { nullptr, nullsInReadRange()->asMutable(), 0); - formatData_->as().setNulls( - nullsInReadRange(), checkedInt64ToInt32(numStructs64, "numStructs")); + formatData_->as().setNulls(nullsInReadRange(), numStructs); } void StructColumnReader::filterRowGroups(