storage: fix block rows not match when filter column is the first non-empty column in the block (#9484)

ref #9472

storage: fix block rows not match when filter column is the first non-empty column in the block

Signed-off-by: Lloyd-Pottiger <[email protected]>
Co-authored-by: JaySon <[email protected]>
pingcap · Sep 29, 2024 · 6578d2f · 6578d2f
1 parent 9fd7ba2
commit 6578d2f
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 8 deletions.
diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp
@@ -531,7 +531,7 @@ static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, cons
         }
     }
 
-    return ReturnType(true);
+    return static_cast<ReturnType>(true);
 }
 
 /// join blocks by columns
@@ -541,10 +541,8 @@ Block hstackBlocks(Blocks && blocks, const Block & header)
         return {};
 
     Block res = header.cloneEmpty();
-    size_t num_rows = blocks.front().rows();
     for (const auto & block : blocks)
     {
-        RUNTIME_CHECK_MSG(block.rows() == num_rows, "Cannot hstack blocks with different number of rows");
         for (const auto & elem : block)
         {
             if (likely(res.has(elem.name)))

diff --git a/dbms/src/Core/Block.h b/dbms/src/Core/Block.h
@@ -175,11 +175,14 @@ using BucketBlocksListMap = std::map<Int32, BlocksList>;
 /// Join blocks by columns
 /// The schema of the output block is the same as the header block.
 /// The columns not in the header block will be ignored.
-/// For example:
-/// header: (a UInt32, b UInt32, c UInt32, d UInt32)
-/// block1: (a UInt32, b UInt32, c UInt32, e UInt32), rows: 3
-/// block2: (d UInt32), rows: 3
-/// result: (a UInt32, b UInt32, c UInt32, d UInt32), rows: 3
+/// NOTE: The input blocks can have columns with different sizes,
+///       but the columns in the header block must all have the same size.
+///       Otherwise, the returned block will contain columns with different sizes.
+/// Example:
+///       header: (a UInt32, b UInt32, c UInt32, d UInt32)
+///       block1: (a UInt32, b UInt32, c UInt32, e UInt32), rows: 3
+///       block2: (d UInt32), rows: 3
+///       result: (a UInt32, b UInt32, c UInt32, d UInt32), rows: 3
 Block hstackBlocks(Blocks && blocks, const Block & header);
 
 /// Join blocks by rows

diff --git a/dbms/src/Storages/DeltaMerge/LateMaterializationBlockInputStream.cpp b/dbms/src/Storages/DeltaMerge/LateMaterializationBlockInputStream.cpp
@@ -110,12 +110,18 @@ Block LateMaterializationBlockInputStream::readImpl()
                 // so only if the number of rows left after filtering out is large enough,
                 // we can skip some packs of the next block, call readWithFilter to get the next block.
                 rest_column_block = rest_column_stream->readWithFilter(*filter);
+                ColumnPtr filter_column;
                 for (auto & col : filter_column_block)
                 {
                     if (col.name == filter_column_name)
+                    {
+                        filter_column = col.column;
                         continue;
+                    }
                     col.column = col.column->filter(*filter, passed_count);
                 }
+                if (header.has(filter_column_name))
+                    filter_column = filter_column->filter(*filter, passed_count);
             }
             else if (filter_out_count > 0)
             {
@@ -126,12 +132,19 @@ Block LateMaterializationBlockInputStream::readImpl()
                 {
                     col.column = col.column->filter(*filter, passed_count);
                 }
+                ColumnPtr filter_column;
+
                 for (auto & col : filter_column_block)
                 {
                     if (col.name == filter_column_name)
+                    {
+                        filter_column = col.column;
                         continue;
+                    }
                     col.column = col.column->filter(*filter, passed_count);
                 }
+                if (header.has(filter_column_name))
+                    filter_column = filter_column->filter(*filter, passed_count);
             }
             else
             {