From 28a2314dd36fc1e990b9d8a3a6aec89ba8a220b8 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Wed, 28 Feb 2024 14:12:32 +0800 Subject: [PATCH] fix potential memcpy overlap issue in join (#8797) (#8801) close pingcap/tiflash#8796 --- dbms/src/Common/PODArray.h | 18 ++++ dbms/src/Flash/tests/gtest_join_executor.cpp | 90 +++++++++++++------- dbms/src/Interpreters/Join.cpp | 4 +- 3 files changed, 81 insertions(+), 31 deletions(-) diff --git a/dbms/src/Common/PODArray.h b/dbms/src/Common/PODArray.h index a5b24a974c2..178cb1f1ea3 100644 --- a/dbms/src/Common/PODArray.h +++ b/dbms/src/Common/PODArray.h @@ -634,8 +634,26 @@ class PODArray : public PODArrayBasec_end = this->c_start + bytes_to_copy; } + void assignFromSelf(size_t start, size_t end) + { + assert(end >= start && end <= this->size()); + if unlikely (start == 0) + { + this->c_end = this->c_start + this->byte_size(end); + return; + } + + size_t required_capacity = end - start; + size_t bytes_to_copy = this->byte_size(required_capacity); + const auto & from_begin = this->begin() + start; + memmove(this->c_start, reinterpret_cast(&*from_begin), bytes_to_copy); + this->c_end = this->c_start + bytes_to_copy; + } + void assign(const PODArray & from) { + if unlikely (this == &from) + return; assign(from.begin(), from.end()); } diff --git a/dbms/src/Flash/tests/gtest_join_executor.cpp b/dbms/src/Flash/tests/gtest_join_executor.cpp index b92b2fea62f..9bcfcc14c62 100644 --- a/dbms/src/Flash/tests/gtest_join_executor.cpp +++ b/dbms/src/Flash/tests/gtest_join_executor.cpp @@ -561,36 +561,50 @@ CATCH TEST_F(JoinExecutorTestRunner, Issue8791) try { + // clang-format off auto build_key = toNullableVec( "id", { - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, }); auto build_col = toNullableVec( "build_value", { - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 10, 30, 10, 30, 10, 30, 10, 10, 10, 30, 10, 30, 10, 30, 30, + 10, 30, 10, 30, 10, 30, 10, 30, + 30, 30, 30, 30, 30, + 30, 30, 30, 30, 30, 10, + 30, 30, 10, 30, 30, 30, + 30, 30, 30, 10, 10, 30, }); auto probe_key = toNullableVec( "id", { - 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, + 4, 6, 4, 6, 4, 6, 4, 6, 4, 6, 4, 6, 4, 6, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 4, 6, 4, 6, 4, 6, 4, 6, 4, 6, 4, 6, 4, 6, + 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, }); auto probe_col = toNullableVec( "probe_value", { - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, }); + // clang-format on context.addMockTable( "issue_8791", "build_table", @@ -602,21 +616,39 @@ try {{"id", TiDB::TP::TypeLongLong}, {"probe_value", TiDB::TP::TypeLongLong}}, {probe_key, probe_col}); - auto request = context.scan("issue_8791", "probe_table") - .join( - context.scan("issue_8791", "build_table"), - tipb::JoinType::TypeAntiSemiJoin, - {col("id")}, - {}, - {}, - {gt(col("probe_value"), col("build_value"))}, - {}) - .aggregation({Count(col("id"))}, {}) - .build(context); - - context.context->setSetting("max_block_size", Field(static_cast(200))); - auto expected_columns = {toVec({1})}; - ASSERT_COLUMNS_EQ_UR(expected_columns, executeStreams(request, 1)); + context.context->setSetting("max_block_size", Field(static_cast(90))); + { + auto anti_join_request = context.scan("issue_8791", "probe_table") + .join( + context.scan("issue_8791", "build_table"), + tipb::JoinType::TypeAntiSemiJoin, + {col("id")}, + {}, + {}, + {gt(col("probe_value"), col("build_value"))}, + {}) + .aggregation({Count(col("id"))}, {}) + .build(context); + + auto expected_columns = {toVec({16})}; + ASSERT_COLUMNS_EQ_UR(expected_columns, executeStreams(anti_join_request, 1)); + } + { + auto inner_join_request = context.scan("issue_8791", "probe_table") + .join( + context.scan("issue_8791", "build_table"), + tipb::JoinType::TypeInnerJoin, + {col("id")}, + {}, + {}, + {gt(col("probe_value"), col("build_value"))}, + {}) + .aggregation({Count(col("id"))}, {}) + .build(context); + + auto expected_columns = {toVec({240})}; + ASSERT_COLUMNS_EQ_UR(expected_columns, executeStreams(inner_join_request, 1)); + } } CATCH diff --git a/dbms/src/Interpreters/Join.cpp b/dbms/src/Interpreters/Join.cpp index 02ed80170d9..5f8f21e3235 100644 --- a/dbms/src/Interpreters/Join.cpp +++ b/dbms/src/Interpreters/Join.cpp @@ -875,9 +875,9 @@ Block Join::doJoinBlockHash(ProbeProcessInfo & probe_process_info) const auto helper_col = block.getByName(match_helper_name).column; helper_col = helper_col->cut(probe_process_info.start_row, probe_process_info.end_row); } - offsets_to_replicate->assign(offsets_to_replicate->begin() + probe_process_info.start_row, offsets_to_replicate->begin() + probe_process_info.end_row); + offsets_to_replicate->assignFromSelf(probe_process_info.start_row, probe_process_info.end_row); if (isAntiJoin(kind) && filter != nullptr) - filter->assign(filter->begin() + probe_process_info.start_row, filter->begin() + probe_process_info.end_row); + filter->assignFromSelf(probe_process_info.start_row, probe_process_info.end_row); } } }