From 82c1eae6ad21a2367b19029ece53ffce428df165 Mon Sep 17 00:00:00 2001 From: Ti Chi Robot Date: Thu, 23 Jun 2022 23:40:37 +0800 Subject: [PATCH] DDL: Use Column Name Instead of Offset to Find the common handle cluster index (#5166) (#5191) close pingcap/tiflash#5154 --- dbms/src/Core/Block.cpp | 14 +- dbms/src/Debug/MockTiDB.cpp | 13 +- dbms/src/Debug/dbgFuncMockRaftCommand.cpp | 42 ++- dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp | 94 +++--- dbms/src/Debug/dbgFuncRegion.cpp | 8 +- dbms/src/Debug/dbgTools.cpp | 316 ++++++++++-------- .../DecodingStorageSchemaSnapshot.h | 14 +- .../Transaction/RegionBlockReader.cpp | 2 + dbms/src/Storages/Transaction/TiDB.cpp | 2 + dbms/src/Storages/Transaction/TiDB.h | 21 +- .../Storages/Transaction/TiKVRecordFormat.h | 128 +++++-- .../Transaction/tests/RowCodecTestUtils.h | 8 +- .../tests/bench_region_block_reader.cpp | 171 ++++++++++ ...gtest_decoding_storage_schema_snapshot.cpp | 65 ++++ .../tests/gtest_region_block_reader.cpp | 68 ++-- .../clustered_index/ddl.test | 86 +++++ 16 files changed, 756 insertions(+), 296 deletions(-) create mode 100644 dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp create mode 100644 dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 8612d7dd7e5..4970c0da606 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -224,10 +224,18 @@ void Block::checkNumberOfRows() const if (rows == -1) rows = size; else if (rows != size) - throw Exception("Sizes of columns doesn't match: " - + data.front().name + ": " + toString(rows) - + ", " + elem.name + ": " + toString(size), + { + auto first_col = data.front(); + throw Exception(fmt::format( + "Sizes of columns doesn't match: {}(id={}): {}, {}(id={}): {}", + first_col.name, + first_col.column_id, + rows, + elem.name, + elem.column_id, + size), ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + } } } diff --git a/dbms/src/Debug/MockTiDB.cpp b/dbms/src/Debug/MockTiDB.cpp index 438ac494c15..26bed06a0fd 100644 --- a/dbms/src/Debug/MockTiDB.cpp +++ b/dbms/src/Debug/MockTiDB.cpp @@ -173,16 +173,16 @@ TiDB::TableInfoPtr MockTiDB::parseColumns( if (it != columns.defaults.end()) default_value = getDefaultValue(it->second.expression); table_info.columns.emplace_back(reverseGetColumnInfo(column, i++, default_value, true)); - for (auto sit = string_tokens.begin(); sit != string_tokens.end(); sit++) + for (const auto & string_token : string_tokens) { // todo support prefix index - if (*sit == column.name) + if (string_token == column.name) { has_pk = true; if (!column.type->isInteger() && !column.type->isUnsignedInteger()) has_non_int_pk = true; table_info.columns.back().setPriKeyFlag(); - pk_column_pos_map[*sit] = i - 2; + pk_column_pos_map[string_token] = i - 2; break; } } @@ -207,7 +207,6 @@ TiDB::TableInfoPtr MockTiDB::parseColumns( { String & name = string_tokens[index]; index_info.idx_cols[index].name = name; - index_info.idx_cols[index].offset = pk_column_pos_map[name]; index_info.idx_cols[index].length = -1; } } @@ -540,11 +539,11 @@ TiDB::DBInfoPtr MockTiDB::getDBInfoByID(DatabaseID db_id) { TiDB::DBInfoPtr db_ptr = std::make_shared(TiDB::DBInfo()); db_ptr->id = db_id; - for (auto it = databases.begin(); it != databases.end(); it++) + for (auto & database : databases) { - if (it->second == db_id) + if (database.second == db_id) { - db_ptr->name = it->first; + db_ptr->name = database.first; break; } } diff --git a/dbms/src/Debug/dbgFuncMockRaftCommand.cpp b/dbms/src/Debug/dbgFuncMockRaftCommand.cpp index 7036145f2c0..2ae2bcb4f30 100644 --- a/dbms/src/Debug/dbgFuncMockRaftCommand.cpp +++ b/dbms/src/Debug/dbgFuncMockRaftCommand.cpp @@ -26,7 +26,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); const String & database_name = typeid_cast(*args[1]).name; const String & table_name = typeid_cast(*args[2]).name; auto table = MockTiDB::instance().getTableByName(database_name, table_name); @@ -35,7 +35,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar if (4 + handle_column_size * 4 != args.size()) throw Exception("Args not matched, should be: region-id1, database-name, table-name, start1, end1, start2, end2, region-id2", ErrorCodes::BAD_ARGUMENTS); - RegionID region_id2 = (RegionID)safeGet(typeid_cast(*args[args.size() - 1]).value); + auto region_id2 = static_cast(safeGet(typeid_cast(*args[args.size() - 1]).value)); auto table_id = table->id(); TiKVKey start_key1, start_key2, end_key1, end_key2; @@ -45,9 +45,17 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar std::vector start_keys2; std::vector end_keys1; std::vector end_keys2; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + for (size_t i = 0; i < handle_column_size; i++) { - auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; auto start_field1 = RegionBench::convertField(column_info, typeid_cast(*args[3 + i]).value); TiDB::DatumBumpy start_datum1 = TiDB::DatumBumpy(start_field1, column_info.tp); @@ -74,10 +82,10 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar } else { - HandleID start1 = (HandleID)safeGet(typeid_cast(*args[3]).value); - HandleID end1 = (HandleID)safeGet(typeid_cast(*args[4]).value); - HandleID start2 = (HandleID)safeGet(typeid_cast(*args[5]).value); - HandleID end2 = (HandleID)safeGet(typeid_cast(*args[6]).value); + auto start1 = static_cast(safeGet(typeid_cast(*args[3]).value)); + auto end1 = static_cast(safeGet(typeid_cast(*args[4]).value)); + auto start2 = static_cast(safeGet(typeid_cast(*args[5]).value)); + auto end2 = static_cast(safeGet(typeid_cast(*args[6]).value)); start_key1 = RecordKVFormat::genKey(table_id, start1); start_key2 = RecordKVFormat::genKey(table_id, start2); end_key1 = RecordKVFormat::genKey(table_id, end1); @@ -96,7 +104,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar request.set_cmd_type(raft_cmdpb::AdminCmdType::BatchSplit); raft_cmdpb::BatchSplitResponse * splits = response.mutable_splits(); { - auto region = splits->add_regions(); + auto * region = splits->add_regions(); region->set_id(region_id); region->set_start_key(start_key1); region->set_end_key(end_key1); @@ -104,7 +112,7 @@ void MockRaftCommand::dbgFuncRegionBatchSplit(Context & context, const ASTs & ar *region->mutable_region_epoch() = new_epoch; } { - auto region = splits->add_regions(); + auto * region = splits->add_regions(); region->set_id(region_id2); region->set_start_key(start_key2); region->set_end_key(end_key2); @@ -130,8 +138,8 @@ void MockRaftCommand::dbgFuncPrepareMerge(Context & context, const ASTs & args, throw Exception("Args not matched, should be: source-id1, target-id2", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); - RegionID target_id = (RegionID)safeGet(typeid_cast(*args[1]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto target_id = static_cast(safeGet(typeid_cast(*args[1]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -143,7 +151,7 @@ void MockRaftCommand::dbgFuncPrepareMerge(Context & context, const ASTs & args, { request.set_cmd_type(raft_cmdpb::AdminCmdType::PrepareMerge); - auto prepare_merge = request.mutable_prepare_merge(); + auto * prepare_merge = request.mutable_prepare_merge(); { auto min_index = region->appliedIndex(); prepare_merge->set_min_index(min_index); @@ -170,8 +178,8 @@ void MockRaftCommand::dbgFuncCommitMerge(Context & context, const ASTs & args, D throw Exception("Args not matched, should be: source-id1, current-id2", ErrorCodes::BAD_ARGUMENTS); } - RegionID source_id = (RegionID)safeGet(typeid_cast(*args[0]).value); - RegionID current_id = (RegionID)safeGet(typeid_cast(*args[1]).value); + auto source_id = static_cast(safeGet(typeid_cast(*args[0]).value)); + auto current_id = static_cast(safeGet(typeid_cast(*args[1]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -182,7 +190,7 @@ void MockRaftCommand::dbgFuncCommitMerge(Context & context, const ASTs & args, D { request.set_cmd_type(raft_cmdpb::AdminCmdType::CommitMerge); - auto commit_merge = request.mutable_commit_merge(); + auto * commit_merge = request.mutable_commit_merge(); { commit_merge->set_commit(source_region->appliedIndex()); *commit_merge->mutable_source() = source_region->getMetaRegion(); @@ -206,7 +214,7 @@ void MockRaftCommand::dbgFuncRollbackMerge(Context & context, const ASTs & args, throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); + auto region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); auto & tmt = context.getTMTContext(); auto & kvstore = tmt.getKVStore(); @@ -217,7 +225,7 @@ void MockRaftCommand::dbgFuncRollbackMerge(Context & context, const ASTs & args, { request.set_cmd_type(raft_cmdpb::AdminCmdType::RollbackMerge); - auto rollback_merge = request.mutable_rollback_merge(); + auto * rollback_merge = request.mutable_rollback_merge(); { auto merge_state = region->getMergeState(); rollback_merge->set_commit(merge_state.commit()); diff --git a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp index 5fe2eadf15b..07dd5db0dea 100644 --- a/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp +++ b/dbms/src/Debug/dbgFuncMockRaftSnapshot.cpp @@ -45,7 +45,7 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) { const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[2]).value); + RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); TableID table_id = RegionBench::getTableID(context, database_name, table_name, ""); MockTiDB::TablePtr table = MockTiDB::instance().getTableByName(database_name, table_name); auto & table_info = table->table_info; @@ -53,10 +53,16 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) size_t handle_column_size = is_common_handle ? table_info.getPrimaryIndexInfo().idx_cols.size() : 1; RegionPtr region; + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + if (!is_common_handle) { - HandleID start = (HandleID)safeGet(typeid_cast(*args[3]).value); - HandleID end = (HandleID)safeGet(typeid_cast(*args[4]).value); + HandleID start = static_cast(safeGet(typeid_cast(*args[3]).value)); + HandleID end = static_cast(safeGet(typeid_cast(*args[4]).value)); region = RegionBench::createRegion(table_id, region_id, start, end); } else @@ -66,7 +72,8 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) std::vector end_keys; for (size_t i = 0; i < handle_column_size; i++) { - auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[3 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); @@ -89,9 +96,9 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) // Parse row values for (auto it = args_begin; it != args_end; it += len) { - HandleID handle_id = is_common_handle ? 0 : (HandleID)safeGet(typeid_cast(*it[0]).value); - Timestamp tso = (Timestamp)safeGet(typeid_cast(*it[1]).value); - UInt8 del = (UInt8)safeGet(typeid_cast(*it[2]).value); + HandleID handle_id = is_common_handle ? 0 : static_cast(safeGet(typeid_cast(*it[0]).value)); + Timestamp tso = static_cast(safeGet(typeid_cast(*it[1]).value)); + UInt8 del = static_cast(safeGet(typeid_cast(*it[2]).value)); { std::vector fields; @@ -107,9 +114,9 @@ RegionPtr GenDbgRegionSnapshotWithData(Context & context, const ASTs & args) std::vector keys; // handle key for (size_t i = 0; i < table_info.getPrimaryIndexInfo().idx_cols.size(); i++) { - auto & idx_col = table_info.getPrimaryIndexInfo().idx_cols[i]; - auto & column_info = table_info.columns[idx_col.offset]; - auto start_field = RegionBench::convertField(column_info, fields[idx_col.offset]); + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + auto & column_info = table_info.columns[idx]; + auto start_field = RegionBench::convertField(column_info, fields[idx]); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); keys.emplace_back(start_datum.field()); } @@ -153,7 +160,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotWithData(Context & context, const AST // DBGInvoke region_snapshot(region-id, start-key, end-key, database-name, table-name[, partition-id]) void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args, DBGInvoker::Printer output) { - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[0]).value); + RegionID region_id = static_cast(safeGet(typeid_cast(*args[0]).value)); bool has_partition_id = false; size_t args_size = args.size(); if (dynamic_cast(args[args_size - 1].get()) != nullptr) @@ -183,9 +190,16 @@ void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args // Get start key and end key form multiple column if it is clustered_index. std::vector start_keys; std::vector end_keys; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < handle_column_size; i++) { - auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[1 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); @@ -199,8 +213,8 @@ void MockRaftCommand::dbgFuncRegionSnapshot(Context & context, const ASTs & args } else { - HandleID start = (HandleID)safeGet(typeid_cast(*args[1]).value); - HandleID end = (HandleID)safeGet(typeid_cast(*args[2]).value); + HandleID start = static_cast(safeGet(typeid_cast(*args[1]).value)); + HandleID end = static_cast(safeGet(typeid_cast(*args[2]).value)); start_key = RecordKVFormat::genKey(table_id, start); end_key = RecordKVFormat::genKey(table_id, end); } @@ -238,21 +252,21 @@ struct MockSSTReader Data() = default; }; - MockSSTReader(const Data & data_) + explicit MockSSTReader(const Data & data_) : iter(data_.begin()) , end(data_.end()) , remained(iter != end) {} - static SSTReaderPtr ffi_get_cf_file_reader(const Data & data_) { return SSTReaderPtr{new MockSSTReader(data_)}; } + static SSTReaderPtr ffiGetCfFileReader(const Data & data_) { return SSTReaderPtr{new MockSSTReader(data_)}; } - bool ffi_remained() const { return iter != end; } + bool ffiRemained() const { return iter != end; } - BaseBuffView ffi_key() const { return {iter->first.data(), iter->first.length()}; } + BaseBuffView ffiKey() const { return {iter->first.data(), iter->first.length()}; } - BaseBuffView ffi_val() const { return {iter->second.data(), iter->second.length()}; } + BaseBuffView ffiVal() const { return {iter->second.data(), iter->second.length()}; } - void ffi_next() { ++iter; } + void ffiNext() { ++iter; } static std::map & getMockSSTData() { return MockSSTData; } @@ -273,31 +287,31 @@ SSTReaderPtr fn_get_sst_reader(SSTView v, RaftStoreProxyPtr) if (iter == MockSSTReader::getMockSSTData().end()) throw Exception("Can not find data in MockSSTData, [key=" + s + "] [type=" + CFToName(v.type) + "]"); auto & d = iter->second; - return MockSSTReader::ffi_get_cf_file_reader(d); + return MockSSTReader::ffiGetCfFileReader(d); } uint8_t fn_remained(SSTReaderPtr ptr, ColumnFamilyType) { - auto reader = reinterpret_cast(ptr.inner); - return reader->ffi_remained(); + auto * reader = reinterpret_cast(ptr.inner); + return reader->ffiRemained(); } BaseBuffView fn_key(SSTReaderPtr ptr, ColumnFamilyType) { - auto reader = reinterpret_cast(ptr.inner); - return reader->ffi_key(); + auto * reader = reinterpret_cast(ptr.inner); + return reader->ffiKey(); } BaseBuffView fn_value(SSTReaderPtr ptr, ColumnFamilyType) { - auto reader = reinterpret_cast(ptr.inner); - return reader->ffi_val(); + auto * reader = reinterpret_cast(ptr.inner); + return reader->ffiVal(); } void fn_next(SSTReaderPtr ptr, ColumnFamilyType) { - auto reader = reinterpret_cast(ptr.inner); - reader->ffi_next(); + auto * reader = reinterpret_cast(ptr.inner); + reader->ffiNext(); } void fn_gc(SSTReaderPtr ptr, ColumnFamilyType) { - auto reader = reinterpret_cast(ptr.inner); + auto * reader = reinterpret_cast(ptr.inner); delete reader; } @@ -462,9 +476,9 @@ void MockRaftCommand::dbgFuncIngestSST(Context & context, const ASTs & args, DBG { const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[2]).value); - RegionID start_handle = (RegionID)safeGet(typeid_cast(*args[3]).value); - RegionID end_handle = (RegionID)safeGet(typeid_cast(*args[4]).value); + RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + RegionID start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); + RegionID end_handle = static_cast(safeGet(typeid_cast(*args[4]).value)); MockTiDB::TablePtr table = MockTiDB::instance().getTableByName(database_name, table_name); const auto & table_info = RegionBench::getTableInfo(context, database_name, table_name); @@ -585,7 +599,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotApplyBlock(Context & context, const A throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); } - RegionID region_id = (RegionID)safeGet(typeid_cast(*args.front()).value); + RegionID region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); auto [region, block_cache] = GLOBAL_REGION_MAP.popRegionCache("__snap_" + std::to_string(region_id)); auto & tmt = context.getTMTContext(); context.getTMTContext().getKVStore()->checkAndApplySnapshot({region, std::move(block_cache)}, tmt); @@ -607,16 +621,16 @@ void MockRaftCommand::dbgFuncRegionSnapshotPreHandleDTFiles(Context & context, c const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[2]).value); - RegionID start_handle = (RegionID)safeGet(typeid_cast(*args[3]).value); - RegionID end_handle = (RegionID)safeGet(typeid_cast(*args[4]).value); + RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); + RegionID start_handle = static_cast(safeGet(typeid_cast(*args[3]).value)); + RegionID end_handle = static_cast(safeGet(typeid_cast(*args[4]).value)); const String schema_str = safeGet(typeid_cast(*args[5]).value); String handle_pk_name = safeGet(typeid_cast(*args[6]).value); UInt64 test_fields = 1; if (args.size() > 7) - test_fields = (UInt64)safeGet(typeid_cast(*args[7]).value); + test_fields = static_cast(safeGet(typeid_cast(*args[7]).value)); std::unordered_set cfs; { String cfs_str = "write,default"; @@ -707,7 +721,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotPreHandleDTFilesWithHandles(Context & const String & database_name = typeid_cast(*args[0]).name; const String & table_name = typeid_cast(*args[1]).name; - RegionID region_id = (RegionID)safeGet(typeid_cast(*args[2]).value); + RegionID region_id = static_cast(safeGet(typeid_cast(*args[2]).value)); const String schema_str = safeGet(typeid_cast(*args[3]).value); String handle_pk_name = safeGet(typeid_cast(*args[4]).value); @@ -800,7 +814,7 @@ void MockRaftCommand::dbgFuncRegionSnapshotApplyDTFiles(Context & context, const if (args.size() != 1) throw Exception("Args not matched, should be: region-id", ErrorCodes::BAD_ARGUMENTS); - RegionID region_id = (RegionID)safeGet(typeid_cast(*args.front()).value); + RegionID region_id = static_cast(safeGet(typeid_cast(*args.front()).value)); const auto region_name = "__snap_snap_" + std::to_string(region_id); auto [new_region, ingest_ids] = GLOBAL_REGION_MAP.popRegionSnap(region_name); auto & tmt = context.getTMTContext(); diff --git a/dbms/src/Debug/dbgFuncRegion.cpp b/dbms/src/Debug/dbgFuncRegion.cpp index d8a20bfa0dc..1809618e359 100644 --- a/dbms/src/Debug/dbgFuncRegion.cpp +++ b/dbms/src/Debug/dbgFuncRegion.cpp @@ -47,9 +47,15 @@ void dbgFuncPutRegion(Context & context, const ASTs & args, DBGInvoker::Printer { std::vector start_keys; std::vector end_keys; + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < handle_column_size; i++) { - const auto & column_info = table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset]; + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[idx]; auto start_field = RegionBench::convertField(column_info, typeid_cast(*args[1 + i]).value); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); start_keys.emplace_back(start_datum.field()); diff --git a/dbms/src/Debug/dbgTools.cpp b/dbms/src/Debug/dbgTools.cpp index 8de590d28f4..4fdcb045785 100644 --- a/dbms/src/Debug/dbgTools.cpp +++ b/dbms/src/Debug/dbgTools.cpp @@ -18,7 +18,6 @@ namespace DB { - namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -27,7 +26,6 @@ extern const int UNKNOWN_TABLE; namespace RegionBench { - using TiDB::ColumnInfo; using TiDB::TableInfo; @@ -64,7 +62,10 @@ Regions createRegions(TableID table_id, size_t region_num, size_t key_num_each_r } RegionPtr createRegion( - const TiDB::TableInfo & table_info, RegionID region_id, std::vector & start_keys, std::vector & end_keys) + const TiDB::TableInfo & table_info, + RegionID region_id, + std::vector & start_keys, + std::vector & end_keys) { metapb::Region region; metapb::Peer peer; @@ -98,8 +99,7 @@ void setupDelRequest(raft_cmdpb::Request * req, const std::string & cf, const Ti del->set_key(key.getStr()); } -void addRequestsToRaftCmd(raft_cmdpb::RaftCmdRequest & request, const TiKVKey & key, const TiKVValue & value, UInt64 prewrite_ts, - UInt64 commit_ts, bool del, const String pk = "pk") +void addRequestsToRaftCmd(raft_cmdpb::RaftCmdRequest & request, const TiKVKey & key, const TiKVValue & value, UInt64 prewrite_ts, UInt64 commit_ts, bool del, const String pk = "pk") { TiKVKey commit_key = RecordKVFormat::appendTs(key, commit_ts); const TiKVKey & lock_key = key; @@ -146,22 +146,22 @@ T convertNumber(const Field & field) { switch (field.getType()) { - case Field::Types::Int64: - return static_cast(field.get()); - case Field::Types::UInt64: - return static_cast(field.get()); - case Field::Types::Float64: - return static_cast(field.get()); - case Field::Types::Decimal32: - return static_cast(field.get>()); - case Field::Types::Decimal64: - return static_cast(field.get>()); - case Field::Types::Decimal128: - return static_cast(field.get>()); - case Field::Types::Decimal256: - return static_cast(field.get>()); - default: - throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to number", ErrorCodes::LOGICAL_ERROR); + case Field::Types::Int64: + return static_cast(field.get()); + case Field::Types::UInt64: + return static_cast(field.get()); + case Field::Types::Float64: + return static_cast(field.get()); + case Field::Types::Decimal32: + return static_cast(field.get>()); + case Field::Types::Decimal64: + return static_cast(field.get>()); + case Field::Types::Decimal128: + return static_cast(field.get>()); + case Field::Types::Decimal256: + return static_cast(field.get>()); + default: + throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to number", ErrorCodes::LOGICAL_ERROR); } } @@ -169,22 +169,22 @@ Field convertDecimal(const ColumnInfo & column_info, const Field & field) { switch (field.getType()) { - case Field::Types::Int64: - return column_info.getDecimalValue(std::to_string(field.get())); - case Field::Types::UInt64: - return column_info.getDecimalValue(std::to_string(field.get())); - case Field::Types::Float64: - return column_info.getDecimalValue(std::to_string(field.get())); - case Field::Types::Decimal32: - return column_info.getDecimalValue(field.get().toString(column_info.decimal)); - case Field::Types::Decimal64: - return column_info.getDecimalValue(field.get().toString(column_info.decimal)); - case Field::Types::Decimal128: - return column_info.getDecimalValue(field.get().toString(column_info.decimal)); - case Field::Types::Decimal256: - return column_info.getDecimalValue(field.get().toString(column_info.decimal)); - default: - throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to number", ErrorCodes::LOGICAL_ERROR); + case Field::Types::Int64: + return column_info.getDecimalValue(std::to_string(field.get())); + case Field::Types::UInt64: + return column_info.getDecimalValue(std::to_string(field.get())); + case Field::Types::Float64: + return column_info.getDecimalValue(std::to_string(field.get())); + case Field::Types::Decimal32: + return column_info.getDecimalValue(field.get().toString(column_info.decimal)); + case Field::Types::Decimal64: + return column_info.getDecimalValue(field.get().toString(column_info.decimal)); + case Field::Types::Decimal128: + return column_info.getDecimalValue(field.get().toString(column_info.decimal)); + case Field::Types::Decimal256: + return column_info.getDecimalValue(field.get().toString(column_info.decimal)); + default: + throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to number", ErrorCodes::LOGICAL_ERROR); } } @@ -192,13 +192,13 @@ Field convertEnum(const ColumnInfo & column_info, const Field & field) { switch (field.getType()) { - case Field::Types::Int64: - case Field::Types::UInt64: - return convertNumber(field); - case Field::Types::String: - return static_cast(column_info.getEnumIndex(field.get())); - default: - throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to Enum", ErrorCodes::LOGICAL_ERROR); + case Field::Types::Int64: + case Field::Types::UInt64: + return convertNumber(field); + case Field::Types::String: + return static_cast(column_info.getEnumIndex(field.get())); + default: + throw Exception(String("Unable to convert field type ") + field.getTypeName() + " to Enum", ErrorCodes::LOGICAL_ERROR); } } @@ -209,45 +209,45 @@ Field convertField(const ColumnInfo & column_info, const Field & field) switch (column_info.tp) { - case TiDB::TypeTiny: - case TiDB::TypeShort: - case TiDB::TypeLong: - case TiDB::TypeLongLong: - case TiDB::TypeInt24: - if (column_info.hasUnsignedFlag()) - return convertNumber(field); - else - return convertNumber(field); - case TiDB::TypeFloat: - case TiDB::TypeDouble: - return convertNumber(field); - case TiDB::TypeDate: - case TiDB::TypeDatetime: - case TiDB::TypeTimestamp: - return parseMyDateTime(field.safeGet()); - case TiDB::TypeVarchar: - case TiDB::TypeTinyBlob: - case TiDB::TypeMediumBlob: - case TiDB::TypeLongBlob: - case TiDB::TypeBlob: - case TiDB::TypeVarString: - case TiDB::TypeString: - return field; - case TiDB::TypeEnum: - return convertEnum(column_info, field); - case TiDB::TypeNull: - return Field(); - case TiDB::TypeDecimal: - case TiDB::TypeNewDecimal: - return convertDecimal(column_info, field); - case TiDB::TypeTime: - case TiDB::TypeYear: - return convertNumber(field); - case TiDB::TypeSet: - case TiDB::TypeBit: + case TiDB::TypeTiny: + case TiDB::TypeShort: + case TiDB::TypeLong: + case TiDB::TypeLongLong: + case TiDB::TypeInt24: + if (column_info.hasUnsignedFlag()) return convertNumber(field); - default: - return Field(); + else + return convertNumber(field); + case TiDB::TypeFloat: + case TiDB::TypeDouble: + return convertNumber(field); + case TiDB::TypeDate: + case TiDB::TypeDatetime: + case TiDB::TypeTimestamp: + return parseMyDateTime(field.safeGet()); + case TiDB::TypeVarchar: + case TiDB::TypeTinyBlob: + case TiDB::TypeMediumBlob: + case TiDB::TypeLongBlob: + case TiDB::TypeBlob: + case TiDB::TypeVarString: + case TiDB::TypeString: + return field; + case TiDB::TypeEnum: + return convertEnum(column_info, field); + case TiDB::TypeNull: + return Field(); + case TiDB::TypeDecimal: + case TiDB::TypeNewDecimal: + return convertDecimal(column_info, field); + case TiDB::TypeTime: + case TiDB::TypeYear: + return convertNumber(field); + case TiDB::TypeSet: + case TiDB::TypeBit: + return convertNumber(field); + default: + return Field(); } } @@ -255,14 +255,14 @@ void encodeRow(const TiDB::TableInfo & table_info, const std::vector & fi { if (table_info.columns.size() < fields.size() + table_info.pk_is_handle) throw Exception("Encoding row has less columns than encode values [num_columns=" + DB::toString(table_info.columns.size()) - + "] [num_fields=" + DB::toString(fields.size()) + "] . ", - ErrorCodes::LOGICAL_ERROR); + + "] [num_fields=" + DB::toString(fields.size()) + "] . ", + ErrorCodes::LOGICAL_ERROR); std::vector flatten_fields; std::unordered_set pk_column_names; if (table_info.is_common_handle) { - for (auto & idx_col : table_info.getPrimaryIndexInfo().idx_cols) + for (const auto & idx_col : table_info.getPrimaryIndexInfo().idx_cols) { // todo support prefix index pk_column_names.insert(idx_col.name); @@ -284,15 +284,19 @@ void encodeRow(const TiDB::TableInfo & table_info, const std::vector & fi (row_format_flip = !row_format_flip) ? encodeRowV1(table_info, flatten_fields, ss) : encodeRowV2(table_info, flatten_fields, ss); } -void insert( // - const TiDB::TableInfo & table_info, RegionID region_id, HandleID handle_id, // - ASTs::const_iterator values_begin, ASTs::const_iterator values_end, // - Context & context, const std::optional> & tso_del) +void insert( // + const TiDB::TableInfo & table_info, + RegionID region_id, + HandleID handle_id, // + ASTs::const_iterator values_begin, + ASTs::const_iterator values_end, // + Context & context, + const std::optional> & tso_del) { // Parse the fields in the inserted row std::vector fields; { - for (ASTs::const_iterator it = values_begin; it != values_end; ++it) + for (auto it = values_begin; it != values_end; ++it) { auto field = typeid_cast((*it).get())->value; fields.emplace_back(field); @@ -312,11 +316,18 @@ void insert( if (table_info.is_common_handle) { std::vector keys; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + for (size_t i = 0; i < table_info.getPrimaryIndexInfo().idx_cols.size(); i++) { - auto & idx_col = table_info.getPrimaryIndexInfo().idx_cols[i]; - auto & column_info = table_info.columns[idx_col.offset]; - auto start_field = RegionBench::convertField(column_info, fields[idx_col.offset]); + const auto & col_idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + const auto & column_info = table_info.columns[col_idx]; + auto start_field = RegionBench::convertField(column_info, fields[col_idx]); TiDB::DatumBumpy start_datum = TiDB::DatumBumpy(start_field, column_info.tp); keys.emplace_back(start_datum.field()); } @@ -343,7 +354,11 @@ void insert( raft_cmdpb::RaftCmdRequest request; addRequestsToRaftCmd(request, key, value, prewrite_ts, commit_ts, is_del); tmt.getKVStore()->handleWriteRaftCmd( - std::move(request), region_id, MockTiKV::instance().getRaftIndex(region_id), MockTiKV::instance().getRaftTerm(region_id), tmt); + std::move(request), + region_id, + MockTiKV::instance().getRaftIndex(region_id), + MockTiKV::instance().getRaftTerm(region_id), + tmt); } void remove(const TiDB::TableInfo & table_info, RegionID region_id, HandleID handle_id, Context & context) @@ -362,7 +377,11 @@ void remove(const TiDB::TableInfo & table_info, RegionID region_id, HandleID han raft_cmdpb::RaftCmdRequest request; addRequestsToRaftCmd(request, key, value, prewrite_ts, commit_ts, true); tmt.getKVStore()->handleWriteRaftCmd( - std::move(request), region_id, MockTiKV::instance().getRaftIndex(region_id), MockTiKV::instance().getRaftTerm(region_id), tmt); + std::move(request), + region_id, + MockTiKV::instance().getRaftIndex(region_id), + MockTiKV::instance().getRaftTerm(region_id), + tmt); } struct BatchCtrl @@ -378,17 +397,16 @@ struct BatchCtrl HandleID handle_begin; bool del; - BatchCtrl(Int64 concurrent_id_, Int64 flush_num_, Int64 batch_num_, UInt64 min_strlen_, UInt64 max_strlen_, Context * context_, - RegionPtr region_, HandleID handle_begin_, bool del_) - : concurrent_id(concurrent_id_), - flush_num(flush_num_), - batch_num(batch_num_), - min_strlen(min_strlen_), - max_strlen(max_strlen_), - context(context_), - region(region_), - handle_begin(handle_begin_), - del(del_) + BatchCtrl(Int64 concurrent_id_, Int64 flush_num_, Int64 batch_num_, UInt64 min_strlen_, UInt64 max_strlen_, Context * context_, RegionPtr region_, HandleID handle_begin_, bool del_) + : concurrent_id(concurrent_id_) + , flush_num(flush_num_) + , batch_num(batch_num_) + , min_strlen(min_strlen_) + , max_strlen(max_strlen_) + , context(context_) + , region(region_) + , handle_begin(handle_begin_) + , del(del_) { assert(max_strlen >= min_strlen); assert(min_strlen >= 1); @@ -396,52 +414,51 @@ struct BatchCtrl default_str = String(str_len, '_'); } - void EncodeDatum(WriteBuffer & ss, TiDB::CodecFlag flag, Int64 magic_num) + void encodeDatum(WriteBuffer & ss, TiDB::CodecFlag flag, Int64 magic_num) { Int8 target = (magic_num % 70) + '0'; EncodeUInt(UInt8(flag), ss); switch (flag) { - case TiDB::CodecFlagJson: - throw Exception("Not implented yet: BatchCtrl::EncodeDatum, TiDB::CodecFlagJson", ErrorCodes::LOGICAL_ERROR); - case TiDB::CodecFlagMax: - throw Exception("Not implented yet: BatchCtrl::EncodeDatum, TiDB::CodecFlagMax", ErrorCodes::LOGICAL_ERROR); - case TiDB::CodecFlagDuration: - throw Exception("Not implented yet: BatchCtrl::EncodeDatum, TiDB::CodecFlagDuration", ErrorCodes::LOGICAL_ERROR); - case TiDB::CodecFlagNil: - return; - case TiDB::CodecFlagBytes: - memset(default_str.data(), target, default_str.size()); - return EncodeBytes(default_str, ss); - //case TiDB::CodecFlagDecimal: - // return EncodeDecimal(Decimal(magic_num), ss); - case TiDB::CodecFlagCompactBytes: - memset(default_str.data(), target, default_str.size()); - return EncodeCompactBytes(default_str, ss); - case TiDB::CodecFlagFloat: - return EncodeFloat64(Float64(magic_num) / 1111.1, ss); - case TiDB::CodecFlagUInt: - return EncodeUInt(UInt64(magic_num), ss); - case TiDB::CodecFlagInt: - return EncodeInt64(Int64(magic_num), ss); - case TiDB::CodecFlagVarInt: - return EncodeVarInt(Int64(magic_num), ss); - case TiDB::CodecFlagVarUInt: - return EncodeVarUInt(UInt64(magic_num), ss); - default: - throw Exception("Not implented codec flag: " + std::to_string(flag), ErrorCodes::LOGICAL_ERROR); + case TiDB::CodecFlagJson: + throw Exception("Not implented yet: BatchCtrl::EncodeDatum, TiDB::CodecFlagJson", ErrorCodes::LOGICAL_ERROR); + case TiDB::CodecFlagMax: + throw Exception("Not implented yet: BatchCtrl::EncodeDatum, TiDB::CodecFlagMax", ErrorCodes::LOGICAL_ERROR); + case TiDB::CodecFlagDuration: + throw Exception("Not implented yet: BatchCtrl::EncodeDatum, TiDB::CodecFlagDuration", ErrorCodes::LOGICAL_ERROR); + case TiDB::CodecFlagNil: + return; + case TiDB::CodecFlagBytes: + memset(default_str.data(), target, default_str.size()); + return EncodeBytes(default_str, ss); + //case TiDB::CodecFlagDecimal: + // return EncodeDecimal(Decimal(magic_num), ss); + case TiDB::CodecFlagCompactBytes: + memset(default_str.data(), target, default_str.size()); + return EncodeCompactBytes(default_str, ss); + case TiDB::CodecFlagFloat: + return EncodeFloat64(Float64(magic_num) / 1111.1, ss); + case TiDB::CodecFlagUInt: + return EncodeUInt(UInt64(magic_num), ss); + case TiDB::CodecFlagInt: + return EncodeInt64(Int64(magic_num), ss); + case TiDB::CodecFlagVarInt: + return EncodeVarInt(Int64(magic_num), ss); + case TiDB::CodecFlagVarUInt: + return EncodeVarUInt(UInt64(magic_num), ss); + default: + throw Exception("Not implented codec flag: " + std::to_string(flag), ErrorCodes::LOGICAL_ERROR); } } - TiKVValue EncodeRow(const TiDB::TableInfo & table_info, Int64 magic_num) + TiKVValue encodeRow(const TiDB::TableInfo & table_info, Int64 magic_num) { WriteBufferFromOwnString ss; - for (size_t i = 0; i < table_info.columns.size(); i++) + for (const auto & column : table_info.columns) { - const TiDB::ColumnInfo & column = table_info.columns[i]; - EncodeDatum(ss, TiDB::CodecFlagInt, column.id); + encodeDatum(ss, TiDB::CodecFlagInt, column.id); // TODO: May need to use BumpyDatum to flatten before encoding. - EncodeDatum(ss, column.getCodecFlag(), magic_num); + encodeDatum(ss, column.getCodecFlag(), magic_num); } return TiKVValue(ss.releaseStr()); } @@ -466,17 +483,15 @@ void batchInsert(const TiDB::TableInfo & table_info, std::unique_ptr for (Int64 cnt = 0; cnt < batch_ctrl->batch_num; ++index, ++cnt) { TiKVKey key = RecordKVFormat::genKey(table_info.id, index); - TiKVValue value = batch_ctrl->EncodeRow(table_info, fn_gen_magic_num(index)); + TiKVValue value = batch_ctrl->encodeRow(table_info, fn_gen_magic_num(index)); addRequestsToRaftCmd(request, key, value, prewrite_ts, commit_ts, batch_ctrl->del); } - tmt.getKVStore()->handleWriteRaftCmd(std::move(request), region->id(), MockTiKV::instance().getRaftIndex(region->id()), - MockTiKV::instance().getRaftTerm(region->id()), tmt); + tmt.getKVStore()->handleWriteRaftCmd(std::move(request), region->id(), MockTiKV::instance().getRaftIndex(region->id()), MockTiKV::instance().getRaftTerm(region->id()), tmt); } } -void concurrentBatchInsert(const TiDB::TableInfo & table_info, Int64 concurrent_num, Int64 flush_num, Int64 batch_num, UInt64 min_strlen, - UInt64 max_strlen, Context & context) +void concurrentBatchInsert(const TiDB::TableInfo & table_info, Int64 concurrent_num, Int64 flush_num, Int64 batch_num, UInt64 min_strlen, UInt64 max_strlen, Context & context) { TMTContext & tmt = context.getTMTContext(); @@ -509,7 +524,12 @@ void concurrentBatchInsert(const TiDB::TableInfo & table_info, Int64 concurrent_ } Int64 concurrentRangeOperate( - const TiDB::TableInfo & table_info, HandleID start_handle, HandleID end_handle, Context & context, Int64 magic_num, bool del) + const TiDB::TableInfo & table_info, + HandleID start_handle, + HandleID end_handle, + Context & context, + Int64 magic_num, + bool del) { Regions regions; @@ -528,7 +548,7 @@ Int64 concurrentRangeOperate( std::list threads; Int64 tol = 0; - for (auto region : regions) + for (const auto & region : regions) { const auto range = region->getRange(); const auto & [ss, ee] = getHandleRangeByTable(range->rawKeys(), table_info.id); @@ -559,7 +579,7 @@ TableID getTableID(Context & context, const std::string & database_name, const s TablePtr table = MockTiDB::instance().getTableByName(database_name, table_name); if (table->isPartitionTable()) - return std::atoi(partition_id.c_str()); + return std::strtol(partition_id.c_str(), nullptr, 0); return table->id(); } diff --git a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h index 24590b9b71d..f286eaa2711 100644 --- a/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h +++ b/dbms/src/Storages/Transaction/DecodingStorageSchemaSnapshot.h @@ -62,10 +62,12 @@ struct DecodingStorageSchemaSnapshot , decoding_schema_version{decoding_schema_version_} { std::unordered_map column_lut; + std::unordered_map column_name_id_map; for (size_t i = 0; i < table_info_.columns.size(); i++) { const auto & ci = table_info_.columns[i]; column_lut.emplace(ci.id, i); + column_name_id_map.emplace(ci.name, ci.id); } for (size_t i = 0; i < column_defines->size(); i++) { @@ -73,7 +75,7 @@ struct DecodingStorageSchemaSnapshot sorted_column_id_with_pos.insert({cd.id, i}); if (cd.id != TiDBPkColumnID && cd.id != VersionColumnID && cd.id != DelMarkColumnID) { - auto & columns = table_info_.columns; + const auto & columns = table_info_.columns; column_infos.push_back(columns[column_lut.at(cd.id)]); } else @@ -85,10 +87,14 @@ struct DecodingStorageSchemaSnapshot // create pk related metadata if needed if (is_common_handle) { - const auto & primary_index_info = table_info_.getPrimaryIndexInfo(); - for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) + /// we will not update the IndexInfo except Rename DDL. + /// When the add column / drop column action happenes, the offset of each column may change + /// Thus, we should not use offset to get the column we want, + /// but use to compare the column name to get the column id. + const auto & primary_index_cols = table_info_.getPrimaryIndexInfo().idx_cols; + for (const auto & col : primary_index_cols) { - auto pk_column_id = table_info_.columns[primary_index_info.idx_cols[i].offset].id; + auto pk_column_id = column_name_id_map[col.name]; pk_column_ids.emplace_back(pk_column_id); pk_pos_map.emplace(pk_column_id, reinterpret_cast(std::numeric_limits::max())); } diff --git a/dbms/src/Storages/Transaction/RegionBlockReader.cpp b/dbms/src/Storages/Transaction/RegionBlockReader.cpp index d4234a57df4..a5a8e956dd8 100644 --- a/dbms/src/Storages/Transaction/RegionBlockReader.cpp +++ b/dbms/src/Storages/Transaction/RegionBlockReader.cpp @@ -212,6 +212,8 @@ bool RegionBlockReader::readImpl(Block & block, const RegionDataReadInfoList & d } index++; } + block.checkNumberOfRows(); + return true; } diff --git a/dbms/src/Storages/Transaction/TiDB.cpp b/dbms/src/Storages/Transaction/TiDB.cpp index eb5cfa34218..8192d9ba8d5 100644 --- a/dbms/src/Storages/Transaction/TiDB.cpp +++ b/dbms/src/Storages/Transaction/TiDB.cpp @@ -601,6 +601,8 @@ catch (const Poco::Exception & e) /////////////////////// IndexColumnInfo::IndexColumnInfo(Poco::JSON::Object::Ptr json) + : length(0) + , offset(0) { deserialize(json); } diff --git a/dbms/src/Storages/Transaction/TiDB.h b/dbms/src/Storages/Transaction/TiDB.h index 232019c1f0d..c3e20e69b6d 100644 --- a/dbms/src/Storages/Transaction/TiDB.h +++ b/dbms/src/Storages/Transaction/TiDB.h @@ -165,7 +165,6 @@ struct ColumnInfo ColumnID id = -1; String name; - Int32 offset = -1; Poco::Dynamic::Var origin_default_value; Poco::Dynamic::Var default_value; Poco::Dynamic::Var default_bit_value; @@ -198,6 +197,12 @@ struct ColumnInfo Int64 getTimeValue(const String &) const; Int64 getYearValue(const String &) const; UInt64 getBitValue(const String &) const; + +private: + /// please be very careful when you have to use offset, + /// because we never update offset when DDL action changes. + /// Thus, our offset will not exactly correspond the order of columns. + Int32 offset = -1; }; enum PartitionType @@ -284,8 +289,13 @@ struct IndexColumnInfo void deserialize(Poco::JSON::Object::Ptr json); String name; - Int32 offset; Int32 length; + +private: + /// please be very careful when you have to use offset, + /// because we never update offset when DDL action changes. + /// Thus, our offset will not exactly correspond the order of columns. + Int32 offset; }; struct IndexInfo { @@ -367,7 +377,12 @@ struct TableInfo bool isLogicalPartitionTable() const { return is_partition_table && belonging_table_id == DB::InvalidTableID && partition.enable; } - /// should not be called if is_common_handle = false + /// should not be called if is_common_handle = false. + /// when use IndexInfo, please avoid to use the offset info + /// the offset value may be wrong in some cases, + /// due to we will not update IndexInfo except RENAME DDL action, + /// but DDL like add column / drop column may change the offset of columns + /// Thus, please be very careful when you must have to use offset information !!!!! const IndexInfo & getPrimaryIndexInfo() const { return index_infos[0]; } IndexInfo & getPrimaryIndexInfo() { return index_infos[0]; } diff --git a/dbms/src/Storages/Transaction/TiKVRecordFormat.h b/dbms/src/Storages/Transaction/TiKVRecordFormat.h index 5bbb0772291..0345316b7dd 100644 --- a/dbms/src/Storages/Transaction/TiKVRecordFormat.h +++ b/dbms/src/Storages/Transaction/TiKVRecordFormat.h @@ -16,7 +16,6 @@ namespace DB { - namespace ErrorCodes { extern const int LOGICAL_ERROR; @@ -24,7 +23,6 @@ extern const int LOGICAL_ERROR; namespace RecordKVFormat { - enum CFModifyFlag : UInt8 { PutFlag = 'P', @@ -69,17 +67,35 @@ inline TiKVKey encodeAsTiKVKey(const String & ori_str) return TiKVKey(ss.releaseStr()); } -inline UInt64 encodeUInt64(const UInt64 x) { return toBigEndian(x); } +inline UInt64 encodeUInt64(const UInt64 x) +{ + return toBigEndian(x); +} -inline UInt64 encodeInt64(const Int64 x) { return encodeUInt64(static_cast(x) ^ SIGN_MASK); } +inline UInt64 encodeInt64(const Int64 x) +{ + return encodeUInt64(static_cast(x) ^ SIGN_MASK); +} -inline UInt64 encodeUInt64Desc(const UInt64 x) { return encodeUInt64(~x); } +inline UInt64 encodeUInt64Desc(const UInt64 x) +{ + return encodeUInt64(~x); +} -inline UInt64 decodeUInt64(const UInt64 x) { return toBigEndian(x); } +inline UInt64 decodeUInt64(const UInt64 x) +{ + return toBigEndian(x); +} -inline UInt64 decodeUInt64Desc(const UInt64 x) { return ~decodeUInt64(x); } +inline UInt64 decodeUInt64Desc(const UInt64 x) +{ + return ~decodeUInt64(x); +} -inline Int64 decodeInt64(const UInt64 x) { return static_cast(decodeUInt64(x) ^ SIGN_MASK); } +inline Int64 decodeInt64(const UInt64 x) +{ + return static_cast(decodeUInt64(x) ^ SIGN_MASK); +} inline void encodeInt64(const Int64 x, WriteBuffer & ss) { @@ -111,7 +127,10 @@ inline DecodedTiKVKey genRawKey(const TableID tableId, const HandleID handleId) return key; } -inline TiKVKey genKey(const TableID tableId, const HandleID handleId) { return encodeAsTiKVKey(genRawKey(tableId, handleId)); } +inline TiKVKey genKey(const TableID tableId, const HandleID handleId) +{ + return encodeAsTiKVKey(genRawKey(tableId, handleId)); +} inline TiKVKey genKey(const TiDB::TableInfo & table_info, std::vector keys) { @@ -121,9 +140,16 @@ inline TiKVKey genKey(const TiDB::TableInfo & table_info, std::vector key memcpy(key.data() + 1, reinterpret_cast(&big_endian_table_id), 8); memcpy(key.data() + 1 + 8, RecordKVFormat::RECORD_PREFIX_SEP, 2); WriteBufferFromOwnString ss; + + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } for (size_t i = 0; i < keys.size(); i++) { - DB::EncodeDatum(keys[i], table_info.columns[table_info.getPrimaryIndexInfo().idx_cols[i].offset].getCodecFlag(), ss); + auto idx = column_name_columns_index_map[table_info.getPrimaryIndexInfo().idx_cols[i].name]; + DB::EncodeDatum(keys[i], table_info.columns[idx].getCodecFlag(), ss); } return encodeAsTiKVKey(key + ss.releaseStr()); } @@ -162,29 +188,50 @@ inline std::tuple decodeTiKVKeyFull(const TiKVKey & key) } } -inline DecodedTiKVKey decodeTiKVKey(const TiKVKey & key) { return std::get<0>(decodeTiKVKeyFull(key)); } +inline DecodedTiKVKey decodeTiKVKey(const TiKVKey & key) +{ + return std::get<0>(decodeTiKVKeyFull(key)); +} -inline Timestamp getTs(const TiKVKey & key) { return decodeUInt64Desc(read(key.data() + key.dataSize() - 8)); } +inline Timestamp getTs(const TiKVKey & key) +{ + return decodeUInt64Desc(read(key.data() + key.dataSize() - 8)); +} -inline TableID getTableId(const DecodedTiKVKey & key) { return decodeInt64(read(key.data() + 1)); } +inline TableID getTableId(const DecodedTiKVKey & key) +{ + return decodeInt64(read(key.data() + 1)); +} -inline HandleID getHandle(const DecodedTiKVKey & key) { return decodeInt64(read(key.data() + RAW_KEY_NO_HANDLE_SIZE)); } +inline HandleID getHandle(const DecodedTiKVKey & key) +{ + return decodeInt64(read(key.data() + RAW_KEY_NO_HANDLE_SIZE)); +} inline RawTiDBPK getRawTiDBPK(const DecodedTiKVKey & key) { return std::make_shared(key.begin() + RAW_KEY_NO_HANDLE_SIZE, key.end()); } -inline TableID getTableId(const TiKVKey & key) { return getTableId(decodeTiKVKey(key)); } +inline TableID getTableId(const TiKVKey & key) +{ + return getTableId(decodeTiKVKey(key)); +} -inline HandleID getHandle(const TiKVKey & key) { return getHandle(decodeTiKVKey(key)); } +inline HandleID getHandle(const TiKVKey & key) +{ + return getHandle(decodeTiKVKey(key)); +} inline bool isRecord(const DecodedTiKVKey & raw_key) { return raw_key.size() >= RAW_KEY_SIZE && raw_key[0] == TABLE_PREFIX && memcmp(raw_key.data() + 9, RECORD_PREFIX_SEP, 2) == 0; } -inline TiKVKey truncateTs(const TiKVKey & key) { return TiKVKey(String(key.data(), key.dataSize() - sizeof(Timestamp))); } +inline TiKVKey truncateTs(const TiKVKey & key) +{ + return TiKVKey(String(key.data(), key.dataSize() - sizeof(Timestamp))); +} inline TiKVKey appendTs(const TiKVKey & key, Timestamp ts) { @@ -201,7 +248,12 @@ inline TiKVKey genKey(TableID tableId, HandleID handleId, Timestamp ts) } inline TiKVValue encodeLockCfValue( - UInt8 lock_type, const String & primary, Timestamp ts, UInt64 ttl, const String * short_value = nullptr, Timestamp min_commit_ts = 0) + UInt8 lock_type, + const String & primary, + Timestamp ts, + UInt64 ttl, + const String * short_value = nullptr, + Timestamp min_commit_ts = 0) { WriteBufferFromOwnString res; res.write(lock_type); @@ -261,7 +313,10 @@ inline R readVarInt(const char *& data, size_t & len) return res; } -inline UInt64 readVarUInt(const char *& data, size_t & len) { return readVarInt(data, len); } +inline UInt64 readVarUInt(const char *& data, size_t & len) +{ + return readVarInt(data, len); +} inline UInt8 readUInt8(const char *& data, size_t & len) { @@ -333,30 +388,29 @@ inline DecodedWriteCFValue decodeWriteCfValue(const TiKVValue & value) auto flag = RecordKVFormat::readUInt8(data, len); switch (flag) { - case RecordKVFormat::SHORT_VALUE_PREFIX: - { - size_t slen = RecordKVFormat::readUInt8(data, len); - if (slen > len) - throw Exception("content len not equal to short value len", ErrorCodes::LOGICAL_ERROR); - short_value = RecordKVFormat::readRawString(data, len, slen); - break; - } - case RecordKVFormat::FLAG_OVERLAPPED_ROLLBACK: - // ignore - break; - case RecordKVFormat::GC_FENCE_PREFIX: - /** + case RecordKVFormat::SHORT_VALUE_PREFIX: + { + size_t slen = RecordKVFormat::readUInt8(data, len); + if (slen > len) + throw Exception("content len not equal to short value len", ErrorCodes::LOGICAL_ERROR); + short_value = RecordKVFormat::readRawString(data, len, slen); + break; + } + case RecordKVFormat::FLAG_OVERLAPPED_ROLLBACK: + // ignore + break; + case RecordKVFormat::GC_FENCE_PREFIX: + /** * according to https://github.com/tikv/tikv/pull/9207, when meet `GC fence` flag, it is definitely a * rewriting record and there must be a complete row written to tikv, just ignore it in tiflash. */ - return std::nullopt; - default: - throw Exception("invalid flag " + std::to_string(flag) + " in write cf", ErrorCodes::LOGICAL_ERROR); + return std::nullopt; + default: + throw Exception("invalid flag " + std::to_string(flag) + " in write cf", ErrorCodes::LOGICAL_ERROR); } } - return InnerDecodedWriteCFValue{write_type, prewrite_ts, - short_value.empty() ? nullptr : std::make_shared(short_value.data(), short_value.length())}; + return InnerDecodedWriteCFValue{write_type, prewrite_ts, short_value.empty() ? nullptr : std::make_shared(short_value.data(), short_value.length())}; } inline TiKVValue encodeWriteCfValue(UInt8 write_type, Timestamp ts, std::string_view short_value = {}, bool gc_fence = false) diff --git a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h index 31fad745eaf..5d1450ab566 100644 --- a/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h +++ b/dbms/src/Storages/Transaction/tests/RowCodecTestUtils.h @@ -223,14 +223,14 @@ std::pair> getTableInfoAndFields(ColumnIDs handle_ { table_info.is_common_handle = true; TiDB::IndexInfo index_info; - for (size_t i = 0; i < handle_ids.size(); i++) + for (auto handle_id : handle_ids) { TiDB::IndexColumnInfo index_column_info; - for (size_t pos = 0; pos < table_info.columns.size(); pos++) + for (auto & column : table_info.columns) { - if (table_info.columns[pos].id == handle_ids[i]) + if (column.id == handle_id) { - index_column_info.offset = pos; + index_column_info.name = column.name; break; } } diff --git a/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp new file mode 100644 index 00000000000..05ab637de7f --- /dev/null +++ b/dbms/src/Storages/Transaction/tests/bench_region_block_reader.cpp @@ -0,0 +1,171 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +#include "RowCodecTestUtils.h" + +using TableInfo = TiDB::TableInfo; +namespace DB::tests +{ +using ColumnIDs = std::vector; +class RegionBlockReaderBenchTest : public benchmark::Fixture +{ +protected: + Int64 handle_value = 100; + UInt8 del_mark_value = 0; + UInt64 version_value = 100; + + RegionDataReadInfoList data_list_read; + std::unordered_map fields_map; + + enum RowEncodeVersion + { + RowV1, + RowV2 + }; + +protected: + void SetUp(const benchmark::State & /*state*/) override + { + data_list_read.clear(); + fields_map.clear(); + } + + void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version, size_t num_rows) + { + // for later check + std::unordered_map column_name_columns_index_map; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + fields_map.emplace(table_info.columns[i].id, fields[i]); + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } + + std::vector value_fields; + std::vector pk_fields; + for (size_t i = 0; i < table_info.columns.size(); i++) + { + if (!table_info.columns[i].hasPriKeyFlag()) + value_fields.emplace_back(fields[i]); + else + pk_fields.emplace_back(fields[i]); + } + + // create PK + WriteBufferFromOwnString pk_buf; + if (table_info.is_common_handle) + { + auto & primary_index_info = table_info.getPrimaryIndexInfo(); + for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) + { + auto idx = column_name_columns_index_map[primary_index_info.idx_cols[i].name]; + EncodeDatum(pk_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); + } + } + else + { + DB::EncodeInt64(handle_value, pk_buf); + } + RawTiDBPK pk{std::make_shared(pk_buf.releaseStr())}; + // create value + WriteBufferFromOwnString value_buf; + if (row_version == RowEncodeVersion::RowV1) + { + encodeRowV1(table_info, value_fields, value_buf); + } + else if (row_version == RowEncodeVersion::RowV2) + { + encodeRowV2(table_info, value_fields, value_buf); + } + else + { + throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR); + } + auto row_value = std::make_shared(std::move(value_buf.str())); + for (size_t i = 0; i < num_rows; i++) + data_list_read.emplace_back(pk, del_mark_value, version_value, row_value); + } + + bool decodeColumns(DecodingStorageSchemaSnapshotConstPtr decoding_schema, bool force_decode) const + { + RegionBlockReader reader{decoding_schema}; + Block block = createBlockSortByColumnID(decoding_schema); + return reader.read(block, data_list_read, force_decode); + } + + std::pair> getNormalTableInfoFields(const ColumnIDs & handle_ids, bool is_common_handle) const + { + return getTableInfoAndFields( + handle_ids, + is_common_handle, + ColumnIDValue(2, handle_value), + ColumnIDValue(3, std::numeric_limits::max()), + ColumnIDValue(4, std::numeric_limits::min()), + ColumnIDValue(9, String("aaa")), + ColumnIDValue(10, DecimalField(ToDecimal(12345678910ULL, 4), 4)), + ColumnIDValueNull(11)); + } +}; + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, CommonHandle) +(benchmark::State & state) +{ + size_t num_rows = state.range(0); + auto [table_info, fields] = getNormalTableInfoFields({2, 3, 4}, true); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true); + } +} + + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsNotHandle) +(benchmark::State & state) +{ + size_t num_rows = state.range(0); + auto [table_info, fields] = getNormalTableInfoFields({EXTRA_HANDLE_COLUMN_ID}, false); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true); + } +} + +BENCHMARK_DEFINE_F(RegionBlockReaderBenchTest, PKIsHandle) +(benchmark::State & state) +{ + size_t num_rows = state.range(0); + auto [table_info, fields] = getNormalTableInfoFields({2}, false); + encodeColumns(table_info, fields, RowEncodeVersion::RowV2, num_rows); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + for (auto _ : state) + { + decodeColumns(decoding_schema, true); + } +} + +constexpr size_t num_iterations_test = 1000; + +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, CommonHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); +BENCHMARK_REGISTER_F(RegionBlockReaderBenchTest, PKIsNotHandle)->Iterations(num_iterations_test)->Arg(1)->Arg(10)->Arg(100); + +} // namespace DB::tests diff --git a/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp new file mode 100644 index 00000000000..1de9809ecad --- /dev/null +++ b/dbms/src/Storages/Transaction/tests/gtest_decoding_storage_schema_snapshot.cpp @@ -0,0 +1,65 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +#include "RowCodecTestUtils.h" + +namespace DB::tests +{ +static TableInfo getTableInfoByJson(const String & json_table_info) +{ + return TableInfo(json_table_info); +} +TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndex) +{ + // table with column [A,B,C,D], primary keys [A,C] + const String json_table_info = R"json({"id":75,"name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"A","L":"a"},"offset":0,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":2,"name":{"O":"B","L":"b"},"offset":1,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":15,"Flag":0,"Flen":20,"Decimal":0,"Charset":"utf8mb4","Collate":"utf8mb4_bin","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":3,"name":{"O":"C","L":"c"},"offset":2,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":4,"name":{"O":"D","L":"d"},"offset":3,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"A","L":"a"},"offset":0,"length":-1},{"name":{"O":"C","L":"c"},"offset":2,"length":-1}],"state":5,"comment":"","index_type":1,"is_unique":true,"is_primary":true,"is_invisible":false,"is_global":false}],"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":true,"common_handle_version":1,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":4,"max_idx_id":1,"max_cst_id":0,"update_timestamp":434039123413303302,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":4,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null},"is_columnar":false,"temp_table_type":0,"cache_table_status":0,"policy_ref_info":null,"stats_options":null})json"; + auto table_info = getTableInfoByJson(json_table_info); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + + //check decoding_schema->pk_column_ids infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), 2); + ASSERT_EQ(decoding_schema->pk_column_ids[0], 1); + ASSERT_EQ(decoding_schema->pk_column_ids[1], 3); + + //check decoding_schema->pk_pos_map infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), decoding_schema->pk_pos_map.size()); + // there are three hidden column in the decoded block, so the position of A,C is 3,5 + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[0]), 3); + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[1]), 5); +} + +TEST(DecodingStorageSchemaSnapshotTest, CheckPKInfosUnderClusteredIndexAfterDropColumn) +{ + // drop column B for [A,B,C,D]; table with column [A,C,D], primary keys [A,C] + const String json_table_info = R"json({"id":75,"name":{"O":"test","L":"test"},"charset":"utf8mb4","collate":"utf8mb4_bin","cols":[{"id":1,"name":{"O":"A","L":"a"},"offset":0,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":3,"name":{"O":"C","L":"c"},"offset":2,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":4099,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2},{"id":4,"name":{"O":"D","L":"d"},"offset":3,"origin_default":null,"origin_default_bit":null,"default":null,"default_bit":null,"default_is_expr":false,"generated_expr_string":"","generated_stored":false,"dependences":null,"type":{"Tp":3,"Flag":0,"Flen":11,"Decimal":0,"Charset":"binary","Collate":"binary","Elems":null},"state":5,"comment":"","hidden":false,"change_state_info":null,"version":2}],"index_info":[{"id":1,"idx_name":{"O":"PRIMARY","L":"primary"},"tbl_name":{"O":"","L":""},"idx_cols":[{"name":{"O":"A","L":"a"},"offset":0,"length":-1},{"name":{"O":"C","L":"c"},"offset":2,"length":-1}],"state":5,"comment":"","index_type":1,"is_unique":true,"is_primary":true,"is_invisible":false,"is_global":false}],"constraint_info":null,"fk_info":null,"state":5,"pk_is_handle":false,"is_common_handle":true,"common_handle_version":1,"comment":"","auto_inc_id":0,"auto_id_cache":0,"auto_rand_id":0,"max_col_id":4,"max_idx_id":1,"max_cst_id":0,"update_timestamp":434039123413303302,"ShardRowIDBits":0,"max_shard_row_id_bits":0,"auto_random_bits":0,"pre_split_regions":0,"partition":null,"compression":"","view":null,"sequence":null,"Lock":null,"version":4,"tiflash_replica":{"Count":1,"LocationLabels":[],"Available":false,"AvailablePartitionIDs":null},"is_columnar":false,"temp_table_type":0,"cache_table_status":0,"policy_ref_info":null,"stats_options":null})json"; + auto table_info = getTableInfoByJson(json_table_info); + auto decoding_schema = getDecodingStorageSchemaSnapshot(table_info); + + //check decoding_schema->pk_column_ids infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), 2); + ASSERT_EQ(decoding_schema->pk_column_ids[0], 1); + ASSERT_EQ(decoding_schema->pk_column_ids[1], 3); + + //check decoding_schema->pk_pos_map infos + ASSERT_EQ(decoding_schema->pk_column_ids.size(), decoding_schema->pk_pos_map.size()); + // there are three hidden column in the decoded block, so the position of A,C is 3,4 + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[0]), 3); + ASSERT_EQ(decoding_schema->pk_pos_map.at(decoding_schema->pk_column_ids[1]), 4); +} + +} // namespace DB::tests diff --git a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp index d5ccc1914df..6314142d500 100644 --- a/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp +++ b/dbms/src/Storages/Transaction/tests/gtest_region_block_reader.cpp @@ -12,13 +12,13 @@ using ColumnIDs = std::vector; class RegionBlockReaderTestFixture : public ::testing::Test { protected: - Int64 handle_value_ = 100; - UInt8 del_mark_value_ = 0; - UInt64 version_value_ = 100; - size_t rows_ = 3; + Int64 handle_value = 100; + UInt8 del_mark_value = 0; + UInt64 version_value = 100; + size_t rows = 3; - RegionDataReadInfoList data_list_read_; - std::unordered_map fields_map_; + RegionDataReadInfoList data_list_read; + std::unordered_map fields_map; enum RowEncodeVersion { @@ -29,8 +29,8 @@ class RegionBlockReaderTestFixture : public ::testing::Test protected: void SetUp() override { - data_list_read_.clear(); - fields_map_.clear(); + data_list_read.clear(); + fields_map.clear(); } void TearDown() override {} @@ -38,8 +38,12 @@ class RegionBlockReaderTestFixture : public ::testing::Test void encodeColumns(TableInfo & table_info, std::vector & fields, RowEncodeVersion row_version) { // for later check + std::unordered_map column_name_columns_index_map; for (size_t i = 0; i < table_info.columns.size(); i++) - fields_map_.emplace(table_info.columns[i].id, fields[i]); + { + fields_map.emplace(table_info.columns[i].id, fields[i]); + column_name_columns_index_map.emplace(table_info.columns[i].name, i); + } std::vector value_fields; std::vector pk_fields; @@ -58,13 +62,13 @@ class RegionBlockReaderTestFixture : public ::testing::Test auto & primary_index_info = table_info.getPrimaryIndexInfo(); for (size_t i = 0; i < primary_index_info.idx_cols.size(); i++) { - size_t pk_offset = primary_index_info.idx_cols[i].offset; - EncodeDatum(pk_fields[i], table_info.columns[pk_offset].getCodecFlag(), pk_buf); + auto idx = column_name_columns_index_map[primary_index_info.idx_cols[i].name]; + EncodeDatum(pk_fields[i], table_info.columns[idx].getCodecFlag(), pk_buf); } } else { - DB::EncodeInt64(handle_value_, pk_buf); + DB::EncodeInt64(handle_value, pk_buf); } RawTiDBPK pk{std::make_shared(pk_buf.releaseStr())}; // create value @@ -82,44 +86,44 @@ class RegionBlockReaderTestFixture : public ::testing::Test throw Exception("Unknown row format " + std::to_string(row_version), ErrorCodes::LOGICAL_ERROR); } auto row_value = std::make_shared(std::move(value_buf.str())); - for (size_t i = 0; i < rows_; i++) - data_list_read_.emplace_back(pk, del_mark_value_, version_value_, row_value); + for (size_t i = 0; i < rows; i++) + data_list_read.emplace_back(pk, del_mark_value, version_value, row_value); } void checkBlock(DecodingStorageSchemaSnapshotConstPtr decoding_schema, const Block & block) const { ASSERT_EQ(block.columns(), decoding_schema->column_defines->size()); - for (size_t row = 0; row < rows_; row++) + for (size_t row = 0; row < rows; row++) { for (size_t pos = 0; pos < block.columns(); pos++) { - auto & column_element = block.getByPosition(pos); + const auto & column_element = block.getByPosition(pos); if (row == 0) { - ASSERT_EQ(column_element.column->size(), rows_); + ASSERT_EQ(column_element.column->size(), rows); } if (column_element.name == EXTRA_HANDLE_COLUMN_NAME) { if (decoding_schema->is_common_handle) { - ASSERT_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read_[row]))); + ASSERT_EQ((*column_element.column)[row], Field(*std::get<0>(data_list_read[row]))); } else { - ASSERT_EQ((*column_element.column)[row], Field(handle_value_)); + ASSERT_EQ((*column_element.column)[row], Field(handle_value)); } } else if (column_element.name == VERSION_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(version_value_)); + ASSERT_EQ((*column_element.column)[row], Field(version_value)); } else if (column_element.name == TAG_COLUMN_NAME) { - ASSERT_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value_))); + ASSERT_EQ((*column_element.column)[row], Field(NearestFieldType::Type(del_mark_value))); } else { - ASSERT_EQ((*column_element.column)[row], fields_map_.at(column_element.column_id)); + ASSERT_EQ((*column_element.column)[row], fields_map.at(column_element.column_id)); } } } @@ -129,7 +133,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test { RegionBlockReader reader{decoding_schema}; Block block = createBlockSortByColumnID(decoding_schema); - if (!reader.read(block, data_list_read_, force_decode)) + if (!reader.read(block, data_list_read, force_decode)) return false; checkBlock(decoding_schema, block); @@ -141,7 +145,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test return getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), @@ -156,7 +160,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test handle_ids, is_common_handle, ColumnIDValue(1, String("")), - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(8, String("")), @@ -168,12 +172,12 @@ class RegionBlockReaderTestFixture : public ::testing::Test // add default value for missing column std::vector missing_column_ids{1, 8, 13}; String missing_column_default_value = String("default"); - for (size_t i = 0; i < table_info.columns.size(); i++) + for (auto & column : table_info.columns) { - if (std::find(missing_column_ids.begin(), missing_column_ids.end(), table_info.columns[i].id) != missing_column_ids.end()) + if (std::find(missing_column_ids.begin(), missing_column_ids.end(), column.id) != missing_column_ids.end()) { - table_info.columns[i].origin_default_value = missing_column_default_value; - fields_map_.emplace(table_info.columns[i].id, Field(missing_column_default_value)); + column.origin_default_value = missing_column_default_value; + fields_map.emplace(column.id, Field(missing_column_default_value)); } } return table_info; @@ -185,7 +189,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), ColumnIDValue(10, DecimalField(ToDecimal(12345678910ULL, 4), 4))); @@ -198,7 +202,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), @@ -213,7 +217,7 @@ class RegionBlockReaderTestFixture : public ::testing::Test std::tie(table_info, std::ignore) = getTableInfoAndFields( handle_ids, is_common_handle, - ColumnIDValue(2, handle_value_), + ColumnIDValue(2, handle_value), ColumnIDValue(3, std::numeric_limits::max()), ColumnIDValue(4, std::numeric_limits::min()), ColumnIDValue(9, String("aaa")), diff --git a/tests/fullstack-test-dt/clustered_index/ddl.test b/tests/fullstack-test-dt/clustered_index/ddl.test index 5750f43d34d..f3c35c3afa1 100644 --- a/tests/fullstack-test-dt/clustered_index/ddl.test +++ b/tests/fullstack-test-dt/clustered_index/ddl.test @@ -52,3 +52,89 @@ mysql> set session tidb_isolation_read_engines='tiflash'; select * from test.t_2 mysql> drop table test.t_1; mysql> drop table test.t_2; + +### about issue 5154 to check whether add column/drop column will effect the cluster index decode +### drop the column between two columns that are cluster index columns + +mysql> drop table if exists test.t_3; +mysql> create table test.t_3 (A int, B varchar(20), C int, D int, PRIMARY KEY(A,C) CLUSTERED); +mysql> insert into test.t_3 values (1,'1',1,1),(2,'2',2,2); + +mysql> alter table test.t_3 set tiflash replica 1; + +func> wait_table test t_3 + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3; ++---+---+---+---+ +| A | B | C | D | ++---+---+---+---+ +| 1 | 1 | 1 | 1 | +| 2 | 2 | 2 | 2 | ++---+---+---+---+ + +mysql> alter table test.t_3 drop column B; + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3; ++---+---+---+ +| A | C | D | ++---+---+---+ +| 1 | 1 | 1 | +| 2 | 2 | 2 | ++---+---+---+ + +# insert some rows +mysql> insert into test.t_3 values (3,3,3),(4,4,4); + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_3; ++---+---+---+ +| A | C | D | ++---+---+---+ +| 1 | 1 | 1 | +| 2 | 2 | 2 | +| 3 | 3 | 3 | +| 4 | 4 | 4 | ++---+---+---+ + +mysql> drop table test.t_3; + +### add the column between two columns that are cluster index columns +mysql> drop table if exists test.t_4 +mysql> create table test.t_4 (A int, B varchar(20), C int, D int, PRIMARY KEY(A,C) CLUSTERED); + +mysql> insert into test.t_4 values (1,'1',1,1),(2,'2',2,2); + +mysql> alter table test.t_4 set tiflash replica 1; + +func> wait_table test t_4 + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4; ++---+---+---+---+ +| A | B | C | D | ++---+---+---+---+ +| 1 | 1 | 1 | 1 | +| 2 | 2 | 2 | 2 | ++---+---+---+---+ + +mysql> alter table test.t_4 Add column E int after B; + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4; ++---+---+------+---+---+ +| A | B | E | C | D | ++---+---+------+---+---+ +| 1 | 1 | NULL | 1 | 1 | +| 2 | 2 | NULL | 2 | 2 | ++---+---+------+---+---+ + +mysql> insert into test.t_4 values (3,'3',3,3,3),(4,'4',4,4,4); + +mysql> set session tidb_isolation_read_engines='tiflash';select * from test.t_4; ++---+---+------+------+------+ +| A | B | E | C | D | ++---+---+------+------+------+ +| 1 | 1 | NULL | 1 | 1 | +| 2 | 2 | NULL | 2 | 2 | +| 3 | 3 | 3 | 3 | 3 | +| 4 | 4 | 4 | 4 | 4 | ++---+---+------+------+------+ + +mysql> drop table test.t_4; \ No newline at end of file