From 9e8061fd17f052656876f62939944bc0d12af253 Mon Sep 17 00:00:00 2001 From: ChangRui-Ryan Date: Thu, 27 Nov 2025 12:10:42 +0800 Subject: [PATCH 1/2] Fix crash by using alignedAlloc rather than alloc --- .../AggregateFunctionGroupArray.h | 6 +- dbms/src/Columns/ColumnAggregateFunction.cpp | 6 +- .../DataTypes/DataTypeAggregateFunction.cpp | 7 +- dbms/src/Interpreters/AggregationCommon.h | 94 ------------------- dbms/src/Interpreters/JoinPartition.cpp | 5 +- dbms/src/Interpreters/SpecializedAggregator.h | 3 +- 6 files changed, 14 insertions(+), 107 deletions(-) diff --git a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h index c80babc0502..67e5ed1f99c 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -182,7 +182,7 @@ struct GroupArrayListNodeBase UInt64 size; readVarUInt(size, buf); - Node * node = reinterpret_cast(arena->alloc(sizeof(Node) + size)); + Node * node = reinterpret_cast(arena->alignedAlloc(sizeof(Node) + size, alignof(Node))); node->size = size; buf.read(node->data(), size); return node; @@ -198,7 +198,7 @@ struct GroupArrayListNodeString : public GroupArrayListNodeBase(column).getDataAt(row_num); - Node * node = reinterpret_cast(arena->alloc(sizeof(Node) + string.size)); + Node * node = reinterpret_cast(arena->alignedAlloc(sizeof(Node) + string.size, alignof(Node))); node->next = nullptr; node->size = string.size; memcpy(node->data(), string.data, string.size); @@ -215,7 +215,7 @@ struct GroupArrayListNodeGeneral : public GroupArrayListNodeBasealloc(sizeof(Node)); + const char * begin = arena->alignedAlloc(sizeof(Node), alignof(Node)); StringRef value = column.serializeValueIntoArena(row_num, *arena, begin); Node * node = reinterpret_cast(const_cast(begin)); diff --git a/dbms/src/Columns/ColumnAggregateFunction.cpp b/dbms/src/Columns/ColumnAggregateFunction.cpp index 60d35168d13..c12418099ef 100644 --- a/dbms/src/Columns/ColumnAggregateFunction.cpp +++ b/dbms/src/Columns/ColumnAggregateFunction.cpp @@ -325,7 +325,7 @@ void ColumnAggregateFunction::insert(const Field & x) Arena & arena = createOrGetArena(); - getData().push_back(arena.alloc(function->sizeOfData())); + getData().push_back(arena.alignedAlloc(function->sizeOfData(), function->alignOfData())); function->create(getData().back()); ReadBufferFromString read_buffer(x.get()); function->deserialize(getData().back(), read_buffer, &arena); @@ -337,7 +337,7 @@ void ColumnAggregateFunction::insertDefault() Arena & arena = createOrGetArena(); - getData().push_back(arena.alloc(function->sizeOfData())); + getData().push_back(arena.alignedAlloc(function->sizeOfData(), function->alignOfData())); function->create(getData().back()); } @@ -365,7 +365,7 @@ const char * ColumnAggregateFunction::deserializeAndInsertFromArena( */ Arena & dst_arena = createOrGetArena(); - getData().push_back(dst_arena.alloc(function->sizeOfData())); + getData().push_back(dst_arena.alignedAlloc(function->sizeOfData(), function->alignOfData())); function->create(getData().back()); /** We will read from src_arena. diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index de30f9c577e..aba1217f587 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -90,7 +90,7 @@ void DataTypeAggregateFunction::deserializeBinary(IColumn & column, ReadBuffer & Arena & arena = column_concrete.createOrGetArena(); size_t size_of_state = function->sizeOfData(); - AggregateDataPtr place = arena.alloc(size_of_state); + AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData()); function->create(place); try @@ -144,8 +144,7 @@ void DataTypeAggregateFunction::deserializeBinaryBulk( { if (istr.eof()) break; - - AggregateDataPtr place = arena.alloc(size_of_state); + AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData()); function->create(place); @@ -176,7 +175,7 @@ static void deserializeFromString(const AggregateFunctionPtr & function, IColumn Arena & arena = column_concrete.createOrGetArena(); size_t size_of_state = function->sizeOfData(); - AggregateDataPtr place = arena.alloc(size_of_state); + AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData()); function->create(place); diff --git a/dbms/src/Interpreters/AggregationCommon.h b/dbms/src/Interpreters/AggregationCommon.h index a8612e16245..f927b76fe3a 100644 --- a/dbms/src/Interpreters/AggregationCommon.h +++ b/dbms/src/Interpreters/AggregationCommon.h @@ -235,100 +235,6 @@ static inline UInt128 ALWAYS_INLINE hash128( return key; } - -/// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first. -static inline StringRef * ALWAYS_INLINE placeKeysInPool(size_t keys_size, StringRefs & keys, Arena & pool) -{ - for (size_t j = 0; j < keys_size; ++j) - { - char * place = pool.alloc(keys[j].size); - memcpy(place, keys[j].data, keys[j].size); /// TODO padding in Arena and memcpySmall - keys[j].data = place; - } - - /// Place the StringRefs on the newly copied keys in the pool. - char * res = pool.alloc(keys_size * sizeof(StringRef)); - memcpy(res, keys.data(), keys_size * sizeof(StringRef)); - - return reinterpret_cast(res); -} - -/* -/// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first. -static inline StringRef * ALWAYS_INLINE extractKeysAndPlaceInPool( - size_t i, - size_t keys_size, - const ColumnRawPtrs & key_columns, - StringRefs & keys, - Arena & pool) -{ - for (size_t j = 0; j < keys_size; ++j) - { - keys[j] = key_columns[j]->getDataAtWithTerminatingZero(i); - char * place = pool.alloc(keys[j].size); - memcpy(place, keys[j].data, keys[j].size); - keys[j].data = place; - } - - /// Place the StringRefs on the newly copied keys in the pool. - char * res = pool.alloc(keys_size * sizeof(StringRef)); - memcpy(res, keys.data(), keys_size * sizeof(StringRef)); - - return reinterpret_cast(res); -} - - -/// Copy the specified keys to a continuous memory chunk of a pool. -/// Subsequently append StringRef objects referring to each key. -/// -/// [key1][key2]...[keyN][ref1][ref2]...[refN] -/// ^ ^ : | | -/// +-----|--------:-----+ | -/// : +--------:-----------+ -/// : : -/// <--------------> -/// (1) -/// -/// Return a StringRef object, referring to the area (1) of the memory -/// chunk that contains the keys. In other words, we ignore their StringRefs. -inline StringRef ALWAYS_INLINE extractKeysAndPlaceInPoolContiguous( - size_t i, - size_t keys_size, - const ColumnRawPtrs & key_columns, - StringRefs & keys, - const TiDB::TiDBCollators & collators, - std::vector & sort_key_containers, - Arena & pool) -{ - size_t sum_keys_size = 0; - for (size_t j = 0; j < keys_size; ++j) - { - keys[j] = key_columns[j]->getDataAtWithTerminatingZero(i); - if (!collators.empty() && collators[j] != nullptr) - { - // todo check if need to handle the terminating zero - keys[j] = collators[j]->sortKey(keys[j].data, keys[j].size - 1, sort_key_containers[j]); - } - sum_keys_size += keys[j].size; - } - - char * res = pool.alloc(sum_keys_size + keys_size * sizeof(StringRef)); - char * place = res; - - for (size_t j = 0; j < keys_size; ++j) - { - memcpy(place, keys[j].data, keys[j].size); - keys[j].data = place; - place += keys[j].size; - } - - /// Place the StringRefs on the newly copied keys in the pool. - memcpy(place, keys.data(), keys_size * sizeof(StringRef)); - - return {res, sum_keys_size}; -} -*/ - /** Serialize keys into a continuous chunk of memory. */ static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous( diff --git a/dbms/src/Interpreters/JoinPartition.cpp b/dbms/src/Interpreters/JoinPartition.cpp index 9023fcac831..54faaae93a7 100644 --- a/dbms/src/Interpreters/JoinPartition.cpp +++ b/dbms/src/Interpreters/JoinPartition.cpp @@ -83,7 +83,7 @@ void RowsNotInsertToMap::insertRow(Block * stored_block, size_t index, bool need } else { - auto * elem = reinterpret_cast(pool.arena.alloc(sizeof(RowRefList))); + auto * elem = reinterpret_cast(pool.arena.alignedAlloc(sizeof(RowRefList), alignof(RowRefList))); new (elem) RowRefList(stored_block, index); /// don't need cache column since it will explicitly materialize of need_materialize is true insertRowToList(pool, &head, elem, 0); @@ -514,7 +514,8 @@ struct Inserter * We will insert each time the element into the second place. * That is, the former second element, if it was, will be the third, and so on. */ - auto elem = reinterpret_cast(pool.arena.alloc(sizeof(MappedType))); + auto elem + = reinterpret_cast(pool.arena.alignedAlloc(sizeof(MappedType), alignof(MappedType))); new (elem) typename Map::mapped_type(stored_block, i); insertRowToList(pool, &emplace_result.getMapped(), elem, cache_column_threshold); } diff --git a/dbms/src/Interpreters/SpecializedAggregator.h b/dbms/src/Interpreters/SpecializedAggregator.h index 29455d95d79..8e191926373 100644 --- a/dbms/src/Interpreters/SpecializedAggregator.h +++ b/dbms/src/Interpreters/SpecializedAggregator.h @@ -224,7 +224,8 @@ void NO_INLINE Aggregator::executeSpecializedCase( method.onNewKey(*it, params.keys_size, keys, *aggregates_pool); - AggregateDataPtr place = aggregates_pool->alloc(total_size_of_aggregate_states); + AggregateDataPtr place + = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); AggregateFunctionsList::forEach( AggregateFunctionsCreator(aggregate_functions, offsets_of_aggregate_states, place)); From 875e24d8da08fc2c091dde4f54c19da14f9290e6 Mon Sep 17 00:00:00 2001 From: JaySon-Huang Date: Thu, 4 Dec 2025 18:59:39 +0800 Subject: [PATCH 2/2] Format files Signed-off-by: JaySon-Huang --- dbms/src/Interpreters/SpecializedAggregator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/SpecializedAggregator.h b/dbms/src/Interpreters/SpecializedAggregator.h index 8e191926373..14d9ba7b5ab 100644 --- a/dbms/src/Interpreters/SpecializedAggregator.h +++ b/dbms/src/Interpreters/SpecializedAggregator.h @@ -305,4 +305,4 @@ void NO_INLINE Aggregator::executeSpecializedWithoutKey( extern "C" void __attribute__((__visibility__("default"), __noreturn__)) __cxa_pure_virtual() { abort(); -}; +}