From 7358fb894c164dba3dedca715ce557ef04ec49ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Rafael?= Date: Tue, 10 Mar 2026 16:16:19 +0000 Subject: [PATCH 1/9] Add DuckLake truncate operator --- src/include/storage/ducklake_catalog.hpp | 1 + src/include/storage/ducklake_delete.hpp | 9 ++ src/include/storage/ducklake_truncate.hpp | 35 ++++++ src/storage/CMakeLists.txt | 1 + src/storage/ducklake_delete.cpp | 2 +- src/storage/ducklake_truncate.cpp | 111 ++++++++++++++++++ .../delete/ducklake_delete_all_simple.test | 32 +++++ test/sql/delete/ducklake_truncate_simple.test | 32 +++++ 8 files changed, 222 insertions(+), 1 deletion(-) create mode 100644 src/include/storage/ducklake_truncate.hpp create mode 100644 src/storage/ducklake_truncate.cpp create mode 100644 test/sql/delete/ducklake_delete_all_simple.test create mode 100644 test/sql/delete/ducklake_truncate_simple.test diff --git a/src/include/storage/ducklake_catalog.hpp b/src/include/storage/ducklake_catalog.hpp index 3255eadcffa..903af03137c 100644 --- a/src/include/storage/ducklake_catalog.hpp +++ b/src/include/storage/ducklake_catalog.hpp @@ -95,6 +95,7 @@ class DuckLakeCatalog : public Catalog { PhysicalOperator &plan) override; PhysicalOperator &PlanDelete(ClientContext &context, PhysicalPlanGenerator &planner, LogicalDelete &op, PhysicalOperator &plan) override; + PhysicalOperator &PlanDelete(ClientContext &context, PhysicalPlanGenerator &planner, LogicalDelete &op) override; PhysicalOperator &PlanUpdate(ClientContext &context, PhysicalPlanGenerator &planner, LogicalUpdate &op, PhysicalOperator &plan) override; PhysicalOperator &PlanMergeInto(ClientContext &context, PhysicalPlanGenerator &planner, LogicalMergeInto &op, diff --git a/src/include/storage/ducklake_delete.hpp b/src/include/storage/ducklake_delete.hpp index 99e06774e86..6cd7647ca12 100644 --- a/src/include/storage/ducklake_delete.hpp +++ b/src/include/storage/ducklake_delete.hpp @@ -125,6 +125,15 @@ struct DuckLakeDeleteMap { delete_data_map.emplace(filename, std::move(delete_data)); } + vector GetAllFileInfos() const { + vector result; + result.reserve(file_map.size()); + for (auto &entry : file_map) { + result.push_back(entry.second); + } + return result; + } + private: mutex lock; unordered_map file_map; diff --git a/src/include/storage/ducklake_truncate.hpp b/src/include/storage/ducklake_truncate.hpp new file mode 100644 index 00000000000..010b2f7d58d --- /dev/null +++ b/src/include/storage/ducklake_truncate.hpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// storage/ducklake_truncate.hpp +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/execution/physical_operator.hpp" +#include "storage/ducklake_table_entry.hpp" + +namespace duckdb { + +class DuckLakeTruncate : public PhysicalOperator { +public: + DuckLakeTruncate(PhysicalPlan &physical_plan, DuckLakeTableEntry &table); + + DuckLakeTableEntry &table; + +public: + SourceResultType GetDataInternal(ExecutionContext &context, DataChunk &chunk, + OperatorSourceInput &input) const override; + + bool IsSource() const override { + return true; + } + + unique_ptr GetGlobalSourceState(ClientContext &context) const override; + + string GetName() const override; + InsertionOrderPreservingMap ParamsToString() const override; +}; + +} // namespace duckdb diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 41e6ac9ec7c..a5456812cfa 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -20,6 +20,7 @@ add_library( ducklake_storage.cpp ducklake_delete.cpp ducklake_deletion_vector.cpp + ducklake_truncate.cpp ducklake_multi_file_reader.cpp ducklake_partition_data.cpp ducklake_secret.cpp diff --git a/src/storage/ducklake_delete.cpp b/src/storage/ducklake_delete.cpp index 75b0e56d497..e84f0f983a2 100644 --- a/src/storage/ducklake_delete.cpp +++ b/src/storage/ducklake_delete.cpp @@ -694,7 +694,7 @@ PhysicalOperator &DuckLakeCatalog::PlanDelete(ClientContext &context, PhysicalPl row_id_indexes.push_back(bound_ref.index); } return DuckLakeDelete::PlanDelete(context, planner, op.table.Cast(), child_plan, - std::move(row_id_indexes), std::move(encryption_key)); + std::move(row_id_indexes), std::move(encryption_key), true); } } // namespace duckdb diff --git a/src/storage/ducklake_truncate.cpp b/src/storage/ducklake_truncate.cpp new file mode 100644 index 00000000000..b18262b97cf --- /dev/null +++ b/src/storage/ducklake_truncate.cpp @@ -0,0 +1,111 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// storage/ducklake_truncate.cpp +// +//===----------------------------------------------------------------------===// + +#include "storage/ducklake_truncate.hpp" +#include "storage/ducklake_catalog.hpp" +#include "storage/ducklake_multi_file_list.hpp" +#include "storage/ducklake_scan.hpp" +#include "storage/ducklake_transaction.hpp" +#include "duckdb/planner/operator/logical_delete.hpp" + +namespace duckdb { + +class DuckLakeTruncateGlobalState : public GlobalSourceState { +public: + bool finished = false; +}; + +DuckLakeTruncate::DuckLakeTruncate(PhysicalPlan &physical_plan, DuckLakeTableEntry &table) + : PhysicalOperator(physical_plan, PhysicalOperatorType::EXTENSION, {LogicalType::BIGINT}, 0), table(table) { +} + +unique_ptr DuckLakeTruncate::GetGlobalSourceState(ClientContext &context) const { + return make_uniq(); +} + +SourceResultType DuckLakeTruncate::GetDataInternal(ExecutionContext &context, DataChunk &chunk, + OperatorSourceInput &input) const { + auto &gstate = input.global_state.Cast(); + if (gstate.finished) { + return SourceResultType::FINISHED; + } + gstate.finished = true; + + auto &transaction = DuckLakeTransaction::Get(context.client, table.catalog); + DuckLakeFunctionInfo read_info(table, transaction, transaction.GetSnapshot()); + auto transaction_local_files = transaction.GetTransactionLocalFiles(table.GetTableId()); + auto transaction_local_data = transaction.GetTransactionLocalInlinedData(table.GetTableId()); + DuckLakeMultiFileList file_list(read_info, std::move(transaction_local_files), transaction_local_data); + + idx_t total_deleted_count = 0; + auto &metadata_manager = transaction.GetMetadataManager(); + auto snapshot = transaction.GetSnapshot(); + for (auto &inlined_table : table.GetInlinedDataTables()) { + auto inlined_count = metadata_manager.GetNetInlinedRowCount(inlined_table.table_name, snapshot); + total_deleted_count += inlined_count; + metadata_manager.DeleteInlinedData(inlined_table); + } + + auto files = file_list.GetFilesExtended(); + for (auto &file_info : files) { + if (file_info.data_type == DuckLakeDataType::INLINED_DATA) { + // handled via metadata manager + continue; + } + idx_t visible_rows = file_info.row_count; + if (file_info.delete_count <= visible_rows) { + visible_rows -= file_info.delete_count; + } + total_deleted_count += visible_rows; + + if (file_info.data_type == DuckLakeDataType::DATA_FILE) { + if (file_info.file_id.IsValid()) { + transaction.DropFile(table.GetTableId(), file_info.file_id, file_info.file.path); + } else { + transaction.DropTransactionLocalFile(table.GetTableId(), file_info.file.path); + } + continue; + } + + set deletes; + for (idx_t i = 0; i < file_info.row_count; i++) { + deletes.insert(i); + } + if (file_info.data_type == DuckLakeDataType::TRANSACTION_LOCAL_INLINED_DATA) { + transaction.DeleteFromLocalInlinedData(table.GetTableId(), std::move(deletes)); + } else { + transaction.AddNewInlinedDeletes(table.GetTableId(), file_info.file.path, std::move(deletes)); + } + } + + chunk.SetCardinality(1); + chunk.SetValue(0, 0, Value::BIGINT(NumericCast(total_deleted_count))); + return SourceResultType::FINISHED; +} + +string DuckLakeTruncate::GetName() const { + return "DUCKLAKE_TRUNCATE"; +} + +InsertionOrderPreservingMap DuckLakeTruncate::ParamsToString() const { + InsertionOrderPreservingMap result; + result["Table Name"] = table.name; + return result; +} + +PhysicalOperator &DuckLakeCatalog::PlanDelete(ClientContext &context, PhysicalPlanGenerator &planner, LogicalDelete &op) { + bool delete_all = !op.children.empty() && op.children[0]->type == LogicalOperatorType::LOGICAL_GET; + if (!delete_all) { + return Catalog::PlanDelete(context, planner, op); + } + if (op.return_chunk) { + throw BinderException("RETURNING clause not yet supported for deletion of a DuckLake table"); + } + return planner.Make(op.table.Cast()); +} + +} // namespace duckdb diff --git a/test/sql/delete/ducklake_delete_all_simple.test b/test/sql/delete/ducklake_delete_all_simple.test new file mode 100644 index 00000000000..bf9f0775450 --- /dev/null +++ b/test/sql/delete/ducklake_delete_all_simple.test @@ -0,0 +1,32 @@ +# name: test/sql/delete/ducklake_delete_all_simple.test +# description: simple delete-all on ducklake table + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_delete_all_simple') + +statement ok +CREATE TABLE ducklake.delete_all_test(i INTEGER); + +statement ok +INSERT INTO ducklake.delete_all_test FROM range(10); + +query I +SELECT COUNT(*) FROM ducklake.delete_all_test; +---- +10 + +statement ok +DELETE FROM ducklake.delete_all_test; + +query I +SELECT COUNT(*) FROM ducklake.delete_all_test; +---- +0 diff --git a/test/sql/delete/ducklake_truncate_simple.test b/test/sql/delete/ducklake_truncate_simple.test new file mode 100644 index 00000000000..01fb7f748b3 --- /dev/null +++ b/test/sql/delete/ducklake_truncate_simple.test @@ -0,0 +1,32 @@ +# name: test/sql/delete/ducklake_truncate_simple.test +# description: simple TRUNCATE on ducklake table + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_simple') + +statement ok +CREATE TABLE ducklake.truncate_test(i INTEGER); + +statement ok +INSERT INTO ducklake.truncate_test FROM range(10); + +query I +SELECT COUNT(*) FROM ducklake.truncate_test; +---- +10 + +statement ok +TRUNCATE ducklake.truncate_test; + +query I +SELECT COUNT(*) FROM ducklake.truncate_test; +---- +0 From cc066a7c8fff059c189799993ff89aead2b4e236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Rafael?= Date: Tue, 10 Mar 2026 16:53:05 +0000 Subject: [PATCH 2/9] Assert DuckLake truncate plan in tests --- test/sql/delete/ducklake_delete_all_simple.test | 5 +++++ test/sql/delete/ducklake_truncate_simple.test | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/test/sql/delete/ducklake_delete_all_simple.test b/test/sql/delete/ducklake_delete_all_simple.test index bf9f0775450..ae834ebfda0 100644 --- a/test/sql/delete/ducklake_delete_all_simple.test +++ b/test/sql/delete/ducklake_delete_all_simple.test @@ -18,6 +18,11 @@ CREATE TABLE ducklake.delete_all_test(i INTEGER); statement ok INSERT INTO ducklake.delete_all_test FROM range(10); +query II +EXPLAIN DELETE FROM ducklake.delete_all_test; +---- +physical_plan :.*DUCKLAKE_TRUNCATE.* + query I SELECT COUNT(*) FROM ducklake.delete_all_test; ---- diff --git a/test/sql/delete/ducklake_truncate_simple.test b/test/sql/delete/ducklake_truncate_simple.test index 01fb7f748b3..cd9f808a37d 100644 --- a/test/sql/delete/ducklake_truncate_simple.test +++ b/test/sql/delete/ducklake_truncate_simple.test @@ -18,6 +18,11 @@ CREATE TABLE ducklake.truncate_test(i INTEGER); statement ok INSERT INTO ducklake.truncate_test FROM range(10); +query II +EXPLAIN TRUNCATE ducklake.truncate_test; +---- +physical_plan :.*DUCKLAKE_TRUNCATE.* + query I SELECT COUNT(*) FROM ducklake.truncate_test; ---- From aff193adce72a240e40325eba7242e4714adcbe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Rafael?= Date: Tue, 10 Mar 2026 17:14:39 +0000 Subject: [PATCH 3/9] Assert delete count invariant in truncate --- src/storage/ducklake_truncate.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/storage/ducklake_truncate.cpp b/src/storage/ducklake_truncate.cpp index b18262b97cf..ef02d100ab6 100644 --- a/src/storage/ducklake_truncate.cpp +++ b/src/storage/ducklake_truncate.cpp @@ -56,10 +56,8 @@ SourceResultType DuckLakeTruncate::GetDataInternal(ExecutionContext &context, Da // handled via metadata manager continue; } - idx_t visible_rows = file_info.row_count; - if (file_info.delete_count <= visible_rows) { - visible_rows -= file_info.delete_count; - } + D_ASSERT(file_info.delete_count <= file_info.row_count); + idx_t visible_rows = file_info.row_count - file_info.delete_count; total_deleted_count += visible_rows; if (file_info.data_type == DuckLakeDataType::DATA_FILE) { From 3ce9204ff76a71269daa4abd09e8763516ab3075 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Rafael?= Date: Thu, 12 Mar 2026 17:48:00 +0000 Subject: [PATCH 4/9] Add more unit tests and cleanup implementation --- .../storage/ducklake_metadata_manager.hpp | 1 + src/include/storage/ducklake_transaction.hpp | 2 + src/storage/ducklake_metadata_manager.cpp | 23 ++++++-- src/storage/ducklake_multi_file_list.cpp | 3 ++ src/storage/ducklake_scan.cpp | 3 +- src/storage/ducklake_transaction.cpp | 21 ++++++++ src/storage/ducklake_truncate.cpp | 54 +++++++++---------- test/sql/delete/truncate_table_inlined.test | 50 +++++++++++++++++ .../delete/truncate_table_return_value.test | 36 +++++++++++++ test/sql/delete/truncate_table_rollback.test | 39 ++++++++++++++ .../delete/truncate_table_time_travel.test | 41 ++++++++++++++ .../truncate_table_transactionality.test | 44 +++++++++++++++ 12 files changed, 283 insertions(+), 34 deletions(-) create mode 100644 test/sql/delete/truncate_table_inlined.test create mode 100644 test/sql/delete/truncate_table_return_value.test create mode 100644 test/sql/delete/truncate_table_rollback.test create mode 100644 test/sql/delete/truncate_table_time_travel.test create mode 100644 test/sql/delete/truncate_table_transactionality.test diff --git a/src/include/storage/ducklake_metadata_manager.hpp b/src/include/storage/ducklake_metadata_manager.hpp index eb5980eb53b..a2be06be83e 100644 --- a/src/include/storage/ducklake_metadata_manager.hpp +++ b/src/include/storage/ducklake_metadata_manager.hpp @@ -227,6 +227,7 @@ class DuckLakeMetadataManager { virtual shared_ptr TransformInlinedData(QueryResult &result, const vector &expected_types); + virtual void MarkInlinedDataDeleted(DuckLakeSnapshot snapshot, const string &inlined_table_name); virtual void DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table); //! We delete at the flush virtual void DeleteFlushedInlinedData(const DuckLakeInlinedTableInfo &inlined_table, idx_t flush_snapshot_id); diff --git a/src/include/storage/ducklake_transaction.hpp b/src/include/storage/ducklake_transaction.hpp index 41c47815057..675aabe52b5 100644 --- a/src/include/storage/ducklake_transaction.hpp +++ b/src/include/storage/ducklake_transaction.hpp @@ -206,6 +206,7 @@ class DuckLakeTransaction : public Transaction, public enable_shared_from_this collection); void AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set new_deletes); void DeleteFromLocalInlinedData(TableIndex table_id, set new_deletes); + void TruncateLocalInlinedData(TableIndex table_id); void AddColumnToLocalInlinedData(TableIndex table_id, const LogicalType &new_column_type, FieldIndex new_field_index, const Value &default_value = Value()); void RemoveColumnFromLocalInlinedData(TableIndex table_id, LogicalIndex removed_column_index, @@ -226,6 +227,7 @@ class DuckLakeTransaction : public Transaction, public enable_shared_from_this &snapshots); + void MarkInlinedDataDeleted(const string &inlined_table_name); void DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table); //! Delete inlined data rows with begin_snapshot <= flush_snapshot_id void DeleteFlushedInlinedData(const DuckLakeInlinedTableInfo &inlined_table, idx_t flush_snapshot_id); diff --git a/src/storage/ducklake_metadata_manager.cpp b/src/storage/ducklake_metadata_manager.cpp index 820f385afb3..5c52ebec01d 100644 --- a/src/storage/ducklake_metadata_manager.cpp +++ b/src/storage/ducklake_metadata_manager.cpp @@ -1726,10 +1726,10 @@ DuckLakeMetadataManager::GetExtendedFilesForTable(DuckLakeTableEntry &table, Duc // Add base query query += StringUtil::Format(R"( -SELECT data.data_file_id, del.delete_file_id, data.record_count, %s +SELECT data.data_file_id, del.delete_file_id, data.record_count, COALESCE(del.delete_count, 0), %s FROM {METADATA_CATALOG}.ducklake_data_file data LEFT JOIN ( - SELECT * + SELECT * FROM {METADATA_CATALOG}.ducklake_delete_file WHERE table_id=%d AND {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < end_snapshot OR end_snapshot IS NULL) @@ -1755,7 +1755,8 @@ WHERE data.table_id=%d AND {SNAPSHOT_ID} >= data.begin_snapshot AND ({SNAPSHOT_I file_entry.delete_file_id = DataFileIndex(row.GetValue(1)); } file_entry.row_count = row.GetValue(2); - idx_t col_idx = 3; + file_entry.delete_count = row.GetValue(3); + idx_t col_idx = 4; file_entry.file = ReadDataFile(table, row, col_idx, IsEncrypted()); if (!row.IsNull(col_idx)) { file_entry.row_id_start = row.GetValue(col_idx); @@ -4410,8 +4411,8 @@ WHERE end_snapshot IS NOT NULL AND NOT EXISTS( void DuckLakeMetadataManager::DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table) { auto result = transaction.Query(StringUtil::Format(R"( - DELETE FROM {METADATA_CATALOG}.%s -)", + DELETE FROM {METADATA_CATALOG}.%s + )", SQLIdentifier(inlined_table.table_name))); if (result->HasError()) { result->GetErrorObject().Throw("Failed to delete inlined data in DuckLake from table " + @@ -4441,6 +4442,18 @@ DuckLakeMetadataManager::GenerateDeleteFlushedInlinedData(const vectorHasError()) { + result->GetErrorObject().Throw("Failed to mark inlined data as deleted in DuckLake from table " + + inlined_table_name + ": "); + } +} string DuckLakeMetadataManager::InsertNewSchema(const DuckLakeSnapshot &snapshot, const set &table_ids) { if (table_ids.empty()) { return {}; diff --git a/src/storage/ducklake_multi_file_list.cpp b/src/storage/ducklake_multi_file_list.cpp index 256b036cc9e..860cef2e3c6 100644 --- a/src/storage/ducklake_multi_file_list.cpp +++ b/src/storage/ducklake_multi_file_list.cpp @@ -279,6 +279,9 @@ vector DuckLakeMultiFileList::GetFilesExtended() file_entry.file_id = DataFileIndex(); file_entry.delete_file_id = DataFileIndex(); file_entry.row_count = file.row_count; + if (!file.delete_files.empty()) { + file_entry.delete_count = file.delete_files.back().delete_count; + } file_entry.file = GetFileData(file); file_entry.delete_file = GetDeleteData(file); file_entry.row_id_start = transaction_row_start; diff --git a/src/storage/ducklake_scan.cpp b/src/storage/ducklake_scan.cpp index f329e15ebc3..982f991e157 100644 --- a/src/storage/ducklake_scan.cpp +++ b/src/storage/ducklake_scan.cpp @@ -213,7 +213,8 @@ TableFunction DuckLakeFunctions::GetDuckLakeScanFunction(DatabaseInstance &insta DuckLakeFunctionInfo::DuckLakeFunctionInfo(DuckLakeTableEntry &table, DuckLakeTransaction &transaction_p, DuckLakeSnapshot snapshot) - : table(table), transaction(transaction_p.shared_from_this()), snapshot(snapshot) { + : table(table), transaction(transaction_p.shared_from_this()), table_name(table.name), snapshot(snapshot), + table_id(table.GetTableId()) { } shared_ptr diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp index ea038737910..9b80402e694 100644 --- a/src/storage/ducklake_transaction.cpp +++ b/src/storage/ducklake_transaction.cpp @@ -2560,6 +2560,11 @@ void DuckLakeTransaction::DeleteSnapshots(const vector &sn metadata_manager.DeleteSnapshots(snapshots); } +void DuckLakeTransaction::MarkInlinedDataDeleted(const string &inlined_table_name) { + auto &metadata_manager = GetMetadataManager(); + metadata_manager.MarkInlinedDataDeleted(GetSnapshot(), inlined_table_name); +} + void DuckLakeTransaction::DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table) { auto &metadata_manager = GetMetadataManager(); metadata_manager.DeleteInlinedData(inlined_table); @@ -2712,6 +2717,22 @@ void DuckLakeTransaction::DeleteFromLocalInlinedData(TableIndex table_id, set guard(table_data_changes_lock); + auto entry = table_data_changes.find(table_id); + if (entry == table_data_changes.end()) { + throw InternalException("TruncateLocalInlinedData called but no transaction-local data exists for table"); + } + auto &table_changes = entry->second; + if (!table_changes.new_inlined_data) { + throw InternalException("TruncateLocalInlinedData called but no inlined data exists"); + } + table_changes.new_inlined_data.reset(); + if (table_changes.IsEmpty()) { + table_data_changes.erase(entry); + } +} + void DuckLakeTransaction::AddColumnToLocalInlinedData(TableIndex table_id, const LogicalType &new_column_type, FieldIndex new_field_index, const Value &default_value) { auto context_ref = context.lock(); diff --git a/src/storage/ducklake_truncate.cpp b/src/storage/ducklake_truncate.cpp index ef02d100ab6..c9062d83792 100644 --- a/src/storage/ducklake_truncate.cpp +++ b/src/storage/ducklake_truncate.cpp @@ -11,6 +11,7 @@ #include "storage/ducklake_scan.hpp" #include "storage/ducklake_transaction.hpp" #include "duckdb/planner/operator/logical_delete.hpp" +#include "duckdb/planner/operator/logical_get.hpp" namespace duckdb { @@ -20,7 +21,7 @@ class DuckLakeTruncateGlobalState : public GlobalSourceState { }; DuckLakeTruncate::DuckLakeTruncate(PhysicalPlan &physical_plan, DuckLakeTableEntry &table) - : PhysicalOperator(physical_plan, PhysicalOperatorType::EXTENSION, {LogicalType::BIGINT}, 0), table(table) { + : PhysicalOperator(physical_plan, PhysicalOperatorType::EXTENSION, {LogicalType::UBIGINT}, 0), table(table) { } unique_ptr DuckLakeTruncate::GetGlobalSourceState(ClientContext &context) const { @@ -41,47 +42,40 @@ SourceResultType DuckLakeTruncate::GetDataInternal(ExecutionContext &context, Da auto transaction_local_data = transaction.GetTransactionLocalInlinedData(table.GetTableId()); DuckLakeMultiFileList file_list(read_info, std::move(transaction_local_files), transaction_local_data); - idx_t total_deleted_count = 0; auto &metadata_manager = transaction.GetMetadataManager(); auto snapshot = transaction.GetSnapshot(); - for (auto &inlined_table : table.GetInlinedDataTables()) { - auto inlined_count = metadata_manager.GetNetInlinedRowCount(inlined_table.table_name, snapshot); - total_deleted_count += inlined_count; - metadata_manager.DeleteInlinedData(inlined_table); - } - + uint64_t total_deleted_count = 0; auto files = file_list.GetFilesExtended(); for (auto &file_info : files) { - if (file_info.data_type == DuckLakeDataType::INLINED_DATA) { - // handled via metadata manager - continue; - } - D_ASSERT(file_info.delete_count <= file_info.row_count); - idx_t visible_rows = file_info.row_count - file_info.delete_count; - total_deleted_count += visible_rows; - - if (file_info.data_type == DuckLakeDataType::DATA_FILE) { + switch (file_info.data_type) { + case DuckLakeDataType::DATA_FILE: { + D_ASSERT(file_info.delete_count <= file_info.row_count); + total_deleted_count += file_info.row_count - file_info.delete_count; if (file_info.file_id.IsValid()) { transaction.DropFile(table.GetTableId(), file_info.file_id, file_info.file.path); } else { transaction.DropTransactionLocalFile(table.GetTableId(), file_info.file.path); } - continue; + break; } - - set deletes; - for (idx_t i = 0; i < file_info.row_count; i++) { - deletes.insert(i); + case DuckLakeDataType::INLINED_DATA: { + auto inlined_count = metadata_manager.GetNetInlinedRowCount(file_info.file.path, snapshot); + total_deleted_count += inlined_count; + transaction.MarkInlinedDataDeleted(file_info.file.path); + break; + } + case DuckLakeDataType::TRANSACTION_LOCAL_INLINED_DATA: { + total_deleted_count += file_info.row_count; + transaction.TruncateLocalInlinedData(table.GetTableId()); + break; } - if (file_info.data_type == DuckLakeDataType::TRANSACTION_LOCAL_INLINED_DATA) { - transaction.DeleteFromLocalInlinedData(table.GetTableId(), std::move(deletes)); - } else { - transaction.AddNewInlinedDeletes(table.GetTableId(), file_info.file.path, std::move(deletes)); + default: + throw InternalException("Unsupported DuckLakeDataType in truncate"); } } chunk.SetCardinality(1); - chunk.SetValue(0, 0, Value::BIGINT(NumericCast(total_deleted_count))); + chunk.SetValue(0, 0, Value::UBIGINT(total_deleted_count)); return SourceResultType::FINISHED; } @@ -96,7 +90,11 @@ InsertionOrderPreservingMap DuckLakeTruncate::ParamsToString() const { } PhysicalOperator &DuckLakeCatalog::PlanDelete(ClientContext &context, PhysicalPlanGenerator &planner, LogicalDelete &op) { - bool delete_all = !op.children.empty() && op.children[0]->type == LogicalOperatorType::LOGICAL_GET; + bool delete_all = false; + if (op.children.size() == 1 && op.children[0]->type == LogicalOperatorType::LOGICAL_GET) { + auto &get = op.children[0]->Cast(); + delete_all = get.table_filters.filters.empty(); + } if (!delete_all) { return Catalog::PlanDelete(context, planner, op); } diff --git a/test/sql/delete/truncate_table_inlined.test b/test/sql/delete/truncate_table_inlined.test new file mode 100644 index 00000000000..12b4fed1654 --- /dev/null +++ b/test/sql/delete/truncate_table_inlined.test @@ -0,0 +1,50 @@ +# name: test/sql/delete/truncate_table_inlined.test +# description: Verify TRUNCATE works with inlined data +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_inlined_files') + +statement ok +CALL ducklake.set_option('data_inlining_row_limit', 1000); + +statement ok +CREATE TABLE ducklake.test_inline AS SELECT i id FROM range(8) t(i); + +query I +TRUNCATE ducklake.test_inline +---- +8 + +query I +SELECT COUNT(*) FROM ducklake.test_inline +---- +0 + +statement ok +INSERT INTO ducklake.test_inline VALUES (1), (2), (3); + +statement ok +BEGIN + +query I +TRUNCATE ducklake.test_inline +---- +3 + +statement ok +ROLLBACK + +query I +SELECT COUNT(*) FROM ducklake.test_inline +---- +3 diff --git a/test/sql/delete/truncate_table_return_value.test b/test/sql/delete/truncate_table_return_value.test new file mode 100644 index 00000000000..4b4908b8505 --- /dev/null +++ b/test/sql/delete/truncate_table_return_value.test @@ -0,0 +1,36 @@ +# name: test/sql/delete/truncate_table_return_value.test +# description: Verify TRUNCATE return value including tables with existing deletes +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_return_value_files') + +statement ok +CREATE TABLE ducklake.return_value_test AS SELECT i id FROM range(10000) t(i); + +statement ok +DELETE FROM ducklake.return_value_test WHERE id%8=0 + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.return_value_test +---- +8750 43750000 + +query I +TRUNCATE ducklake.return_value_test +---- +8750 + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.return_value_test +---- +0 NULL diff --git a/test/sql/delete/truncate_table_rollback.test b/test/sql/delete/truncate_table_rollback.test new file mode 100644 index 00000000000..574d7b06ca7 --- /dev/null +++ b/test/sql/delete/truncate_table_rollback.test @@ -0,0 +1,39 @@ +# name: test/sql/delete/truncate_table_rollback.test +# description: Verify TRUNCATE rollback behavior +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_rollback_files') + +statement ok +CREATE TABLE ducklake.rollback_test AS SELECT i id FROM range(100) t(i); + +statement ok +BEGIN + +query I +TRUNCATE ducklake.rollback_test +---- +100 + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.rollback_test +---- +0 NULL + +statement ok +ROLLBACK + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.rollback_test +---- +100 4950 diff --git a/test/sql/delete/truncate_table_time_travel.test b/test/sql/delete/truncate_table_time_travel.test new file mode 100644 index 00000000000..765e3426393 --- /dev/null +++ b/test/sql/delete/truncate_table_time_travel.test @@ -0,0 +1,41 @@ +# name: test/sql/delete/truncate_table_time_travel.test +# description: Verify time travel works correctly across TRUNCATE +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_time_travel_files', DATA_INLINING_ROW_LIMIT 0) + +statement ok +CREATE TABLE ducklake.time_travel_test(id INTEGER); + +statement ok +INSERT INTO ducklake.time_travel_test VALUES (1), (2), (3); + +statement ok +SET VARIABLE pre_truncate_snapshot = (SELECT max(snapshot_id) FROM ducklake.snapshots()); + +query I +TRUNCATE ducklake.time_travel_test +---- +3 + +query I +SELECT COUNT(*) FROM ducklake.time_travel_test +---- +0 + +query I +SELECT id FROM ducklake.time_travel_test AT (VERSION => getvariable('pre_truncate_snapshot')) ORDER BY ALL +---- +1 +2 +3 diff --git a/test/sql/delete/truncate_table_transactionality.test b/test/sql/delete/truncate_table_transactionality.test new file mode 100644 index 00000000000..ca8193b8e4b --- /dev/null +++ b/test/sql/delete/truncate_table_transactionality.test @@ -0,0 +1,44 @@ +# name: test/sql/delete/truncate_table_transactionality.test +# description: Verify uncommitted TRUNCATE is not visible to other connections +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_transactionality_files') + +statement ok +CREATE TABLE ducklake.txn_test AS SELECT i id FROM range(25) t(i); + +statement ok con1 +BEGIN + +query I con1 +TRUNCATE ducklake.txn_test +---- +25 + +query II con1 +SELECT COUNT(*), SUM(id) FROM ducklake.txn_test +---- +0 NULL + +query II con2 +SELECT COUNT(*), SUM(id) FROM ducklake.txn_test +---- +25 300 + +statement ok con1 +COMMIT + +query II con2 +SELECT COUNT(*), SUM(id) FROM ducklake.txn_test +---- +0 NULL From c591189bb630f8d11aa1985bc4adc6fcffd07bb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Rafael?= Date: Thu, 12 Mar 2026 18:05:44 +0000 Subject: [PATCH 5/9] Cleanups --- src/include/storage/ducklake_delete.hpp | 9 --------- src/storage/ducklake_metadata_manager.cpp | 2 +- src/storage/ducklake_multi_file_list.cpp | 1 + 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/src/include/storage/ducklake_delete.hpp b/src/include/storage/ducklake_delete.hpp index 6cd7647ca12..99e06774e86 100644 --- a/src/include/storage/ducklake_delete.hpp +++ b/src/include/storage/ducklake_delete.hpp @@ -125,15 +125,6 @@ struct DuckLakeDeleteMap { delete_data_map.emplace(filename, std::move(delete_data)); } - vector GetAllFileInfos() const { - vector result; - result.reserve(file_map.size()); - for (auto &entry : file_map) { - result.push_back(entry.second); - } - return result; - } - private: mutex lock; unordered_map file_map; diff --git a/src/storage/ducklake_metadata_manager.cpp b/src/storage/ducklake_metadata_manager.cpp index 5c52ebec01d..d7ca2f5bb77 100644 --- a/src/storage/ducklake_metadata_manager.cpp +++ b/src/storage/ducklake_metadata_manager.cpp @@ -1729,7 +1729,7 @@ DuckLakeMetadataManager::GetExtendedFilesForTable(DuckLakeTableEntry &table, Duc SELECT data.data_file_id, del.delete_file_id, data.record_count, COALESCE(del.delete_count, 0), %s FROM {METADATA_CATALOG}.ducklake_data_file data LEFT JOIN ( - SELECT * + SELECT * FROM {METADATA_CATALOG}.ducklake_delete_file WHERE table_id=%d AND {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < end_snapshot OR end_snapshot IS NULL) diff --git a/src/storage/ducklake_multi_file_list.cpp b/src/storage/ducklake_multi_file_list.cpp index 860cef2e3c6..d6c464d0161 100644 --- a/src/storage/ducklake_multi_file_list.cpp +++ b/src/storage/ducklake_multi_file_list.cpp @@ -280,6 +280,7 @@ vector DuckLakeMultiFileList::GetFilesExtended() file_entry.delete_file_id = DataFileIndex(); file_entry.row_count = file.row_count; if (!file.delete_files.empty()) { + D_ASSERT(file.delete_files.size() == 1); file_entry.delete_count = file.delete_files.back().delete_count; } file_entry.file = GetFileData(file); From 97e22406ce732bc0a71847018b866c676cb0d02e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Rafael?= Date: Thu, 9 Apr 2026 16:28:12 +0000 Subject: [PATCH 6/9] Fix truncate local inlined data after rebase --- src/include/storage/ducklake_transaction.hpp | 1 + src/storage/ducklake_transaction.cpp | 30 +++++++++++--------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/include/storage/ducklake_transaction.hpp b/src/include/storage/ducklake_transaction.hpp index 675aabe52b5..65d4f9be51b 100644 --- a/src/include/storage/ducklake_transaction.hpp +++ b/src/include/storage/ducklake_transaction.hpp @@ -71,6 +71,7 @@ struct LocalTableChanges { void AppendInlinedData(ClientContext &context, TableIndex table_id, unique_ptr new_data); void AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set new_deletes); void DeleteFromLocalInlinedData(ClientContext &context, TableIndex table_id, set new_deletes); + void TruncateLocalInlinedData(TableIndex table_id); void AddColumnToLocalInlinedData(ClientContext &context, TableIndex table_id, const LogicalType &new_column_type, FieldIndex new_field_index, const Value &default_value); void RemoveColumnFromLocalInlinedData(ClientContext &context, TableIndex table_id, diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp index 9b80402e694..1d2678709f2 100644 --- a/src/storage/ducklake_transaction.cpp +++ b/src/storage/ducklake_transaction.cpp @@ -290,6 +290,22 @@ void LocalTableChanges::DeleteFromLocalInlinedData(ClientContext &context, Table inlined_data.row_ids = std::move(new_row_ids); } +void LocalTableChanges::TruncateLocalInlinedData(TableIndex table_id) { + lock_guard guard(lock); + auto entry = changes.find(table_id); + if (entry == changes.end()) { + throw InternalException("TruncateLocalInlinedData called but no transaction-local data exists for table"); + } + auto &table_changes = entry->second; + if (!table_changes.new_inlined_data) { + throw InternalException("TruncateLocalInlinedData called but no inlined data exists"); + } + table_changes.new_inlined_data.reset(); + if (table_changes.IsEmpty()) { + changes.erase(entry); + } +} + static void RemoveFieldStats(map &column_stats, const DuckLakeFieldId &field_id) { column_stats.erase(field_id.GetFieldIndex()); for (auto &child_id : field_id.Children()) { @@ -2718,19 +2734,7 @@ void DuckLakeTransaction::DeleteFromLocalInlinedData(TableIndex table_id, set guard(table_data_changes_lock); - auto entry = table_data_changes.find(table_id); - if (entry == table_data_changes.end()) { - throw InternalException("TruncateLocalInlinedData called but no transaction-local data exists for table"); - } - auto &table_changes = entry->second; - if (!table_changes.new_inlined_data) { - throw InternalException("TruncateLocalInlinedData called but no inlined data exists"); - } - table_changes.new_inlined_data.reset(); - if (table_changes.IsEmpty()) { - table_data_changes.erase(entry); - } + local_changes.TruncateLocalInlinedData(table_id); } void DuckLakeTransaction::AddColumnToLocalInlinedData(TableIndex table_id, const LogicalType &new_column_type, From afb91b078515f29ba090ca0b389e4a6012ba1531 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Rafael?= Date: Thu, 9 Apr 2026 20:42:04 +0000 Subject: [PATCH 7/9] Fix truncate row count on repeated deletes --- src/storage/ducklake_truncate.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/storage/ducklake_truncate.cpp b/src/storage/ducklake_truncate.cpp index c9062d83792..e973ca892bf 100644 --- a/src/storage/ducklake_truncate.cpp +++ b/src/storage/ducklake_truncate.cpp @@ -42,15 +42,11 @@ SourceResultType DuckLakeTruncate::GetDataInternal(ExecutionContext &context, Da auto transaction_local_data = transaction.GetTransactionLocalInlinedData(table.GetTableId()); DuckLakeMultiFileList file_list(read_info, std::move(transaction_local_files), transaction_local_data); - auto &metadata_manager = transaction.GetMetadataManager(); - auto snapshot = transaction.GetSnapshot(); - uint64_t total_deleted_count = 0; + uint64_t total_deleted_count = table.GetNetDataFileRowCount(transaction) + table.GetNetInlinedRowCount(transaction); auto files = file_list.GetFilesExtended(); for (auto &file_info : files) { switch (file_info.data_type) { case DuckLakeDataType::DATA_FILE: { - D_ASSERT(file_info.delete_count <= file_info.row_count); - total_deleted_count += file_info.row_count - file_info.delete_count; if (file_info.file_id.IsValid()) { transaction.DropFile(table.GetTableId(), file_info.file_id, file_info.file.path); } else { @@ -59,13 +55,10 @@ SourceResultType DuckLakeTruncate::GetDataInternal(ExecutionContext &context, Da break; } case DuckLakeDataType::INLINED_DATA: { - auto inlined_count = metadata_manager.GetNetInlinedRowCount(file_info.file.path, snapshot); - total_deleted_count += inlined_count; transaction.MarkInlinedDataDeleted(file_info.file.path); break; } case DuckLakeDataType::TRANSACTION_LOCAL_INLINED_DATA: { - total_deleted_count += file_info.row_count; transaction.TruncateLocalInlinedData(table.GetTableId()); break; } @@ -98,10 +91,15 @@ PhysicalOperator &DuckLakeCatalog::PlanDelete(ClientContext &context, PhysicalPl if (!delete_all) { return Catalog::PlanDelete(context, planner, op); } + auto &table = op.table.Cast(); + auto &transaction = DuckLakeTransaction::Get(context, *this); + if (transaction.HasAnyLocalChanges(table.GetTableId())) { + return Catalog::PlanDelete(context, planner, op); + } if (op.return_chunk) { throw BinderException("RETURNING clause not yet supported for deletion of a DuckLake table"); } - return planner.Make(op.table.Cast()); + return planner.Make(table); } } // namespace duckdb From f7f6a8188aa080f9f3e8ed348460efabe411a8ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Rafael?= Date: Fri, 10 Apr 2026 08:38:39 +0000 Subject: [PATCH 8/9] Fix delete history and diagnostics after rebase --- src/storage/ducklake_metadata_manager.cpp | 10 ++++------ src/storage/ducklake_multi_file_list.cpp | 4 ---- src/storage/ducklake_truncate.cpp | 18 +++++++++++++++++- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/src/storage/ducklake_metadata_manager.cpp b/src/storage/ducklake_metadata_manager.cpp index d7ca2f5bb77..836fec950eb 100644 --- a/src/storage/ducklake_metadata_manager.cpp +++ b/src/storage/ducklake_metadata_manager.cpp @@ -1726,7 +1726,7 @@ DuckLakeMetadataManager::GetExtendedFilesForTable(DuckLakeTableEntry &table, Duc // Add base query query += StringUtil::Format(R"( -SELECT data.data_file_id, del.delete_file_id, data.record_count, COALESCE(del.delete_count, 0), %s +SELECT data.data_file_id, del.delete_file_id, data.record_count, %s FROM {METADATA_CATALOG}.ducklake_data_file data LEFT JOIN ( SELECT * @@ -1755,8 +1755,7 @@ WHERE data.table_id=%d AND {SNAPSHOT_ID} >= data.begin_snapshot AND ({SNAPSHOT_I file_entry.delete_file_id = DataFileIndex(row.GetValue(1)); } file_entry.row_count = row.GetValue(2); - file_entry.delete_count = row.GetValue(3); - idx_t col_idx = 4; + idx_t col_idx = 3; file_entry.file = ReadDataFile(table, row, col_idx, IsEncrypted()); if (!row.IsNull(col_idx)) { file_entry.row_id_start = row.GetValue(col_idx); @@ -1796,7 +1795,7 @@ vector DuckLakeMetadataManager::GetFilesForCompacti string deletion_threshold_clause; if (type == CompactionType::REWRITE_DELETES) { // Filter current data files in SQL, then apply the delete threshold in C++ so we can include - // metadata-only inlined file deletions as rewrite candidates + // metadata-only inlined file deletions as rewrite candidates. deletion_threshold_clause = " AND data.end_snapshot is null"; } // Add file size filtering for MERGE_ADJACENT_TABLES compaction @@ -1901,8 +1900,7 @@ ORDER BY data.begin_snapshot, data.row_id_start, data.data_file_id, del.begin_sn delete_file.data = ReadDeleteFile(table, row, col_idx, IsEncrypted()); file_entry.delete_files.push_back(std::move(delete_file)); } - - // Load inlined deletions for active files so rewrite compaction can treat them the same as delete files + // Load inlined deletions for active files so rewrite compaction can treat them the same as delete files. auto inlined_deletions = ReadInlinedFileDeletions(table_id, snapshot); for (auto &file : files) { auto entry = inlined_deletions.find(file.file.id.index); diff --git a/src/storage/ducklake_multi_file_list.cpp b/src/storage/ducklake_multi_file_list.cpp index d6c464d0161..256b036cc9e 100644 --- a/src/storage/ducklake_multi_file_list.cpp +++ b/src/storage/ducklake_multi_file_list.cpp @@ -279,10 +279,6 @@ vector DuckLakeMultiFileList::GetFilesExtended() file_entry.file_id = DataFileIndex(); file_entry.delete_file_id = DataFileIndex(); file_entry.row_count = file.row_count; - if (!file.delete_files.empty()) { - D_ASSERT(file.delete_files.size() == 1); - file_entry.delete_count = file.delete_files.back().delete_count; - } file_entry.file = GetFileData(file); file_entry.delete_file = GetDeleteData(file); file_entry.row_id_start = transaction_row_start; diff --git a/src/storage/ducklake_truncate.cpp b/src/storage/ducklake_truncate.cpp index e973ca892bf..4a742a426aa 100644 --- a/src/storage/ducklake_truncate.cpp +++ b/src/storage/ducklake_truncate.cpp @@ -20,6 +20,21 @@ class DuckLakeTruncateGlobalState : public GlobalSourceState { bool finished = false; }; +static set GetVisibleInlinedRowIds(DuckLakeTransaction &transaction, DuckLakeSnapshot snapshot, + const string &inlined_table_name) { + auto &metadata_manager = transaction.GetMetadataManager(); + auto query_result = metadata_manager.ReadInlinedData(snapshot, inlined_table_name, {"row_id"}); + if (query_result->HasError()) { + query_result->GetErrorObject().Throw("Failed to read inlined row ids during DuckLake truncate: "); + } + + set row_ids; + for (auto &row : *query_result) { + row_ids.insert(row.GetValue(0)); + } + return row_ids; +} + DuckLakeTruncate::DuckLakeTruncate(PhysicalPlan &physical_plan, DuckLakeTableEntry &table) : PhysicalOperator(physical_plan, PhysicalOperatorType::EXTENSION, {LogicalType::UBIGINT}, 0), table(table) { } @@ -55,7 +70,8 @@ SourceResultType DuckLakeTruncate::GetDataInternal(ExecutionContext &context, Da break; } case DuckLakeDataType::INLINED_DATA: { - transaction.MarkInlinedDataDeleted(file_info.file.path); + auto row_ids = GetVisibleInlinedRowIds(transaction, read_info.snapshot, file_info.file.path); + transaction.AddNewInlinedDeletes(table.GetTableId(), file_info.file.path, std::move(row_ids)); break; } case DuckLakeDataType::TRANSACTION_LOCAL_INLINED_DATA: { From 4d1b1552d3c18eaa4a22900e86ef62335e98e01d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Rafael?= Date: Fri, 10 Apr 2026 09:52:38 +0000 Subject: [PATCH 9/9] Avoid iterating rows for inlined truncate --- .../storage/ducklake_delete_filter.hpp | 1 + src/include/storage/ducklake_inlined_data.hpp | 1 + .../storage/ducklake_metadata_info.hpp | 1 + .../storage/ducklake_metadata_manager.hpp | 1 - src/include/storage/ducklake_transaction.hpp | 7 ++-- src/storage/ducklake_delete_filter.cpp | 11 ++++++ src/storage/ducklake_inlined_data_reader.cpp | 30 ++++++++-------- src/storage/ducklake_metadata_manager.cpp | 30 ++++++++-------- src/storage/ducklake_transaction.cpp | 35 ++++++++++++------- src/storage/ducklake_truncate.cpp | 18 +--------- 10 files changed, 71 insertions(+), 64 deletions(-) diff --git a/src/include/storage/ducklake_delete_filter.hpp b/src/include/storage/ducklake_delete_filter.hpp index 71a84dc5b55..afa4951d7ff 100644 --- a/src/include/storage/ducklake_delete_filter.hpp +++ b/src/include/storage/ducklake_delete_filter.hpp @@ -16,6 +16,7 @@ namespace duckdb { struct DuckLakeDeleteData { vector deleted_rows; vector snapshot_ids; + bool delete_all = false; //! For deletion scans: mapping from row_id to snapshot_id for rows that were deleted //! If scan_snapshot_map_uses_row_id is true, this is indexed by global row_id (from _ducklake_internal_row_id) //! Otherwise, it's indexed by file position diff --git a/src/include/storage/ducklake_inlined_data.hpp b/src/include/storage/ducklake_inlined_data.hpp index 0ce107fcdc8..4bc1d8eb15b 100644 --- a/src/include/storage/ducklake_inlined_data.hpp +++ b/src/include/storage/ducklake_inlined_data.hpp @@ -31,6 +31,7 @@ struct DuckLakeInlinedData { struct DuckLakeInlinedDataDeletes { set rows; + bool delete_all = false; }; //! Stores inlined file deletions for a table diff --git a/src/include/storage/ducklake_metadata_info.hpp b/src/include/storage/ducklake_metadata_info.hpp index 13c54b09a7f..d9e3f6bdd88 100644 --- a/src/include/storage/ducklake_metadata_info.hpp +++ b/src/include/storage/ducklake_metadata_info.hpp @@ -172,6 +172,7 @@ struct DuckLakeDeletedInlinedDataInfo { TableIndex table_id; string table_name; vector deleted_row_ids; + bool delete_all = false; }; //! Info for all inlined file deletions for a single table diff --git a/src/include/storage/ducklake_metadata_manager.hpp b/src/include/storage/ducklake_metadata_manager.hpp index a2be06be83e..eb5980eb53b 100644 --- a/src/include/storage/ducklake_metadata_manager.hpp +++ b/src/include/storage/ducklake_metadata_manager.hpp @@ -227,7 +227,6 @@ class DuckLakeMetadataManager { virtual shared_ptr TransformInlinedData(QueryResult &result, const vector &expected_types); - virtual void MarkInlinedDataDeleted(DuckLakeSnapshot snapshot, const string &inlined_table_name); virtual void DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table); //! We delete at the flush virtual void DeleteFlushedInlinedData(const DuckLakeInlinedTableInfo &inlined_table, idx_t flush_snapshot_id); diff --git a/src/include/storage/ducklake_transaction.hpp b/src/include/storage/ducklake_transaction.hpp index 65d4f9be51b..5b8a104d4d7 100644 --- a/src/include/storage/ducklake_transaction.hpp +++ b/src/include/storage/ducklake_transaction.hpp @@ -69,7 +69,8 @@ struct LocalTableChanges { void DropTransactionLocalFile(ClientContext &context, TableIndex table_id, const string &path); void AppendFiles(TableIndex table_id, vector files); void AppendInlinedData(ClientContext &context, TableIndex table_id, unique_ptr new_data); - void AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set new_deletes); + void AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set new_deletes, + bool delete_all = false); void DeleteFromLocalInlinedData(ClientContext &context, TableIndex table_id, set new_deletes); void TruncateLocalInlinedData(TableIndex table_id); void AddColumnToLocalInlinedData(ClientContext &context, TableIndex table_id, const LogicalType &new_column_type, @@ -205,7 +206,8 @@ class DuckLakeTransaction : public Transaction, public enable_shared_from_this collection); - void AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set new_deletes); + void AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set new_deletes, + bool delete_all = false); void DeleteFromLocalInlinedData(TableIndex table_id, set new_deletes); void TruncateLocalInlinedData(TableIndex table_id); void AddColumnToLocalInlinedData(TableIndex table_id, const LogicalType &new_column_type, @@ -228,7 +230,6 @@ class DuckLakeTransaction : public Transaction, public enable_shared_from_this &snapshots); - void MarkInlinedDataDeleted(const string &inlined_table_name); void DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table); //! Delete inlined data rows with begin_snapshot <= flush_snapshot_id void DeleteFlushedInlinedData(const DuckLakeInlinedTableInfo &inlined_table, idx_t flush_snapshot_id); diff --git a/src/storage/ducklake_delete_filter.cpp b/src/storage/ducklake_delete_filter.cpp index e7c9add822f..750703e581c 100644 --- a/src/storage/ducklake_delete_filter.cpp +++ b/src/storage/ducklake_delete_filter.cpp @@ -52,6 +52,9 @@ DuckLakeDeleteFilter::DuckLakeDeleteFilter() : delete_data(make_shared_ptrdelete_all = false; delete_data->deleted_rows = std::move(scan_result.deleted_rows); delete_data->snapshot_ids = std::move(scan_result.snapshot_ids); } void DuckLakeDeleteFilter::Initialize(const DuckLakeInlinedDataDeletes &inlined_deletes) { + delete_data->delete_all = false; + if (inlined_deletes.delete_all) { + delete_data->delete_all = true; + delete_data->deleted_rows.clear(); + delete_data->snapshot_ids.clear(); + return; + } D_ASSERT(std::is_sorted(delete_data->deleted_rows.begin(), delete_data->deleted_rows.end())); auto mid_idx = delete_data->deleted_rows.size(); for (auto &idx : inlined_deletes.rows) { diff --git a/src/storage/ducklake_inlined_data_reader.cpp b/src/storage/ducklake_inlined_data_reader.cpp index ce2e3bcbf45..5a8a947836f 100644 --- a/src/storage/ducklake_inlined_data_reader.cpp +++ b/src/storage/ducklake_inlined_data_reader.cpp @@ -131,23 +131,25 @@ bool DuckLakeInlinedDataReader::TryInitializeScan(ClientContext &context, Global if (deletion_filter) { // map the deleted row-ids to the deleted ordinals to obtain the correct deleted rows auto &filter = reinterpret_cast(*deletion_filter); - vector deleted_ordinals; - auto &deleted_row_ids = filter.delete_data->deleted_rows; - idx_t current_idx = 0; - idx_t ordinal_position = 0; - for (auto &chunk : data->data->Chunks()) { - auto &row_id_vector = chunk.data.back(); - auto row_id_data = FlatVector::GetData(row_id_vector); - for (idx_t r = 0; r < chunk.size(); r++) { - auto row_id = NumericCast(row_id_data[r]); - if (current_idx < deleted_row_ids.size() && deleted_row_ids[current_idx] == row_id) { - deleted_ordinals.push_back(ordinal_position); - current_idx++; + if (!filter.delete_data->delete_all) { + vector deleted_ordinals; + auto &deleted_row_ids = filter.delete_data->deleted_rows; + idx_t current_idx = 0; + idx_t ordinal_position = 0; + for (auto &chunk : data->data->Chunks()) { + auto &row_id_vector = chunk.data.back(); + auto row_id_data = FlatVector::GetData(row_id_vector); + for (idx_t r = 0; r < chunk.size(); r++) { + auto row_id = NumericCast(row_id_data[r]); + if (current_idx < deleted_row_ids.size() && deleted_row_ids[current_idx] == row_id) { + deleted_ordinals.push_back(ordinal_position); + current_idx++; + } + ordinal_position++; } - ordinal_position++; } + filter.delete_data->deleted_rows = std::move(deleted_ordinals); } - filter.delete_data->deleted_rows = std::move(deleted_ordinals); } data->data->InitializeScan(state); } else { diff --git a/src/storage/ducklake_metadata_manager.cpp b/src/storage/ducklake_metadata_manager.cpp index 836fec950eb..56239cf43ce 100644 --- a/src/storage/ducklake_metadata_manager.cpp +++ b/src/storage/ducklake_metadata_manager.cpp @@ -1795,7 +1795,7 @@ vector DuckLakeMetadataManager::GetFilesForCompacti string deletion_threshold_clause; if (type == CompactionType::REWRITE_DELETES) { // Filter current data files in SQL, then apply the delete threshold in C++ so we can include - // metadata-only inlined file deletions as rewrite candidates. + // metadata-only inlined file deletions as rewrite candidates deletion_threshold_clause = " AND data.end_snapshot is null"; } // Add file size filtering for MERGE_ADJACENT_TABLES compaction @@ -1900,7 +1900,8 @@ ORDER BY data.begin_snapshot, data.row_id_start, data.data_file_id, del.begin_sn delete_file.data = ReadDeleteFile(table, row, col_idx, IsEncrypted()); file_entry.delete_files.push_back(std::move(delete_file)); } - // Load inlined deletions for active files so rewrite compaction can treat them the same as delete files. + + // Load inlined deletions for active files so rewrite compaction can treat them the same as delete files auto inlined_deletions = ReadInlinedFileDeletions(table_id, snapshot); for (auto &file : files) { auto entry = inlined_deletions.find(file.file.id.index); @@ -2430,6 +2431,15 @@ string DuckLakeMetadataManager::WriteNewInlinedDeletes(const vectorHasError()) { result->GetErrorObject().Throw("Failed to delete inlined data in DuckLake from table " + @@ -4440,18 +4450,6 @@ DuckLakeMetadataManager::GenerateDeleteFlushedInlinedData(const vectorHasError()) { - result->GetErrorObject().Throw("Failed to mark inlined data as deleted in DuckLake from table " + - inlined_table_name + ": "); - } -} string DuckLakeMetadataManager::InsertNewSchema(const DuckLakeSnapshot &snapshot, const set &table_ids) { if (table_ids.empty()) { return {}; diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp index 1d2678709f2..c88c7a31783 100644 --- a/src/storage/ducklake_transaction.cpp +++ b/src/storage/ducklake_transaction.cpp @@ -227,12 +227,24 @@ void LocalTableChanges::AppendInlinedData(ClientContext &context, TableIndex tab } } -void LocalTableChanges::AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set new_deletes) { +void LocalTableChanges::AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set new_deletes, + bool delete_all) { + if (new_deletes.empty() && !delete_all) { + return; + } lock_guard guard(lock); auto &table_changes = changes[table_id]; auto &table_deletes = table_changes.new_inlined_data_deletes; auto entry = table_deletes.find(table_name); if (entry != table_deletes.end()) { + if (delete_all) { + entry->second->delete_all = true; + entry->second->rows.clear(); + return; + } + if (entry->second->delete_all) { + return; + } // merge deletes auto &existing_rows = entry->second->rows; for (auto &row_idx : new_deletes) { @@ -240,6 +252,7 @@ void LocalTableChanges::AddNewInlinedDeletes(TableIndex table_id, const string & } } else { auto new_data = make_uniq(); + new_data->delete_all = delete_all; new_data->rows = std::move(new_deletes); table_deletes.emplace(table_name, std::move(new_data)); } @@ -2190,8 +2203,11 @@ DuckLakeTransaction::GetNewInlinedDeletes(DuckLakeCommitState &commit_state) con DuckLakeDeletedInlinedDataInfo info; info.table_id = table_id; info.table_name = delete_entry.first; - for (auto &row_id : delete_entry.second->rows) { - info.deleted_row_ids.push_back(row_id); + info.delete_all = delete_entry.second->delete_all; + if (!info.delete_all) { + for (auto &row_id : delete_entry.second->rows) { + info.deleted_row_ids.push_back(row_id); + } } result.push_back(std::move(info)); } @@ -2576,11 +2592,6 @@ void DuckLakeTransaction::DeleteSnapshots(const vector &sn metadata_manager.DeleteSnapshots(snapshots); } -void DuckLakeTransaction::MarkInlinedDataDeleted(const string &inlined_table_name) { - auto &metadata_manager = GetMetadataManager(); - metadata_manager.MarkInlinedDataDeleted(GetSnapshot(), inlined_table_name); -} - void DuckLakeTransaction::DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table) { auto &metadata_manager = GetMetadataManager(); metadata_manager.DeleteInlinedData(inlined_table); @@ -2721,11 +2732,9 @@ void DuckLakeTransaction::AppendInlinedData(TableIndex table_id, unique_ptr new_deletes) { - if (new_deletes.empty()) { - return; - } - local_changes.AddNewInlinedDeletes(table_id, table_name, std::move(new_deletes)); +void DuckLakeTransaction::AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set new_deletes, + bool delete_all) { + local_changes.AddNewInlinedDeletes(table_id, table_name, std::move(new_deletes), delete_all); } void DuckLakeTransaction::DeleteFromLocalInlinedData(TableIndex table_id, set new_deletes) { diff --git a/src/storage/ducklake_truncate.cpp b/src/storage/ducklake_truncate.cpp index 4a742a426aa..17a192b7473 100644 --- a/src/storage/ducklake_truncate.cpp +++ b/src/storage/ducklake_truncate.cpp @@ -20,21 +20,6 @@ class DuckLakeTruncateGlobalState : public GlobalSourceState { bool finished = false; }; -static set GetVisibleInlinedRowIds(DuckLakeTransaction &transaction, DuckLakeSnapshot snapshot, - const string &inlined_table_name) { - auto &metadata_manager = transaction.GetMetadataManager(); - auto query_result = metadata_manager.ReadInlinedData(snapshot, inlined_table_name, {"row_id"}); - if (query_result->HasError()) { - query_result->GetErrorObject().Throw("Failed to read inlined row ids during DuckLake truncate: "); - } - - set row_ids; - for (auto &row : *query_result) { - row_ids.insert(row.GetValue(0)); - } - return row_ids; -} - DuckLakeTruncate::DuckLakeTruncate(PhysicalPlan &physical_plan, DuckLakeTableEntry &table) : PhysicalOperator(physical_plan, PhysicalOperatorType::EXTENSION, {LogicalType::UBIGINT}, 0), table(table) { } @@ -70,8 +55,7 @@ SourceResultType DuckLakeTruncate::GetDataInternal(ExecutionContext &context, Da break; } case DuckLakeDataType::INLINED_DATA: { - auto row_ids = GetVisibleInlinedRowIds(transaction, read_info.snapshot, file_info.file.path); - transaction.AddNewInlinedDeletes(table.GetTableId(), file_info.file.path, std::move(row_ids)); + transaction.AddNewInlinedDeletes(table.GetTableId(), file_info.file.path, {}, true); break; } case DuckLakeDataType::TRANSACTION_LOCAL_INLINED_DATA: {