diff --git a/src/functions/ducklake_compaction_functions.cpp b/src/functions/ducklake_compaction_functions.cpp index 7e16234d8e8..b8bcf836d70 100644 --- a/src/functions/ducklake_compaction_functions.cpp +++ b/src/functions/ducklake_compaction_functions.cpp @@ -238,9 +238,10 @@ void DuckLakeCompactor::GenerateCompactions(DuckLakeTableEntry &table, // (does not apply to REWRITE_DELETES - delete files must be rewritten regardless of data file size) continue; } - if ((!candidate.delete_files.empty() && type == CompactionType::MERGE_ADJACENT_TABLES) || - candidate.file.end_snapshot.IsValid() || candidate.has_inlined_deletions) { - // Merge Adjacent Tables doesn't perform the merge if delete files are present + if (((!candidate.delete_files.empty() || !candidate.inlined_file_deletions.empty()) && + type == CompactionType::MERGE_ADJACENT_TABLES) || + candidate.file.end_snapshot.IsValid()) { + // Merge Adjacent Tables doesn't perform the merge if any deletes are present continue; } // construct the compaction group for this file - i.e. the set of candidate files we can compact it with @@ -415,9 +416,11 @@ DuckLakeCompactor::GenerateCompactionCommand(vector for (auto &source : source_files) { DuckLakeFileListEntry result; result.file = source.file.data; + result.file_id = source.file.id; result.row_id_start = source.file.row_id_start; result.snapshot_id = source.file.begin_snapshot; result.mapping_id = source.file.mapping_id; + result.inlined_file_deletions = source.inlined_file_deletions; switch (type) { case CompactionType::REWRITE_DELETES: { if (!source.delete_files.empty()) { diff --git a/src/include/storage/ducklake_catalog.hpp b/src/include/storage/ducklake_catalog.hpp index 3255eadcffa..903af03137c 100644 --- a/src/include/storage/ducklake_catalog.hpp +++ b/src/include/storage/ducklake_catalog.hpp @@ -95,6 +95,7 @@ class DuckLakeCatalog : public Catalog { PhysicalOperator &plan) override; PhysicalOperator &PlanDelete(ClientContext &context, PhysicalPlanGenerator &planner, LogicalDelete &op, PhysicalOperator &plan) override; + PhysicalOperator &PlanDelete(ClientContext &context, PhysicalPlanGenerator &planner, LogicalDelete &op) override; PhysicalOperator &PlanUpdate(ClientContext &context, PhysicalPlanGenerator &planner, LogicalUpdate &op, PhysicalOperator &plan) override; PhysicalOperator &PlanMergeInto(ClientContext &context, PhysicalPlanGenerator &planner, LogicalMergeInto &op, diff --git a/src/include/storage/ducklake_metadata_info.hpp b/src/include/storage/ducklake_metadata_info.hpp index 23ededb5a79..13c54b09a7f 100644 --- a/src/include/storage/ducklake_metadata_info.hpp +++ b/src/include/storage/ducklake_metadata_info.hpp @@ -446,8 +446,8 @@ struct DuckLakeCompactionFileEntry { vector delete_files; optional_idx max_partial_file_snapshot; idx_t schema_version; - //! Whether this file has inlined deletions (stored in metadata database rather than delete files) - bool has_inlined_deletions = false; + //! Inlined file deletions stored in the metadata database rather than delete files + set inlined_file_deletions; }; struct DuckLakeRewriteFileEntry { @@ -468,6 +468,7 @@ struct DuckLakeCompactedFileInfo { string path; DataFileIndex source_id; DataFileIndex new_id; + optional_idx rewrite_snapshot; //! Info on delete files, in case the compaction is a delete-rewrite string delete_file_path; DataFileIndex delete_file_id; diff --git a/src/include/storage/ducklake_metadata_manager.hpp b/src/include/storage/ducklake_metadata_manager.hpp index 9590aea18c9..a2be06be83e 100644 --- a/src/include/storage/ducklake_metadata_manager.hpp +++ b/src/include/storage/ducklake_metadata_manager.hpp @@ -209,6 +209,7 @@ class DuckLakeMetadataManager { SnapshotDeletedFromFiles GetFilesDeletedOrDroppedAfterSnapshot(const DuckLakeSnapshot &start_snapshot) const; virtual unique_ptr GetSnapshot(); virtual unique_ptr GetSnapshot(BoundAtClause &at_clause, SnapshotBound bound); + virtual unique_ptr GetSnapshotById(idx_t snapshot_id); virtual idx_t GetNextColumnId(TableIndex table_id); virtual unique_ptr ReadInlinedData(DuckLakeSnapshot snapshot, const string &inlined_table_name, const vector &columns_to_read); @@ -226,6 +227,7 @@ class DuckLakeMetadataManager { virtual shared_ptr TransformInlinedData(QueryResult &result, const vector &expected_types); + virtual void MarkInlinedDataDeleted(DuckLakeSnapshot snapshot, const string &inlined_table_name); virtual void DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table); //! We delete at the flush virtual void DeleteFlushedInlinedData(const DuckLakeInlinedTableInfo &inlined_table, idx_t flush_snapshot_id); diff --git a/src/include/storage/ducklake_transaction.hpp b/src/include/storage/ducklake_transaction.hpp index 41c47815057..65d4f9be51b 100644 --- a/src/include/storage/ducklake_transaction.hpp +++ b/src/include/storage/ducklake_transaction.hpp @@ -71,6 +71,7 @@ struct LocalTableChanges { void AppendInlinedData(ClientContext &context, TableIndex table_id, unique_ptr new_data); void AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set new_deletes); void DeleteFromLocalInlinedData(ClientContext &context, TableIndex table_id, set new_deletes); + void TruncateLocalInlinedData(TableIndex table_id); void AddColumnToLocalInlinedData(ClientContext &context, TableIndex table_id, const LogicalType &new_column_type, FieldIndex new_field_index, const Value &default_value); void RemoveColumnFromLocalInlinedData(ClientContext &context, TableIndex table_id, @@ -206,6 +207,7 @@ class DuckLakeTransaction : public Transaction, public enable_shared_from_this collection); void AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set new_deletes); void DeleteFromLocalInlinedData(TableIndex table_id, set new_deletes); + void TruncateLocalInlinedData(TableIndex table_id); void AddColumnToLocalInlinedData(TableIndex table_id, const LogicalType &new_column_type, FieldIndex new_field_index, const Value &default_value = Value()); void RemoveColumnFromLocalInlinedData(TableIndex table_id, LogicalIndex removed_column_index, @@ -226,6 +228,7 @@ class DuckLakeTransaction : public Transaction, public enable_shared_from_this &snapshots); + void MarkInlinedDataDeleted(const string &inlined_table_name); void DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table); //! Delete inlined data rows with begin_snapshot <= flush_snapshot_id void DeleteFlushedInlinedData(const DuckLakeInlinedTableInfo &inlined_table, idx_t flush_snapshot_id); diff --git a/src/include/storage/ducklake_truncate.hpp b/src/include/storage/ducklake_truncate.hpp new file mode 100644 index 00000000000..010b2f7d58d --- /dev/null +++ b/src/include/storage/ducklake_truncate.hpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// storage/ducklake_truncate.hpp +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/execution/physical_operator.hpp" +#include "storage/ducklake_table_entry.hpp" + +namespace duckdb { + +class DuckLakeTruncate : public PhysicalOperator { +public: + DuckLakeTruncate(PhysicalPlan &physical_plan, DuckLakeTableEntry &table); + + DuckLakeTableEntry &table; + +public: + SourceResultType GetDataInternal(ExecutionContext &context, DataChunk &chunk, + OperatorSourceInput &input) const override; + + bool IsSource() const override { + return true; + } + + unique_ptr GetGlobalSourceState(ClientContext &context) const override; + + string GetName() const override; + InsertionOrderPreservingMap ParamsToString() const override; +}; + +} // namespace duckdb diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 41e6ac9ec7c..a5456812cfa 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -20,6 +20,7 @@ add_library( ducklake_storage.cpp ducklake_delete.cpp ducklake_deletion_vector.cpp + ducklake_truncate.cpp ducklake_multi_file_reader.cpp ducklake_partition_data.cpp ducklake_secret.cpp diff --git a/src/storage/ducklake_delete.cpp b/src/storage/ducklake_delete.cpp index f9802927458..bd97b155870 100644 --- a/src/storage/ducklake_delete.cpp +++ b/src/storage/ducklake_delete.cpp @@ -681,7 +681,7 @@ PhysicalOperator &DuckLakeCatalog::PlanDelete(ClientContext &context, PhysicalPl row_id_indexes.push_back(bound_ref.index); } return DuckLakeDelete::PlanDelete(context, planner, op.table.Cast(), child_plan, - std::move(row_id_indexes), std::move(encryption_key)); + std::move(row_id_indexes), std::move(encryption_key), true); } } // namespace duckdb diff --git a/src/storage/ducklake_metadata_manager.cpp b/src/storage/ducklake_metadata_manager.cpp index 2b7c0e8e50c..88ca2931246 100644 --- a/src/storage/ducklake_metadata_manager.cpp +++ b/src/storage/ducklake_metadata_manager.cpp @@ -1794,9 +1794,9 @@ vector DuckLakeMetadataManager::GetFilesForCompacti string select_list = data_select_list + ", " + delete_select_list; string deletion_threshold_clause; if (type == CompactionType::REWRITE_DELETES) { - deletion_threshold_clause = StringUtil::Format( - " AND CAST(del.delete_count AS FLOAT)/CAST(data.record_count AS FLOAT) >= %f and data.end_snapshot is null", - deletion_threshold); + // Filter current data files in SQL, then apply the delete threshold in C++ so we can include + // metadata-only inlined file deletions as rewrite candidates. + deletion_threshold_clause = " AND data.end_snapshot is null"; } // Add file size filtering for MERGE_ADJACENT_TABLES compaction string file_size_filter_clause; @@ -1901,17 +1901,31 @@ ORDER BY data.begin_snapshot, data.row_id_start, data.data_file_id, del.begin_sn file_entry.delete_files.push_back(std::move(delete_file)); } - // Check for inlined deletions and mark affected files - // Gather file IDs first, then query only for existence - vector file_ids; - file_ids.reserve(files.size()); + // Load inlined deletions for active files so rewrite compaction can treat them the same as delete files. + auto inlined_deletions = ReadInlinedFileDeletions(table_id, snapshot); for (auto &file : files) { - file_ids.push_back(file.file.id.index); + auto entry = inlined_deletions.find(file.file.id.index); + if (entry != inlined_deletions.end()) { + file.inlined_file_deletions = std::move(entry->second); + } } - auto files_with_deletions = GetFileIdsWithInlinedDeletions(table_id, snapshot, file_ids); - for (auto &file : files) { - if (files_with_deletions.count(file.file.id.index)) { - file.has_inlined_deletions = true; + + if (type == CompactionType::REWRITE_DELETES) { + for (idx_t file_idx = 0; file_idx < files.size(); file_idx++) { + auto &file = files[file_idx]; + idx_t active_delete_count = 0; + if (!file.delete_files.empty() && !file.delete_files.back().end_snapshot.IsValid()) { + active_delete_count = file.delete_files.back().row_count; + } + auto total_delete_count = active_delete_count + file.inlined_file_deletions.size(); + double delete_ratio = 0; + if (file.file.row_count > 0) { + delete_ratio = static_cast(total_delete_count) / static_cast(file.file.row_count); + } + if (total_delete_count == 0 || delete_ratio < deletion_threshold) { + files.erase_at(file_idx); + file_idx--; + } } } @@ -3556,6 +3570,25 @@ WHERE snapshot_id = ( return snapshot; } +unique_ptr DuckLakeMetadataManager::GetSnapshotById(idx_t snapshot_id) { + auto query = StringUtil::Format(R"( +SELECT snapshot_id, schema_version, next_catalog_id, next_file_id +FROM {METADATA_CATALOG}.ducklake_snapshot +WHERE snapshot_id = %llu;)", + snapshot_id); + DuckLakeSnapshot dummy_snapshot(0, 0, 0, 0); + auto result = Query(dummy_snapshot, query); + if (result->HasError()) { + result->GetErrorObject().Throw( + StringUtil::Format("Failed to query snapshot %llu for DuckLake: ", snapshot_id)); + } + auto snapshot = TryGetSnapshotInternal(*result); + if (!snapshot) { + throw InvalidInputException("Snapshot %llu not found in DuckLake", snapshot_id); + } + return snapshot; +} + static unordered_map GetNewPartitions(const vector &old_partitions, const vector &new_partitions) { @@ -4112,7 +4145,7 @@ string DuckLakeMetadataManager::WriteDeleteRewrites(const vector 0; i--) { auto &compaction = compactions[i - 1]; if (table_idx_last_snapshot.find(compaction.table_index.index) == table_idx_last_snapshot.end()) { - table_idx_last_snapshot[compaction.table_index.index] = compaction.delete_file_start_snapshot.GetIndex(); + table_idx_last_snapshot[compaction.table_index.index] = compaction.rewrite_snapshot.GetIndex(); } } @@ -4120,12 +4153,12 @@ string DuckLakeMetadataManager::WriteDeleteRewrites(const vectorHasError()) { result->GetErrorObject().Throw("Failed to delete inlined data in DuckLake from table " + @@ -4408,6 +4441,18 @@ DuckLakeMetadataManager::GenerateDeleteFlushedInlinedData(const vectorHasError()) { + result->GetErrorObject().Throw("Failed to mark inlined data as deleted in DuckLake from table " + + inlined_table_name + ": "); + } +} string DuckLakeMetadataManager::InsertNewSchema(const DuckLakeSnapshot &snapshot, const set &table_ids) { if (table_ids.empty()) { return {}; diff --git a/src/storage/ducklake_multi_file_list.cpp b/src/storage/ducklake_multi_file_list.cpp index 3166bab68c1..256b036cc9e 100644 --- a/src/storage/ducklake_multi_file_list.cpp +++ b/src/storage/ducklake_multi_file_list.cpp @@ -307,38 +307,6 @@ vector DuckLakeMultiFileList::GetFilesExtended() file_entry.data_type = DuckLakeDataType::TRANSACTION_LOCAL_INLINED_DATA; result.push_back(std::move(file_entry)); } - if (!read_file_list) { - // we have not read the file list yet - construct it from the extended file list - // Read committed inlined file deletions from metadata - map> committed_inlined_deletions; - if (!read_info.table_id.IsTransactionLocal()) { - auto &metadata_manager = transaction.GetMetadataManager(); - committed_inlined_deletions = - metadata_manager.ReadInlinedFileDeletions(read_info.table_id, read_info.snapshot); - } - for (auto &file : result) { - DuckLakeFileListEntry file_entry; - file_entry.file = file.file; - file_entry.row_id_start = file.row_id_start; - file_entry.delete_file = file.delete_file; - file_entry.file_id = file.file_id; - file_entry.data_type = file.data_type; - // Apply committed inlined file deletions from metadata - if (file.file_id.IsValid()) { - auto it = committed_inlined_deletions.find(file.file_id.index); - if (it != committed_inlined_deletions.end()) { - file_entry.inlined_file_deletions = std::move(it->second); - } - } - // Apply local inlined file deletes if any (merges into committed deletions) - if (file.file_id.IsValid() && transaction.HasLocalInlinedFileDeletes(read_info.table_id)) { - transaction.GetLocalInlinedFileDeletesForFile(read_info.table_id, file.file_id.index, - file_entry.inlined_file_deletions); - } - files.emplace_back(std::move(file_entry)); - } - read_file_list = true; - } return result; } diff --git a/src/storage/ducklake_scan.cpp b/src/storage/ducklake_scan.cpp index f329e15ebc3..982f991e157 100644 --- a/src/storage/ducklake_scan.cpp +++ b/src/storage/ducklake_scan.cpp @@ -213,7 +213,8 @@ TableFunction DuckLakeFunctions::GetDuckLakeScanFunction(DatabaseInstance &insta DuckLakeFunctionInfo::DuckLakeFunctionInfo(DuckLakeTableEntry &table, DuckLakeTransaction &transaction_p, DuckLakeSnapshot snapshot) - : table(table), transaction(transaction_p.shared_from_this()), snapshot(snapshot) { + : table(table), transaction(transaction_p.shared_from_this()), table_name(table.name), snapshot(snapshot), + table_id(table.GetTableId()) { } shared_ptr diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp index 02c0b181abb..1d2678709f2 100644 --- a/src/storage/ducklake_transaction.cpp +++ b/src/storage/ducklake_transaction.cpp @@ -290,6 +290,22 @@ void LocalTableChanges::DeleteFromLocalInlinedData(ClientContext &context, Table inlined_data.row_ids = std::move(new_row_ids); } +void LocalTableChanges::TruncateLocalInlinedData(TableIndex table_id) { + lock_guard guard(lock); + auto entry = changes.find(table_id); + if (entry == changes.end()) { + throw InternalException("TruncateLocalInlinedData called but no transaction-local data exists for table"); + } + auto &table_changes = entry->second; + if (!table_changes.new_inlined_data) { + throw InternalException("TruncateLocalInlinedData called but no inlined data exists"); + } + table_changes.new_inlined_data.reset(); + if (table_changes.IsEmpty()) { + changes.erase(entry); + } +} + static void RemoveFieldStats(map &column_stats, const DuckLakeFieldId &field_id) { column_stats.erase(field_id.GetFieldIndex()); for (auto &child_id : field_id.Children()) { @@ -2391,9 +2407,12 @@ CompactionInformation DuckLakeTransaction::GetCompactionChanges(DuckLakeCommitSt if (!compacted_file.delete_files.empty()) { row_id_limit -= compacted_file.delete_files.back().row_count; } + row_id_limit -= compacted_file.inlined_file_deletions.size(); DuckLakeCompactedFileInfo file_info; file_info.path = compacted_file.file.data.path; file_info.source_id = compacted_file.file.id; + file_info.table_index = entry.GetTableIndex(); + file_info.rewrite_snapshot = commit_snapshot.snapshot_id; if (has_new_file) { file_info.new_id = new_file.id; } @@ -2402,7 +2421,6 @@ CompactionInformation DuckLakeTransaction::GetCompactionChanges(DuckLakeCommitSt file_info.delete_file_path = compacted_file.delete_files.back().data.path; file_info.delete_file_id = compacted_file.delete_files.back().delete_file_id; file_info.start_snapshot = compacted_file.file.begin_snapshot; - file_info.table_index = entry.GetTableIndex(); file_info.delete_file_start_snapshot = commit_snapshot.snapshot_id; file_info.delete_file_end_snapshot = compacted_file.delete_files.back().end_snapshot; } @@ -2558,6 +2576,11 @@ void DuckLakeTransaction::DeleteSnapshots(const vector &sn metadata_manager.DeleteSnapshots(snapshots); } +void DuckLakeTransaction::MarkInlinedDataDeleted(const string &inlined_table_name) { + auto &metadata_manager = GetMetadataManager(); + metadata_manager.MarkInlinedDataDeleted(GetSnapshot(), inlined_table_name); +} + void DuckLakeTransaction::DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table) { auto &metadata_manager = GetMetadataManager(); metadata_manager.DeleteInlinedData(inlined_table); @@ -2710,6 +2733,10 @@ void DuckLakeTransaction::DeleteFromLocalInlinedData(TableIndex table_id, set GetVisibleInlinedRowIds(DuckLakeTransaction &transaction, DuckLakeSnapshot snapshot, + const string &inlined_table_name) { + auto &metadata_manager = transaction.GetMetadataManager(); + auto query_result = metadata_manager.ReadInlinedData(snapshot, inlined_table_name, {"row_id"}); + if (query_result->HasError()) { + query_result->GetErrorObject().Throw("Failed to read inlined row ids during DuckLake truncate: "); + } + + set row_ids; + for (auto &row : *query_result) { + row_ids.insert(row.GetValue(0)); + } + return row_ids; +} + +DuckLakeTruncate::DuckLakeTruncate(PhysicalPlan &physical_plan, DuckLakeTableEntry &table) + : PhysicalOperator(physical_plan, PhysicalOperatorType::EXTENSION, {LogicalType::UBIGINT}, 0), table(table) { +} + +unique_ptr DuckLakeTruncate::GetGlobalSourceState(ClientContext &context) const { + return make_uniq(); +} + +SourceResultType DuckLakeTruncate::GetDataInternal(ExecutionContext &context, DataChunk &chunk, + OperatorSourceInput &input) const { + auto &gstate = input.global_state.Cast(); + if (gstate.finished) { + return SourceResultType::FINISHED; + } + gstate.finished = true; + + auto &transaction = DuckLakeTransaction::Get(context.client, table.catalog); + DuckLakeFunctionInfo read_info(table, transaction, transaction.GetSnapshot()); + auto transaction_local_files = transaction.GetTransactionLocalFiles(table.GetTableId()); + auto transaction_local_data = transaction.GetTransactionLocalInlinedData(table.GetTableId()); + DuckLakeMultiFileList file_list(read_info, std::move(transaction_local_files), transaction_local_data); + + uint64_t total_deleted_count = table.GetNetDataFileRowCount(transaction) + table.GetNetInlinedRowCount(transaction); + auto files = file_list.GetFilesExtended(); + for (auto &file_info : files) { + switch (file_info.data_type) { + case DuckLakeDataType::DATA_FILE: { + if (file_info.file_id.IsValid()) { + transaction.DropFile(table.GetTableId(), file_info.file_id, file_info.file.path); + } else { + transaction.DropTransactionLocalFile(table.GetTableId(), file_info.file.path); + } + break; + } + case DuckLakeDataType::INLINED_DATA: { + auto row_ids = GetVisibleInlinedRowIds(transaction, read_info.snapshot, file_info.file.path); + transaction.AddNewInlinedDeletes(table.GetTableId(), file_info.file.path, std::move(row_ids)); + break; + } + case DuckLakeDataType::TRANSACTION_LOCAL_INLINED_DATA: { + transaction.TruncateLocalInlinedData(table.GetTableId()); + break; + } + default: + throw InternalException("Unsupported DuckLakeDataType in truncate"); + } + } + + chunk.SetCardinality(1); + chunk.SetValue(0, 0, Value::UBIGINT(total_deleted_count)); + return SourceResultType::FINISHED; +} + +string DuckLakeTruncate::GetName() const { + return "DUCKLAKE_TRUNCATE"; +} + +InsertionOrderPreservingMap DuckLakeTruncate::ParamsToString() const { + InsertionOrderPreservingMap result; + result["Table Name"] = table.name; + return result; +} + +PhysicalOperator &DuckLakeCatalog::PlanDelete(ClientContext &context, PhysicalPlanGenerator &planner, LogicalDelete &op) { + bool delete_all = false; + if (op.children.size() == 1 && op.children[0]->type == LogicalOperatorType::LOGICAL_GET) { + auto &get = op.children[0]->Cast(); + delete_all = get.table_filters.filters.empty(); + } + if (!delete_all) { + return Catalog::PlanDelete(context, planner, op); + } + auto &table = op.table.Cast(); + auto &transaction = DuckLakeTransaction::Get(context, *this); + if (transaction.HasAnyLocalChanges(table.GetTableId())) { + return Catalog::PlanDelete(context, planner, op); + } + if (op.return_chunk) { + throw BinderException("RETURNING clause not yet supported for deletion of a DuckLake table"); + } + return planner.Make(table); +} + +} // namespace duckdb diff --git a/test/sql/delete/ducklake_delete_all_simple.test b/test/sql/delete/ducklake_delete_all_simple.test new file mode 100644 index 00000000000..ae834ebfda0 --- /dev/null +++ b/test/sql/delete/ducklake_delete_all_simple.test @@ -0,0 +1,37 @@ +# name: test/sql/delete/ducklake_delete_all_simple.test +# description: simple delete-all on ducklake table + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_delete_all_simple') + +statement ok +CREATE TABLE ducklake.delete_all_test(i INTEGER); + +statement ok +INSERT INTO ducklake.delete_all_test FROM range(10); + +query II +EXPLAIN DELETE FROM ducklake.delete_all_test; +---- +physical_plan :.*DUCKLAKE_TRUNCATE.* + +query I +SELECT COUNT(*) FROM ducklake.delete_all_test; +---- +10 + +statement ok +DELETE FROM ducklake.delete_all_test; + +query I +SELECT COUNT(*) FROM ducklake.delete_all_test; +---- +0 diff --git a/test/sql/delete/ducklake_truncate_simple.test b/test/sql/delete/ducklake_truncate_simple.test new file mode 100644 index 00000000000..cd9f808a37d --- /dev/null +++ b/test/sql/delete/ducklake_truncate_simple.test @@ -0,0 +1,37 @@ +# name: test/sql/delete/ducklake_truncate_simple.test +# description: simple TRUNCATE on ducklake table + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_simple') + +statement ok +CREATE TABLE ducklake.truncate_test(i INTEGER); + +statement ok +INSERT INTO ducklake.truncate_test FROM range(10); + +query II +EXPLAIN TRUNCATE ducklake.truncate_test; +---- +physical_plan :.*DUCKLAKE_TRUNCATE.* + +query I +SELECT COUNT(*) FROM ducklake.truncate_test; +---- +10 + +statement ok +TRUNCATE ducklake.truncate_test; + +query I +SELECT COUNT(*) FROM ducklake.truncate_test; +---- +0 diff --git a/test/sql/delete/truncate_table_inlined.test b/test/sql/delete/truncate_table_inlined.test new file mode 100644 index 00000000000..12b4fed1654 --- /dev/null +++ b/test/sql/delete/truncate_table_inlined.test @@ -0,0 +1,50 @@ +# name: test/sql/delete/truncate_table_inlined.test +# description: Verify TRUNCATE works with inlined data +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_inlined_files') + +statement ok +CALL ducklake.set_option('data_inlining_row_limit', 1000); + +statement ok +CREATE TABLE ducklake.test_inline AS SELECT i id FROM range(8) t(i); + +query I +TRUNCATE ducklake.test_inline +---- +8 + +query I +SELECT COUNT(*) FROM ducklake.test_inline +---- +0 + +statement ok +INSERT INTO ducklake.test_inline VALUES (1), (2), (3); + +statement ok +BEGIN + +query I +TRUNCATE ducklake.test_inline +---- +3 + +statement ok +ROLLBACK + +query I +SELECT COUNT(*) FROM ducklake.test_inline +---- +3 diff --git a/test/sql/delete/truncate_table_return_value.test b/test/sql/delete/truncate_table_return_value.test new file mode 100644 index 00000000000..4b4908b8505 --- /dev/null +++ b/test/sql/delete/truncate_table_return_value.test @@ -0,0 +1,36 @@ +# name: test/sql/delete/truncate_table_return_value.test +# description: Verify TRUNCATE return value including tables with existing deletes +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_return_value_files') + +statement ok +CREATE TABLE ducklake.return_value_test AS SELECT i id FROM range(10000) t(i); + +statement ok +DELETE FROM ducklake.return_value_test WHERE id%8=0 + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.return_value_test +---- +8750 43750000 + +query I +TRUNCATE ducklake.return_value_test +---- +8750 + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.return_value_test +---- +0 NULL diff --git a/test/sql/delete/truncate_table_rollback.test b/test/sql/delete/truncate_table_rollback.test new file mode 100644 index 00000000000..574d7b06ca7 --- /dev/null +++ b/test/sql/delete/truncate_table_rollback.test @@ -0,0 +1,39 @@ +# name: test/sql/delete/truncate_table_rollback.test +# description: Verify TRUNCATE rollback behavior +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_rollback_files') + +statement ok +CREATE TABLE ducklake.rollback_test AS SELECT i id FROM range(100) t(i); + +statement ok +BEGIN + +query I +TRUNCATE ducklake.rollback_test +---- +100 + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.rollback_test +---- +0 NULL + +statement ok +ROLLBACK + +query II +SELECT COUNT(*), SUM(id) FROM ducklake.rollback_test +---- +100 4950 diff --git a/test/sql/delete/truncate_table_time_travel.test b/test/sql/delete/truncate_table_time_travel.test new file mode 100644 index 00000000000..765e3426393 --- /dev/null +++ b/test/sql/delete/truncate_table_time_travel.test @@ -0,0 +1,41 @@ +# name: test/sql/delete/truncate_table_time_travel.test +# description: Verify time travel works correctly across TRUNCATE +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_time_travel_files', DATA_INLINING_ROW_LIMIT 0) + +statement ok +CREATE TABLE ducklake.time_travel_test(id INTEGER); + +statement ok +INSERT INTO ducklake.time_travel_test VALUES (1), (2), (3); + +statement ok +SET VARIABLE pre_truncate_snapshot = (SELECT max(snapshot_id) FROM ducklake.snapshots()); + +query I +TRUNCATE ducklake.time_travel_test +---- +3 + +query I +SELECT COUNT(*) FROM ducklake.time_travel_test +---- +0 + +query I +SELECT id FROM ducklake.time_travel_test AT (VERSION => getvariable('pre_truncate_snapshot')) ORDER BY ALL +---- +1 +2 +3 diff --git a/test/sql/delete/truncate_table_transactionality.test b/test/sql/delete/truncate_table_transactionality.test new file mode 100644 index 00000000000..ca8193b8e4b --- /dev/null +++ b/test/sql/delete/truncate_table_transactionality.test @@ -0,0 +1,44 @@ +# name: test/sql/delete/truncate_table_transactionality.test +# description: Verify uncommitted TRUNCATE is not visible to other connections +# group: [delete] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db + +test-env DATA_PATH __TEST_DIR__ + + +statement ok +ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_transactionality_files') + +statement ok +CREATE TABLE ducklake.txn_test AS SELECT i id FROM range(25) t(i); + +statement ok con1 +BEGIN + +query I con1 +TRUNCATE ducklake.txn_test +---- +25 + +query II con1 +SELECT COUNT(*), SUM(id) FROM ducklake.txn_test +---- +0 NULL + +query II con2 +SELECT COUNT(*), SUM(id) FROM ducklake.txn_test +---- +25 300 + +statement ok con1 +COMMIT + +query II con2 +SELECT COUNT(*), SUM(id) FROM ducklake.txn_test +---- +0 NULL