Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/include/storage/ducklake_catalog.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ class DuckLakeCatalog : public Catalog {
PhysicalOperator &plan) override;
PhysicalOperator &PlanDelete(ClientContext &context, PhysicalPlanGenerator &planner, LogicalDelete &op,
PhysicalOperator &plan) override;
PhysicalOperator &PlanDelete(ClientContext &context, PhysicalPlanGenerator &planner, LogicalDelete &op) override;
PhysicalOperator &PlanUpdate(ClientContext &context, PhysicalPlanGenerator &planner, LogicalUpdate &op,
PhysicalOperator &plan) override;
PhysicalOperator &PlanMergeInto(ClientContext &context, PhysicalPlanGenerator &planner, LogicalMergeInto &op,
Expand Down
1 change: 1 addition & 0 deletions src/include/storage/ducklake_metadata_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ class DuckLakeMetadataManager {
virtual shared_ptr<DuckLakeInlinedData> TransformInlinedData(QueryResult &result,
const vector<LogicalType> &expected_types);

virtual void MarkInlinedDataDeleted(DuckLakeSnapshot snapshot, const string &inlined_table_name);
virtual void DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table);
//! We delete at the flush
virtual void DeleteFlushedInlinedData(const DuckLakeInlinedTableInfo &inlined_table, idx_t flush_snapshot_id);
Expand Down
3 changes: 3 additions & 0 deletions src/include/storage/ducklake_transaction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ struct LocalTableChanges {
void AppendInlinedData(ClientContext &context, TableIndex table_id, unique_ptr<DuckLakeInlinedData> new_data);
void AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set<idx_t> new_deletes);
void DeleteFromLocalInlinedData(ClientContext &context, TableIndex table_id, set<idx_t> new_deletes);
void TruncateLocalInlinedData(TableIndex table_id);
void AddColumnToLocalInlinedData(ClientContext &context, TableIndex table_id, const LogicalType &new_column_type,
FieldIndex new_field_index, const Value &default_value);
void RemoveColumnFromLocalInlinedData(ClientContext &context, TableIndex table_id,
Expand Down Expand Up @@ -206,6 +207,7 @@ class DuckLakeTransaction : public Transaction, public enable_shared_from_this<D
void AppendInlinedData(TableIndex table_id, unique_ptr<DuckLakeInlinedData> collection);
void AddNewInlinedDeletes(TableIndex table_id, const string &table_name, set<idx_t> new_deletes);
void DeleteFromLocalInlinedData(TableIndex table_id, set<idx_t> new_deletes);
void TruncateLocalInlinedData(TableIndex table_id);
void AddColumnToLocalInlinedData(TableIndex table_id, const LogicalType &new_column_type,
FieldIndex new_field_index, const Value &default_value = Value());
void RemoveColumnFromLocalInlinedData(TableIndex table_id, LogicalIndex removed_column_index,
Expand All @@ -226,6 +228,7 @@ class DuckLakeTransaction : public Transaction, public enable_shared_from_this<D
void DropFile(TableIndex table_id, DataFileIndex data_file_id, string path);

void DeleteSnapshots(const vector<DuckLakeSnapshotInfo> &snapshots);
void MarkInlinedDataDeleted(const string &inlined_table_name);
void DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table);
//! Delete inlined data rows with begin_snapshot <= flush_snapshot_id
void DeleteFlushedInlinedData(const DuckLakeInlinedTableInfo &inlined_table, idx_t flush_snapshot_id);
Expand Down
35 changes: 35 additions & 0 deletions src/include/storage/ducklake_truncate.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// storage/ducklake_truncate.hpp
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb/execution/physical_operator.hpp"
#include "storage/ducklake_table_entry.hpp"

namespace duckdb {

class DuckLakeTruncate : public PhysicalOperator {
public:
DuckLakeTruncate(PhysicalPlan &physical_plan, DuckLakeTableEntry &table);

DuckLakeTableEntry &table;

public:
SourceResultType GetDataInternal(ExecutionContext &context, DataChunk &chunk,
OperatorSourceInput &input) const override;

bool IsSource() const override {
return true;
}

unique_ptr<GlobalSourceState> GetGlobalSourceState(ClientContext &context) const override;

string GetName() const override;
InsertionOrderPreservingMap<string> ParamsToString() const override;
};

} // namespace duckdb
1 change: 1 addition & 0 deletions src/storage/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ add_library(
ducklake_storage.cpp
ducklake_delete.cpp
ducklake_deletion_vector.cpp
ducklake_truncate.cpp
ducklake_multi_file_reader.cpp
ducklake_partition_data.cpp
ducklake_secret.cpp
Expand Down
2 changes: 1 addition & 1 deletion src/storage/ducklake_delete.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ PhysicalOperator &DuckLakeCatalog::PlanDelete(ClientContext &context, PhysicalPl
row_id_indexes.push_back(bound_ref.index);
}
return DuckLakeDelete::PlanDelete(context, planner, op.table.Cast<DuckLakeTableEntry>(), child_plan,
std::move(row_id_indexes), std::move(encryption_key));
std::move(row_id_indexes), std::move(encryption_key), true);
}

} // namespace duckdb
21 changes: 16 additions & 5 deletions src/storage/ducklake_metadata_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1795,7 +1795,7 @@ vector<DuckLakeCompactionFileEntry> DuckLakeMetadataManager::GetFilesForCompacti
string deletion_threshold_clause;
if (type == CompactionType::REWRITE_DELETES) {
// Filter current data files in SQL, then apply the delete threshold in C++ so we can include
// metadata-only inlined file deletions as rewrite candidates
// metadata-only inlined file deletions as rewrite candidates.
deletion_threshold_clause = " AND data.end_snapshot is null";
}
// Add file size filtering for MERGE_ADJACENT_TABLES compaction
Expand Down Expand Up @@ -1900,8 +1900,7 @@ ORDER BY data.begin_snapshot, data.row_id_start, data.data_file_id, del.begin_sn
delete_file.data = ReadDeleteFile(table, row, col_idx, IsEncrypted());
file_entry.delete_files.push_back(std::move(delete_file));
}

// Load inlined deletions for active files so rewrite compaction can treat them the same as delete files
// Load inlined deletions for active files so rewrite compaction can treat them the same as delete files.
auto inlined_deletions = ReadInlinedFileDeletions(table_id, snapshot);
for (auto &file : files) {
auto entry = inlined_deletions.find(file.file.id.index);
Expand Down Expand Up @@ -4410,8 +4409,8 @@ WHERE end_snapshot IS NOT NULL AND NOT EXISTS(

void DuckLakeMetadataManager::DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table) {
auto result = transaction.Query(StringUtil::Format(R"(
DELETE FROM {METADATA_CATALOG}.%s
)",
DELETE FROM {METADATA_CATALOG}.%s
)",
SQLIdentifier(inlined_table.table_name)));
if (result->HasError()) {
result->GetErrorObject().Throw("Failed to delete inlined data in DuckLake from table " +
Expand Down Expand Up @@ -4441,6 +4440,18 @@ DuckLakeMetadataManager::GenerateDeleteFlushedInlinedData(const vector<FlushedIn
return result;
}

void DuckLakeMetadataManager::MarkInlinedDataDeleted(DuckLakeSnapshot snapshot, const string &inlined_table_name) {
auto result = transaction.Query(snapshot, StringUtil::Format(R"(
UPDATE {METADATA_CATALOG}.%s
SET end_snapshot = {SNAPSHOT_ID}
WHERE end_snapshot IS NULL AND begin_snapshot <= {SNAPSHOT_ID}
)",
SQLIdentifier(inlined_table_name)));
if (result->HasError()) {
result->GetErrorObject().Throw("Failed to mark inlined data as deleted in DuckLake from table " +
inlined_table_name + ": ");
}
}
string DuckLakeMetadataManager::InsertNewSchema(const DuckLakeSnapshot &snapshot, const set<TableIndex> &table_ids) {
if (table_ids.empty()) {
return {};
Expand Down
3 changes: 2 additions & 1 deletion src/storage/ducklake_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,8 @@ TableFunction DuckLakeFunctions::GetDuckLakeScanFunction(DatabaseInstance &insta

DuckLakeFunctionInfo::DuckLakeFunctionInfo(DuckLakeTableEntry &table, DuckLakeTransaction &transaction_p,
DuckLakeSnapshot snapshot)
: table(table), transaction(transaction_p.shared_from_this()), snapshot(snapshot) {
: table(table), transaction(transaction_p.shared_from_this()), table_name(table.name), snapshot(snapshot),
table_id(table.GetTableId()) {
}

shared_ptr<DuckLakeFunctionInfo>
Expand Down
25 changes: 25 additions & 0 deletions src/storage/ducklake_transaction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,22 @@ void LocalTableChanges::DeleteFromLocalInlinedData(ClientContext &context, Table
inlined_data.row_ids = std::move(new_row_ids);
}

void LocalTableChanges::TruncateLocalInlinedData(TableIndex table_id) {
lock_guard<mutex> guard(lock);
auto entry = changes.find(table_id);
if (entry == changes.end()) {
throw InternalException("TruncateLocalInlinedData called but no transaction-local data exists for table");
}
auto &table_changes = entry->second;
if (!table_changes.new_inlined_data) {
throw InternalException("TruncateLocalInlinedData called but no inlined data exists");
}
table_changes.new_inlined_data.reset();
if (table_changes.IsEmpty()) {
changes.erase(entry);
}
}

static void RemoveFieldStats(map<FieldIndex, DuckLakeColumnStats> &column_stats, const DuckLakeFieldId &field_id) {
column_stats.erase(field_id.GetFieldIndex());
for (auto &child_id : field_id.Children()) {
Expand Down Expand Up @@ -2560,6 +2576,11 @@ void DuckLakeTransaction::DeleteSnapshots(const vector<DuckLakeSnapshotInfo> &sn
metadata_manager.DeleteSnapshots(snapshots);
}

void DuckLakeTransaction::MarkInlinedDataDeleted(const string &inlined_table_name) {
auto &metadata_manager = GetMetadataManager();
metadata_manager.MarkInlinedDataDeleted(GetSnapshot(), inlined_table_name);
}

void DuckLakeTransaction::DeleteInlinedData(const DuckLakeInlinedTableInfo &inlined_table) {
auto &metadata_manager = GetMetadataManager();
metadata_manager.DeleteInlinedData(inlined_table);
Expand Down Expand Up @@ -2712,6 +2733,10 @@ void DuckLakeTransaction::DeleteFromLocalInlinedData(TableIndex table_id, set<id
local_changes.DeleteFromLocalInlinedData(*context_ref, table_id, std::move(new_deletes));
}

void DuckLakeTransaction::TruncateLocalInlinedData(TableIndex table_id) {
local_changes.TruncateLocalInlinedData(table_id);
}

void DuckLakeTransaction::AddColumnToLocalInlinedData(TableIndex table_id, const LogicalType &new_column_type,
FieldIndex new_field_index, const Value &default_value) {
auto context_ref = context.lock();
Expand Down
121 changes: 121 additions & 0 deletions src/storage/ducklake_truncate.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
//===----------------------------------------------------------------------===//
// DuckDB
//
// storage/ducklake_truncate.cpp
//
//===----------------------------------------------------------------------===//

#include "storage/ducklake_truncate.hpp"
#include "storage/ducklake_catalog.hpp"
#include "storage/ducklake_multi_file_list.hpp"
#include "storage/ducklake_scan.hpp"
#include "storage/ducklake_transaction.hpp"
#include "duckdb/planner/operator/logical_delete.hpp"
#include "duckdb/planner/operator/logical_get.hpp"

namespace duckdb {

class DuckLakeTruncateGlobalState : public GlobalSourceState {
public:
bool finished = false;
};

static set<idx_t> GetVisibleInlinedRowIds(DuckLakeTransaction &transaction, DuckLakeSnapshot snapshot,
const string &inlined_table_name) {
auto &metadata_manager = transaction.GetMetadataManager();
auto query_result = metadata_manager.ReadInlinedData(snapshot, inlined_table_name, {"row_id"});
if (query_result->HasError()) {
query_result->GetErrorObject().Throw("Failed to read inlined row ids during DuckLake truncate: ");
}

set<idx_t> row_ids;
for (auto &row : *query_result) {
row_ids.insert(row.GetValue<idx_t>(0));
}
return row_ids;
}

DuckLakeTruncate::DuckLakeTruncate(PhysicalPlan &physical_plan, DuckLakeTableEntry &table)
: PhysicalOperator(physical_plan, PhysicalOperatorType::EXTENSION, {LogicalType::UBIGINT}, 0), table(table) {
}

unique_ptr<GlobalSourceState> DuckLakeTruncate::GetGlobalSourceState(ClientContext &context) const {
return make_uniq<DuckLakeTruncateGlobalState>();
}

SourceResultType DuckLakeTruncate::GetDataInternal(ExecutionContext &context, DataChunk &chunk,
OperatorSourceInput &input) const {
auto &gstate = input.global_state.Cast<DuckLakeTruncateGlobalState>();
if (gstate.finished) {
return SourceResultType::FINISHED;
}
gstate.finished = true;

auto &transaction = DuckLakeTransaction::Get(context.client, table.catalog);
DuckLakeFunctionInfo read_info(table, transaction, transaction.GetSnapshot());
auto transaction_local_files = transaction.GetTransactionLocalFiles(table.GetTableId());
auto transaction_local_data = transaction.GetTransactionLocalInlinedData(table.GetTableId());
DuckLakeMultiFileList file_list(read_info, std::move(transaction_local_files), transaction_local_data);

uint64_t total_deleted_count = table.GetNetDataFileRowCount(transaction) + table.GetNetInlinedRowCount(transaction);
auto files = file_list.GetFilesExtended();
for (auto &file_info : files) {
switch (file_info.data_type) {
case DuckLakeDataType::DATA_FILE: {
if (file_info.file_id.IsValid()) {
transaction.DropFile(table.GetTableId(), file_info.file_id, file_info.file.path);
} else {
transaction.DropTransactionLocalFile(table.GetTableId(), file_info.file.path);
}
break;
}
case DuckLakeDataType::INLINED_DATA: {
auto row_ids = GetVisibleInlinedRowIds(transaction, read_info.snapshot, file_info.file.path);
transaction.AddNewInlinedDeletes(table.GetTableId(), file_info.file.path, std::move(row_ids));
Copy link
Copy Markdown
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The whole purpose of this PR is so that TRUNCATE doesn't have to iterate through rows.
Why can't this just set the entire inline "file" as deleted?

break;
}
case DuckLakeDataType::TRANSACTION_LOCAL_INLINED_DATA: {
transaction.TruncateLocalInlinedData(table.GetTableId());
break;
}
default:
throw InternalException("Unsupported DuckLakeDataType in truncate");
}
}

chunk.SetCardinality(1);
chunk.SetValue(0, 0, Value::UBIGINT(total_deleted_count));
return SourceResultType::FINISHED;
}

string DuckLakeTruncate::GetName() const {
return "DUCKLAKE_TRUNCATE";
}

InsertionOrderPreservingMap<string> DuckLakeTruncate::ParamsToString() const {
InsertionOrderPreservingMap<string> result;
result["Table Name"] = table.name;
return result;
}

PhysicalOperator &DuckLakeCatalog::PlanDelete(ClientContext &context, PhysicalPlanGenerator &planner, LogicalDelete &op) {
bool delete_all = false;
if (op.children.size() == 1 && op.children[0]->type == LogicalOperatorType::LOGICAL_GET) {
auto &get = op.children[0]->Cast<LogicalGet>();
delete_all = get.table_filters.filters.empty();
}
if (!delete_all) {
return Catalog::PlanDelete(context, planner, op);
}
auto &table = op.table.Cast<DuckLakeTableEntry>();
auto &transaction = DuckLakeTransaction::Get(context, *this);
if (transaction.HasAnyLocalChanges(table.GetTableId())) {
return Catalog::PlanDelete(context, planner, op);
}
if (op.return_chunk) {
throw BinderException("RETURNING clause not yet supported for deletion of a DuckLake table");
}
return planner.Make<DuckLakeTruncate>(table);
}

} // namespace duckdb
37 changes: 37 additions & 0 deletions test/sql/delete/ducklake_delete_all_simple.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# name: test/sql/delete/ducklake_delete_all_simple.test
# description: simple delete-all on ducklake table

require ducklake

require parquet

test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db

test-env DATA_PATH __TEST_DIR__

statement ok
ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_delete_all_simple')

statement ok
CREATE TABLE ducklake.delete_all_test(i INTEGER);

statement ok
INSERT INTO ducklake.delete_all_test FROM range(10);

query II
EXPLAIN DELETE FROM ducklake.delete_all_test;
----
physical_plan <REGEX>:.*DUCKLAKE_TRUNCATE.*

query I
SELECT COUNT(*) FROM ducklake.delete_all_test;
----
10

statement ok
DELETE FROM ducklake.delete_all_test;

query I
SELECT COUNT(*) FROM ducklake.delete_all_test;
----
0
37 changes: 37 additions & 0 deletions test/sql/delete/ducklake_truncate_simple.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# name: test/sql/delete/ducklake_truncate_simple.test
# description: simple TRUNCATE on ducklake table

require ducklake

require parquet

test-env DUCKLAKE_CONNECTION __TEST_DIR__/{UUID}.db

test-env DATA_PATH __TEST_DIR__

statement ok
ATTACH 'ducklake:${DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '${DATA_PATH}/ducklake_truncate_simple')

statement ok
CREATE TABLE ducklake.truncate_test(i INTEGER);

statement ok
INSERT INTO ducklake.truncate_test FROM range(10);

query II
EXPLAIN TRUNCATE ducklake.truncate_test;
----
physical_plan <REGEX>:.*DUCKLAKE_TRUNCATE.*

query I
SELECT COUNT(*) FROM ducklake.truncate_test;
----
10

statement ok
TRUNCATE ducklake.truncate_test;

query I
SELECT COUNT(*) FROM ducklake.truncate_test;
----
0
Loading
Loading