Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/common/ducklake_version.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ DuckLakeVersion DuckLakeVersionFromString(const string &version_str) {
if (version_str == "1.1-dev1") {
return DuckLakeVersion::V1_1_DEV_1;
}
if (version_str == "1.1-dev2") {
return DuckLakeVersion::V1_1_DEV_2;
}
throw InvalidInputException("Unsupported ducklake_version '%s'", version_str);
}

Expand All @@ -49,6 +52,8 @@ string DuckLakeVersionToString(DuckLakeVersion version) {
return "1.0";
case DuckLakeVersion::V1_1_DEV_1:
return "1.1-dev1";
case DuckLakeVersion::V1_1_DEV_2:
return "1.1-dev2";
default:
throw InternalException("DuckLakeVersionToString: unknown version");
}
Expand Down
5 changes: 3 additions & 2 deletions src/include/common/ducklake_version.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ enum class DuckLakeVersion : uint8_t {
V0_4_DEV1 = 5,
V0_4 = 6,
V1_0 = 7,
V1_1_DEV_1 = 8
V1_1_DEV_1 = 8,
V1_1_DEV_2 = 9
};

static constexpr DuckLakeVersion DUCKLAKE_LATEST_VERSION = DuckLakeVersion::V1_1_DEV_1;
static constexpr DuckLakeVersion DUCKLAKE_LATEST_VERSION = DuckLakeVersion::V1_1_DEV_2;

DuckLakeVersion DuckLakeVersionFromString(const string &version_str);
string DuckLakeVersionToString(DuckLakeVersion version);
Expand Down
7 changes: 7 additions & 0 deletions src/include/common/local_change.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,19 @@ struct LocalChange {
LocalChangeType type;
//! For operations that alter individual columns
FieldIndex field_index;
//! For SET_COLUMN_COMMENT on views
string view_column_comment_name;

static LocalChange SetColumnComment(FieldIndex field_idx) {
LocalChange result(LocalChangeType::SET_COLUMN_COMMENT);
result.field_index = field_idx;
return result;
}
static LocalChange SetViewColumnComment(const string &column_name) {
LocalChange result(LocalChangeType::SET_COLUMN_COMMENT);
result.view_column_comment_name = column_name;
return result;
}
static LocalChange SetNull(FieldIndex field_idx) {
LocalChange result(LocalChangeType::SET_NULL);
result.field_index = field_idx;
Expand Down
14 changes: 14 additions & 0 deletions src/include/storage/ducklake_metadata_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,12 @@ struct DuckLakeSnapshotInfo {
Value commit_extra_info;
};

struct DuckLakeViewColumnTag {
string column_name;
string key;
Value value;
};

struct DuckLakeViewInfo {
TableIndex id;
SchemaIndex schema_id;
Expand All @@ -312,6 +318,14 @@ struct DuckLakeViewInfo {
vector<string> column_aliases;
string sql;
vector<DuckLakeTag> tags;
vector<DuckLakeViewColumnTag> column_tags;
};

struct DuckLakeViewColumnTagInfo {
TableIndex view_id;
string column_name;
string key;
Value value;
};

struct DuckLakeTagInfo {
Expand Down
3 changes: 3 additions & 0 deletions src/include/storage/ducklake_metadata_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ class DuckLakeMetadataManager {
virtual string WriteNewColumns(const vector<DuckLakeNewColumn> &new_columns);
virtual string WriteNewTags(const vector<DuckLakeTagInfo> &new_tags);
virtual string WriteNewColumnTags(const vector<DuckLakeColumnTagInfo> &new_tags);
virtual string WriteNewViewColumnTags(const vector<DuckLakeViewColumnTagInfo> &new_tags);
virtual string WriteNewDataFiles(DuckLakeSnapshot &commit_snapshot, const vector<DuckLakeFileInfo> &new_files,
const vector<DuckLakeTableInfo> &new_tables,
vector<DuckLakeSchemaInfo> &new_schemas_result);
Expand Down Expand Up @@ -258,6 +259,7 @@ class DuckLakeMetadataManager {
virtual void MigrateV03(bool allow_failures = false);
virtual void MigrateV04();
virtual void MigrateV10();
virtual void MigrateV11Dev1();
virtual void ExecuteMigration(string migrate_query, bool allow_failures, const string &from_version,
const string &to_version);

Expand All @@ -274,6 +276,7 @@ class DuckLakeMetadataManager {
virtual string ListAggregation(const vector<pair<string, string>> &fields) const;
//! Parse tag list from ListAggregation value
virtual vector<DuckLakeTag> LoadTags(const Value &tag_map) const;
virtual vector<DuckLakeViewColumnTag> LoadViewColumnTags(const Value &list) const;
//! Parse inlined data tables list from ListAggregation value
virtual vector<DuckLakeInlinedTableInfo> LoadInlinedDataTables(const Value &list) const;
//! Parse macro implementations list from ListAggregation value
Expand Down
3 changes: 3 additions & 0 deletions src/include/storage/ducklake_view_entry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

namespace duckdb {
struct SetCommentInfo;
struct SetColumnCommentInfo;
class DuckLakeTransaction;

class DuckLakeViewEntry : public ViewCatalogEntry {
Expand Down Expand Up @@ -53,6 +54,8 @@ class DuckLakeViewEntry : public ViewCatalogEntry {
// ALTER VIEW
DuckLakeViewEntry(DuckLakeViewEntry &parent, CreateViewInfo &info, LocalChange local_change);

unique_ptr<CatalogEntry> Alter(DuckLakeTransaction &transaction, SetColumnCommentInfo &info);

private:
unique_ptr<SelectStatement> ParseSelectStatement() const;

Expand Down
2 changes: 1 addition & 1 deletion src/metadata_manager/ducklake_metadata_manager_v1_1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ string DuckLakeMetadataManagerV1_1<Base>::GetCreateTableStatements() {

template <typename Base>
string DuckLakeMetadataManagerV1_1<Base>::GetVersionString() {
constexpr auto VERSION = DuckLakeVersion::V1_1_DEV_1;
constexpr auto VERSION = DuckLakeVersion::V1_1_DEV_2;
return DuckLakeVersionToString(VERSION);
}

Expand Down
5 changes: 5 additions & 0 deletions src/storage/ducklake_catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,11 @@ unique_ptr<DuckLakeCatalogSet> DuckLakeCatalog::LoadSchemaForSnapshot(DuckLakeTr
create_view_info->tags[tag.key] = tag.value;
}
}
for (auto &ct : view.column_tags) {
if (ct.key == "comment") {
create_view_info->column_comments_map[ct.column_name] = ct.value;
}
}
auto view_entry =
make_uniq<DuckLakeViewEntry>(*this, schema_entry, *create_view_info, view.id, std::move(view.uuid),
std::move(view.sql), LocalChangeType::NONE);
Expand Down
15 changes: 6 additions & 9 deletions src/storage/ducklake_delete_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
#include "duckdb/planner/expression/bound_constant_expression.hpp"
#include "duckdb/planner/expression/bound_reference_expression.hpp"


namespace duckdb {

//! FunctionInfo to pass delete file metadata to the MultiFileReader
Expand Down Expand Up @@ -139,8 +138,8 @@ DeleteFileScanResult DuckLakeDeleteFilter::ScanDeletionVectorFile(ClientContext
unique_ptr<ExpressionFilter> MakeComparisonFilter(ExpressionType comparison_type, Value constant) {
auto col_ref = make_uniq<BoundReferenceExpression>(LogicalType::BIGINT, storage_t(0));
auto bound_constant = make_uniq<BoundConstantExpression>(std::move(constant));
auto comparison = make_uniq<BoundComparisonExpression>(comparison_type, std::move(col_ref),
std::move(bound_constant));
auto comparison =
make_uniq<BoundComparisonExpression>(comparison_type, std::move(col_ref), std::move(bound_constant));
return make_uniq<ExpressionFilter>(std::move(comparison));
}

Expand Down Expand Up @@ -193,15 +192,13 @@ DeleteFileScanResult DuckLakeDeleteFilter::ScanDeleteFile(ClientContext &context

if (snapshot_filter_min.IsValid()) {
auto min_constant = Value::BIGINT(NumericCast<int64_t>(snapshot_filter_min.GetIndex()));
filters->PushFilter(snapshot_col_idx,
MakeComparisonFilter(ExpressionType::COMPARE_GREATERTHANOREQUALTO,
std::move(min_constant)));
filters->PushFilter(snapshot_col_idx, MakeComparisonFilter(ExpressionType::COMPARE_GREATERTHANOREQUALTO,
std::move(min_constant)));
}
if (snapshot_filter_max.IsValid()) {
auto max_constant = Value::BIGINT(NumericCast<int64_t>(snapshot_filter_max.GetIndex()));
filters->PushFilter(snapshot_col_idx,
MakeComparisonFilter(ExpressionType::COMPARE_LESSTHANOREQUALTO,
std::move(max_constant)));
filters->PushFilter(snapshot_col_idx, MakeComparisonFilter(ExpressionType::COMPARE_LESSTHANOREQUALTO,
std::move(max_constant)));
}
scanner.SetFilters(std::move(filters));
}
Expand Down
6 changes: 5 additions & 1 deletion src/storage/ducklake_initializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,10 @@ void DuckLakeInitializer::LoadExistingDuckLake(DuckLakeTransaction &transaction)
metadata_manager.MigrateV10();
catalog_version = DuckLakeVersion::V1_1_DEV_1;
}
if (catalog_version == DuckLakeVersion::V1_1_DEV_1) {
metadata_manager.MigrateV11Dev1();
catalog_version = DuckLakeVersion::V1_1_DEV_2;
}
if (catalog_version != DUCKLAKE_LATEST_VERSION) {
throw NotImplementedException("Unsupported DuckLake version '%s'",
DuckLakeVersionToString(catalog_version));
Expand Down Expand Up @@ -289,7 +293,7 @@ void DuckLakeInitializer::SetVersionedMetadataManager(DuckLakeTransaction &trans
}
auto &current = transaction.GetMetadataManager();
unique_ptr<DuckLakeMetadataManager> new_manager;
if (version == DuckLakeVersion::V1_1_DEV_1) {
if (version == DuckLakeVersion::V1_1_DEV_1 || version == DuckLakeVersion::V1_1_DEV_2) {
if (dynamic_cast<PostgresMetadataManager *>(&current)) {
new_manager = make_uniq<DuckLakeMetadataManagerV1_1<PostgresMetadataManager>>(transaction);
} else if (dynamic_cast<SQLiteMetadataManager *>(&current)) {
Expand Down
3 changes: 1 addition & 2 deletions src/storage/ducklake_merge_into.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -520,8 +520,7 @@ static unique_ptr<MergeIntoOperator> DuckLakePlanMergeIntoAction(DuckLakeCatalog
auto copy_options = DuckLakeInsert::GetCopyOptions(context, copy_input);

auto &physical_copy = DuckLakeInsert::PlanCopyForInsert(context, planner, copy_input, nullptr);
auto &copy_dummy =
planner.Make<PhysicalDummyScan>(physical_copy.Cast<PhysicalCopyToFile>().expected_types, 1);
auto &copy_dummy = planner.Make<PhysicalDummyScan>(physical_copy.Cast<PhysicalCopyToFile>().expected_types, 1);
physical_copy.children.push_back(copy_dummy);
auto &insert =
DuckLakeInsert::PlanInsert(context, planner, ducklake_table, std::move(copy_input.encryption_key));
Expand Down
95 changes: 92 additions & 3 deletions src/storage/ducklake_metadata_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ CREATE TABLE {METADATA_CATALOG}.ducklake_table(table_id BIGINT, table_uuid UUID,
CREATE TABLE {METADATA_CATALOG}.ducklake_view(view_id BIGINT, view_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_id BIGINT, view_name VARCHAR, dialect VARCHAR, sql VARCHAR, column_aliases VARCHAR);
CREATE TABLE {METADATA_CATALOG}.ducklake_tag(object_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR);
CREATE TABLE {METADATA_CATALOG}.ducklake_column_tag(table_id BIGINT, column_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR);
CREATE TABLE {METADATA_CATALOG}.ducklake_view_column_tag(view_id BIGINT, column_name VARCHAR, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR);
CREATE TABLE {METADATA_CATALOG}.ducklake_data_file(data_file_id BIGINT PRIMARY KEY, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, file_order BIGINT, path VARCHAR, path_is_relative BOOLEAN, file_format VARCHAR, record_count BIGINT, file_size_bytes BIGINT, footer_size BIGINT, row_id_start BIGINT, partition_id BIGINT, encryption_key VARCHAR, mapping_id BIGINT, partial_max BIGINT);
CREATE TABLE {METADATA_CATALOG}.ducklake_file_column_stats(data_file_id BIGINT, table_id BIGINT, column_id BIGINT, column_size_bytes BIGINT, value_count BIGINT, null_count BIGINT, min_value VARCHAR, max_value VARCHAR, contains_nan BOOLEAN, extra_stats VARCHAR);
CREATE TABLE {METADATA_CATALOG}.ducklake_file_variant_stats(data_file_id BIGINT, table_id BIGINT, column_id BIGINT, variant_path VARCHAR, shredded_type VARCHAR, column_size_bytes BIGINT, value_count BIGINT, null_count BIGINT, min_value VARCHAR, max_value VARCHAR, contains_nan BOOLEAN, extra_stats VARCHAR);
Expand Down Expand Up @@ -335,6 +336,18 @@ UPDATE {METADATA_CATALOG}.ducklake_metadata SET value = '1.1-dev1' WHERE key = '
}
}

void DuckLakeMetadataManager::MigrateV11Dev1() {
auto result = transaction.Query(R"(
CREATE TABLE IF NOT EXISTS {METADATA_CATALOG}.ducklake_view_column_tag(
view_id BIGINT, column_name VARCHAR, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR
);
UPDATE {METADATA_CATALOG}.ducklake_metadata SET value = '1.1-dev2' WHERE key = 'version';
)");
if (result->HasError()) {
result->GetErrorObject().Throw("Failed to migrate DuckLake from v1.1-dev1 to v1.1-dev2: ");
}
}

DuckLakeMetadata DuckLakeMetadataManager::LoadDuckLake() {
auto result = transaction.Query(R"(
SELECT key, value, scope, scope_id FROM {METADATA_CATALOG}.ducklake_metadata
Expand Down Expand Up @@ -412,6 +425,26 @@ vector<DuckLakeTag> DuckLakeMetadataManager::LoadTags(const Value &tag_map) cons
return result;
}

vector<DuckLakeViewColumnTag> DuckLakeMetadataManager::LoadViewColumnTags(const Value &list) const {
vector<DuckLakeViewColumnTag> result;
if (list.IsNull()) {
return result;
}
for (auto &val : ListValue::GetChildren(list)) {
auto &struct_children = StructValue::GetChildren(val);
DuckLakeViewColumnTag tag;
tag.column_name = struct_children[0].ToString();
tag.key = struct_children[1].ToString();
if (struct_children.size() > 2) {
tag.value = struct_children[2].IsNull() ? Value() : struct_children[2];
} else {
tag.value = Value();
}
result.push_back(std::move(tag));
}
return result;
}

vector<DuckLakeInlinedTableInfo> DuckLakeMetadataManager::LoadInlinedDataTables(const Value &list) const {
vector<DuckLakeInlinedTableInfo> result;
for (auto &val : ListValue::GetChildren(list)) {
Expand Down Expand Up @@ -580,6 +613,11 @@ WHERE {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < end_snapshot OR end_s
{"name", "table_name"},
{"schema_version", "schema_version"},
};
static const vector<pair<string, string>> VIEW_COLUMN_TAG_FIELDS = {
{"column_name", "column_name"},
{"key", "key"},
{"value", "value"},
};

// load the table information
result = transaction.Query(snapshot, StringUtil::Format(R"(
Expand Down Expand Up @@ -705,11 +743,18 @@ SELECT view_id, view_uuid, schema_id, view_name, dialect, sql, column_aliases,
FROM {METADATA_CATALOG}.ducklake_tag tag
WHERE object_id=view_id AND
{SNAPSHOT_ID} >= tag.begin_snapshot AND ({SNAPSHOT_ID} < tag.end_snapshot OR tag.end_snapshot IS NULL)
) AS tag
) AS tag,
(
SELECT %s
FROM {METADATA_CATALOG}.ducklake_view_column_tag vct
WHERE vct.view_id=view.view_id AND
{SNAPSHOT_ID} >= vct.begin_snapshot AND ({SNAPSHOT_ID} < vct.end_snapshot OR vct.end_snapshot IS NULL)
) AS view_column_tags
FROM {METADATA_CATALOG}.ducklake_view view
WHERE {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < view.end_snapshot OR view.end_snapshot IS NULL)
)",
ListAggregation(TAG_FIELDS)));
ListAggregation(TAG_FIELDS),
ListAggregation(VIEW_COLUMN_TAG_FIELDS)));
if (result->HasError()) {
result->GetErrorObject().Throw("Failed to get partition information from DuckLake: ");
}
Expand All @@ -727,6 +772,9 @@ WHERE {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < view.end_snapshot OR
auto tags = row.GetValue<Value>(7);
view_info.tags = LoadTags(tags);
}
if (!row.IsNull(8)) {
view_info.column_tags = LoadViewColumnTags(row.GetValue<Value>(8));
}
views.push_back(std::move(view_info));
}

Expand Down Expand Up @@ -2125,6 +2173,7 @@ string DuckLakeMetadataManager::DropTables(const set<TableIndex> &ids, bool rena
string DuckLakeMetadataManager::DropViews(const set<TableIndex> &ids) {
string batch_query = FlushDrop("ducklake_view", "view_id", ids);
batch_query += FlushDrop("ducklake_tag", "object_id", ids);
batch_query += FlushDrop("ducklake_view_column_tag", "view_id", ids);
return batch_query;
}

Expand Down Expand Up @@ -4006,6 +4055,45 @@ WHERE table_id=tid AND column_id=cid AND ducklake_column_tag.key=overwritten_tag
return batch_query;
}

string DuckLakeMetadataManager::WriteNewViewColumnTags(const vector<DuckLakeViewColumnTagInfo> &new_tags) {
if (new_tags.empty()) {
return {};
}
string tags_list;
for (auto &tag : new_tags) {
if (!tags_list.empty()) {
tags_list += ", ";
}
tags_list +=
StringUtil::Format("(%d, %s, %s)", tag.view_id.index, SQLString(tag.column_name), SQLString(tag.key));
}

string batch_query = StringUtil::Format(R"(
WITH overwritten_tags(vid, col, k) AS (
VALUES %s
)
UPDATE {METADATA_CATALOG}.ducklake_view_column_tag
SET end_snapshot = {SNAPSHOT_ID}
FROM overwritten_tags
WHERE view_id=vid AND ducklake_view_column_tag.column_name=overwritten_tags.col AND
ducklake_view_column_tag.key=overwritten_tags.k AND end_snapshot IS NULL
;)",
tags_list);

string new_tag_query;
for (auto &tag : new_tags) {
if (!new_tag_query.empty()) {
new_tag_query += ", ";
}
new_tag_query += StringUtil::Format("(%d, %s, {SNAPSHOT_ID}, NULL, %s, %s)", tag.view_id.index,
SQLString(tag.column_name), SQLString(tag.key), tag.value.ToSQLString());
}

new_tag_query = "INSERT INTO {METADATA_CATALOG}.ducklake_view_column_tag VALUES " + new_tag_query + ";";
batch_query += new_tag_query;
return batch_query;
}

struct ColumnStatsSQL {
string contains_null;
string contains_nan;
Expand Down Expand Up @@ -4522,7 +4610,8 @@ WHERE table_id IN (%s);)",
}

// delete any views, schemas, macros, etc that are no longer referenced
tables_to_delete_from = {"ducklake_schema", "ducklake_view", "ducklake_tag", "ducklake_macro"};
tables_to_delete_from = {"ducklake_schema", "ducklake_view", "ducklake_view_column_tag", "ducklake_tag",
"ducklake_macro"};
for (auto &delete_tbl : tables_to_delete_from) {
auto result = transaction.Query(StringUtil::Format(R"(
DELETE FROM {METADATA_CATALOG}.%s
Expand Down
Loading
Loading