diff --git a/src/common/ducklake_version.cpp b/src/common/ducklake_version.cpp index 43e373b11ed..15d04d54536 100644 --- a/src/common/ducklake_version.cpp +++ b/src/common/ducklake_version.cpp @@ -28,6 +28,9 @@ DuckLakeVersion DuckLakeVersionFromString(const string &version_str) { if (version_str == "1.1-dev1") { return DuckLakeVersion::V1_1_DEV_1; } + if (version_str == "1.1-dev2") { + return DuckLakeVersion::V1_1_DEV_2; + } throw InvalidInputException("Unsupported ducklake_version '%s'", version_str); } @@ -49,6 +52,8 @@ string DuckLakeVersionToString(DuckLakeVersion version) { return "1.0"; case DuckLakeVersion::V1_1_DEV_1: return "1.1-dev1"; + case DuckLakeVersion::V1_1_DEV_2: + return "1.1-dev2"; default: throw InternalException("DuckLakeVersionToString: unknown version"); } diff --git a/src/include/common/ducklake_version.hpp b/src/include/common/ducklake_version.hpp index 962893a4a60..79e00b9d26d 100644 --- a/src/include/common/ducklake_version.hpp +++ b/src/include/common/ducklake_version.hpp @@ -21,10 +21,11 @@ enum class DuckLakeVersion : uint8_t { V0_4_DEV1 = 5, V0_4 = 6, V1_0 = 7, - V1_1_DEV_1 = 8 + V1_1_DEV_1 = 8, + V1_1_DEV_2 = 9 }; -static constexpr DuckLakeVersion DUCKLAKE_LATEST_VERSION = DuckLakeVersion::V1_1_DEV_1; +static constexpr DuckLakeVersion DUCKLAKE_LATEST_VERSION = DuckLakeVersion::V1_1_DEV_2; DuckLakeVersion DuckLakeVersionFromString(const string &version_str); string DuckLakeVersionToString(DuckLakeVersion version); diff --git a/src/include/common/local_change.hpp b/src/include/common/local_change.hpp index 4ba5b919e94..0fc37d7e016 100644 --- a/src/include/common/local_change.hpp +++ b/src/include/common/local_change.hpp @@ -38,12 +38,19 @@ struct LocalChange { LocalChangeType type; //! For operations that alter individual columns FieldIndex field_index; + //! For SET_COLUMN_COMMENT on views + string view_column_comment_name; static LocalChange SetColumnComment(FieldIndex field_idx) { LocalChange result(LocalChangeType::SET_COLUMN_COMMENT); result.field_index = field_idx; return result; } + static LocalChange SetViewColumnComment(const string &column_name) { + LocalChange result(LocalChangeType::SET_COLUMN_COMMENT); + result.view_column_comment_name = column_name; + return result; + } static LocalChange SetNull(FieldIndex field_idx) { LocalChange result(LocalChangeType::SET_NULL); result.field_index = field_idx; diff --git a/src/include/storage/ducklake_metadata_info.hpp b/src/include/storage/ducklake_metadata_info.hpp index 000a71faaaf..b02ddc2fb45 100644 --- a/src/include/storage/ducklake_metadata_info.hpp +++ b/src/include/storage/ducklake_metadata_info.hpp @@ -303,6 +303,12 @@ struct DuckLakeSnapshotInfo { Value commit_extra_info; }; +struct DuckLakeViewColumnTag { + string column_name; + string key; + Value value; +}; + struct DuckLakeViewInfo { TableIndex id; SchemaIndex schema_id; @@ -312,6 +318,14 @@ struct DuckLakeViewInfo { vector column_aliases; string sql; vector tags; + vector column_tags; +}; + +struct DuckLakeViewColumnTagInfo { + TableIndex view_id; + string column_name; + string key; + Value value; }; struct DuckLakeTagInfo { diff --git a/src/include/storage/ducklake_metadata_manager.hpp b/src/include/storage/ducklake_metadata_manager.hpp index 596e221fc7f..b896852e3e6 100644 --- a/src/include/storage/ducklake_metadata_manager.hpp +++ b/src/include/storage/ducklake_metadata_manager.hpp @@ -182,6 +182,7 @@ class DuckLakeMetadataManager { virtual string WriteNewColumns(const vector &new_columns); virtual string WriteNewTags(const vector &new_tags); virtual string WriteNewColumnTags(const vector &new_tags); + virtual string WriteNewViewColumnTags(const vector &new_tags); virtual string WriteNewDataFiles(DuckLakeSnapshot &commit_snapshot, const vector &new_files, const vector &new_tables, vector &new_schemas_result); @@ -258,6 +259,7 @@ class DuckLakeMetadataManager { virtual void MigrateV03(bool allow_failures = false); virtual void MigrateV04(); virtual void MigrateV10(); + virtual void MigrateV11Dev1(); virtual void ExecuteMigration(string migrate_query, bool allow_failures, const string &from_version, const string &to_version); @@ -274,6 +276,7 @@ class DuckLakeMetadataManager { virtual string ListAggregation(const vector> &fields) const; //! Parse tag list from ListAggregation value virtual vector LoadTags(const Value &tag_map) const; + virtual vector LoadViewColumnTags(const Value &list) const; //! Parse inlined data tables list from ListAggregation value virtual vector LoadInlinedDataTables(const Value &list) const; //! Parse macro implementations list from ListAggregation value diff --git a/src/include/storage/ducklake_view_entry.hpp b/src/include/storage/ducklake_view_entry.hpp index 7a116a46b25..0c8ddbfb19c 100644 --- a/src/include/storage/ducklake_view_entry.hpp +++ b/src/include/storage/ducklake_view_entry.hpp @@ -16,6 +16,7 @@ namespace duckdb { struct SetCommentInfo; +struct SetColumnCommentInfo; class DuckLakeTransaction; class DuckLakeViewEntry : public ViewCatalogEntry { @@ -53,6 +54,8 @@ class DuckLakeViewEntry : public ViewCatalogEntry { // ALTER VIEW DuckLakeViewEntry(DuckLakeViewEntry &parent, CreateViewInfo &info, LocalChange local_change); + unique_ptr Alter(DuckLakeTransaction &transaction, SetColumnCommentInfo &info); + private: unique_ptr ParseSelectStatement() const; diff --git a/src/metadata_manager/ducklake_metadata_manager_v1_1.cpp b/src/metadata_manager/ducklake_metadata_manager_v1_1.cpp index 8c8ef49a14b..39335a0cf1b 100644 --- a/src/metadata_manager/ducklake_metadata_manager_v1_1.cpp +++ b/src/metadata_manager/ducklake_metadata_manager_v1_1.cpp @@ -13,7 +13,7 @@ string DuckLakeMetadataManagerV1_1::GetCreateTableStatements() { template string DuckLakeMetadataManagerV1_1::GetVersionString() { - constexpr auto VERSION = DuckLakeVersion::V1_1_DEV_1; + constexpr auto VERSION = DuckLakeVersion::V1_1_DEV_2; return DuckLakeVersionToString(VERSION); } diff --git a/src/storage/ducklake_catalog.cpp b/src/storage/ducklake_catalog.cpp index c2c7e1f8a5a..e2021fcdf9a 100644 --- a/src/storage/ducklake_catalog.cpp +++ b/src/storage/ducklake_catalog.cpp @@ -455,6 +455,11 @@ unique_ptr DuckLakeCatalog::LoadSchemaForSnapshot(DuckLakeTr create_view_info->tags[tag.key] = tag.value; } } + for (auto &ct : view.column_tags) { + if (ct.key == "comment") { + create_view_info->column_comments_map[ct.column_name] = ct.value; + } + } auto view_entry = make_uniq(*this, schema_entry, *create_view_info, view.id, std::move(view.uuid), std::move(view.sql), LocalChangeType::NONE); diff --git a/src/storage/ducklake_delete_filter.cpp b/src/storage/ducklake_delete_filter.cpp index 3f80866c197..123dd445634 100644 --- a/src/storage/ducklake_delete_filter.cpp +++ b/src/storage/ducklake_delete_filter.cpp @@ -11,7 +11,6 @@ #include "duckdb/planner/expression/bound_constant_expression.hpp" #include "duckdb/planner/expression/bound_reference_expression.hpp" - namespace duckdb { //! FunctionInfo to pass delete file metadata to the MultiFileReader @@ -139,8 +138,8 @@ DeleteFileScanResult DuckLakeDeleteFilter::ScanDeletionVectorFile(ClientContext unique_ptr MakeComparisonFilter(ExpressionType comparison_type, Value constant) { auto col_ref = make_uniq(LogicalType::BIGINT, storage_t(0)); auto bound_constant = make_uniq(std::move(constant)); - auto comparison = make_uniq(comparison_type, std::move(col_ref), - std::move(bound_constant)); + auto comparison = + make_uniq(comparison_type, std::move(col_ref), std::move(bound_constant)); return make_uniq(std::move(comparison)); } @@ -193,15 +192,13 @@ DeleteFileScanResult DuckLakeDeleteFilter::ScanDeleteFile(ClientContext &context if (snapshot_filter_min.IsValid()) { auto min_constant = Value::BIGINT(NumericCast(snapshot_filter_min.GetIndex())); - filters->PushFilter(snapshot_col_idx, - MakeComparisonFilter(ExpressionType::COMPARE_GREATERTHANOREQUALTO, - std::move(min_constant))); + filters->PushFilter(snapshot_col_idx, MakeComparisonFilter(ExpressionType::COMPARE_GREATERTHANOREQUALTO, + std::move(min_constant))); } if (snapshot_filter_max.IsValid()) { auto max_constant = Value::BIGINT(NumericCast(snapshot_filter_max.GetIndex())); - filters->PushFilter(snapshot_col_idx, - MakeComparisonFilter(ExpressionType::COMPARE_LESSTHANOREQUALTO, - std::move(max_constant))); + filters->PushFilter(snapshot_col_idx, MakeComparisonFilter(ExpressionType::COMPARE_LESSTHANOREQUALTO, + std::move(max_constant))); } scanner.SetFilters(std::move(filters)); } diff --git a/src/storage/ducklake_initializer.cpp b/src/storage/ducklake_initializer.cpp index 3b930baceb5..4758a8d3839 100644 --- a/src/storage/ducklake_initializer.cpp +++ b/src/storage/ducklake_initializer.cpp @@ -219,6 +219,10 @@ void DuckLakeInitializer::LoadExistingDuckLake(DuckLakeTransaction &transaction) metadata_manager.MigrateV10(); catalog_version = DuckLakeVersion::V1_1_DEV_1; } + if (catalog_version == DuckLakeVersion::V1_1_DEV_1) { + metadata_manager.MigrateV11Dev1(); + catalog_version = DuckLakeVersion::V1_1_DEV_2; + } if (catalog_version != DUCKLAKE_LATEST_VERSION) { throw NotImplementedException("Unsupported DuckLake version '%s'", DuckLakeVersionToString(catalog_version)); @@ -289,7 +293,7 @@ void DuckLakeInitializer::SetVersionedMetadataManager(DuckLakeTransaction &trans } auto ¤t = transaction.GetMetadataManager(); unique_ptr new_manager; - if (version == DuckLakeVersion::V1_1_DEV_1) { + if (version == DuckLakeVersion::V1_1_DEV_1 || version == DuckLakeVersion::V1_1_DEV_2) { if (dynamic_cast(¤t)) { new_manager = make_uniq>(transaction); } else if (dynamic_cast(¤t)) { diff --git a/src/storage/ducklake_merge_into.cpp b/src/storage/ducklake_merge_into.cpp index 6cbf2068209..31daaa8310c 100644 --- a/src/storage/ducklake_merge_into.cpp +++ b/src/storage/ducklake_merge_into.cpp @@ -520,8 +520,7 @@ static unique_ptr DuckLakePlanMergeIntoAction(DuckLakeCatalog auto copy_options = DuckLakeInsert::GetCopyOptions(context, copy_input); auto &physical_copy = DuckLakeInsert::PlanCopyForInsert(context, planner, copy_input, nullptr); - auto ©_dummy = - planner.Make(physical_copy.Cast().expected_types, 1); + auto ©_dummy = planner.Make(physical_copy.Cast().expected_types, 1); physical_copy.children.push_back(copy_dummy); auto &insert = DuckLakeInsert::PlanInsert(context, planner, ducklake_table, std::move(copy_input.encryption_key)); diff --git a/src/storage/ducklake_metadata_manager.cpp b/src/storage/ducklake_metadata_manager.cpp index 9b4f6cbf8ae..bd30a5df5fe 100644 --- a/src/storage/ducklake_metadata_manager.cpp +++ b/src/storage/ducklake_metadata_manager.cpp @@ -180,6 +180,7 @@ CREATE TABLE {METADATA_CATALOG}.ducklake_table(table_id BIGINT, table_uuid UUID, CREATE TABLE {METADATA_CATALOG}.ducklake_view(view_id BIGINT, view_uuid UUID, begin_snapshot BIGINT, end_snapshot BIGINT, schema_id BIGINT, view_name VARCHAR, dialect VARCHAR, sql VARCHAR, column_aliases VARCHAR); CREATE TABLE {METADATA_CATALOG}.ducklake_tag(object_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR); CREATE TABLE {METADATA_CATALOG}.ducklake_column_tag(table_id BIGINT, column_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR); +CREATE TABLE {METADATA_CATALOG}.ducklake_view_column_tag(view_id BIGINT, column_name VARCHAR, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR); CREATE TABLE {METADATA_CATALOG}.ducklake_data_file(data_file_id BIGINT PRIMARY KEY, table_id BIGINT, begin_snapshot BIGINT, end_snapshot BIGINT, file_order BIGINT, path VARCHAR, path_is_relative BOOLEAN, file_format VARCHAR, record_count BIGINT, file_size_bytes BIGINT, footer_size BIGINT, row_id_start BIGINT, partition_id BIGINT, encryption_key VARCHAR, mapping_id BIGINT, partial_max BIGINT); CREATE TABLE {METADATA_CATALOG}.ducklake_file_column_stats(data_file_id BIGINT, table_id BIGINT, column_id BIGINT, column_size_bytes BIGINT, value_count BIGINT, null_count BIGINT, min_value VARCHAR, max_value VARCHAR, contains_nan BOOLEAN, extra_stats VARCHAR); CREATE TABLE {METADATA_CATALOG}.ducklake_file_variant_stats(data_file_id BIGINT, table_id BIGINT, column_id BIGINT, variant_path VARCHAR, shredded_type VARCHAR, column_size_bytes BIGINT, value_count BIGINT, null_count BIGINT, min_value VARCHAR, max_value VARCHAR, contains_nan BOOLEAN, extra_stats VARCHAR); @@ -335,6 +336,18 @@ UPDATE {METADATA_CATALOG}.ducklake_metadata SET value = '1.1-dev1' WHERE key = ' } } +void DuckLakeMetadataManager::MigrateV11Dev1() { + auto result = transaction.Query(R"( +CREATE TABLE IF NOT EXISTS {METADATA_CATALOG}.ducklake_view_column_tag( + view_id BIGINT, column_name VARCHAR, begin_snapshot BIGINT, end_snapshot BIGINT, key VARCHAR, value VARCHAR +); +UPDATE {METADATA_CATALOG}.ducklake_metadata SET value = '1.1-dev2' WHERE key = 'version'; + )"); + if (result->HasError()) { + result->GetErrorObject().Throw("Failed to migrate DuckLake from v1.1-dev1 to v1.1-dev2: "); + } +} + DuckLakeMetadata DuckLakeMetadataManager::LoadDuckLake() { auto result = transaction.Query(R"( SELECT key, value, scope, scope_id FROM {METADATA_CATALOG}.ducklake_metadata @@ -412,6 +425,26 @@ vector DuckLakeMetadataManager::LoadTags(const Value &tag_map) cons return result; } +vector DuckLakeMetadataManager::LoadViewColumnTags(const Value &list) const { + vector result; + if (list.IsNull()) { + return result; + } + for (auto &val : ListValue::GetChildren(list)) { + auto &struct_children = StructValue::GetChildren(val); + DuckLakeViewColumnTag tag; + tag.column_name = struct_children[0].ToString(); + tag.key = struct_children[1].ToString(); + if (struct_children.size() > 2) { + tag.value = struct_children[2].IsNull() ? Value() : struct_children[2]; + } else { + tag.value = Value(); + } + result.push_back(std::move(tag)); + } + return result; +} + vector DuckLakeMetadataManager::LoadInlinedDataTables(const Value &list) const { vector result; for (auto &val : ListValue::GetChildren(list)) { @@ -580,6 +613,11 @@ WHERE {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < end_snapshot OR end_s {"name", "table_name"}, {"schema_version", "schema_version"}, }; + static const vector> VIEW_COLUMN_TAG_FIELDS = { + {"column_name", "column_name"}, + {"key", "key"}, + {"value", "value"}, + }; // load the table information result = transaction.Query(snapshot, StringUtil::Format(R"( @@ -705,11 +743,18 @@ SELECT view_id, view_uuid, schema_id, view_name, dialect, sql, column_aliases, FROM {METADATA_CATALOG}.ducklake_tag tag WHERE object_id=view_id AND {SNAPSHOT_ID} >= tag.begin_snapshot AND ({SNAPSHOT_ID} < tag.end_snapshot OR tag.end_snapshot IS NULL) - ) AS tag + ) AS tag, + ( + SELECT %s + FROM {METADATA_CATALOG}.ducklake_view_column_tag vct + WHERE vct.view_id=view.view_id AND + {SNAPSHOT_ID} >= vct.begin_snapshot AND ({SNAPSHOT_ID} < vct.end_snapshot OR vct.end_snapshot IS NULL) + ) AS view_column_tags FROM {METADATA_CATALOG}.ducklake_view view WHERE {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < view.end_snapshot OR view.end_snapshot IS NULL) )", - ListAggregation(TAG_FIELDS))); + ListAggregation(TAG_FIELDS), + ListAggregation(VIEW_COLUMN_TAG_FIELDS))); if (result->HasError()) { result->GetErrorObject().Throw("Failed to get partition information from DuckLake: "); } @@ -727,6 +772,9 @@ WHERE {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < view.end_snapshot OR auto tags = row.GetValue(7); view_info.tags = LoadTags(tags); } + if (!row.IsNull(8)) { + view_info.column_tags = LoadViewColumnTags(row.GetValue(8)); + } views.push_back(std::move(view_info)); } @@ -2125,6 +2173,7 @@ string DuckLakeMetadataManager::DropTables(const set &ids, bool rena string DuckLakeMetadataManager::DropViews(const set &ids) { string batch_query = FlushDrop("ducklake_view", "view_id", ids); batch_query += FlushDrop("ducklake_tag", "object_id", ids); + batch_query += FlushDrop("ducklake_view_column_tag", "view_id", ids); return batch_query; } @@ -4006,6 +4055,45 @@ WHERE table_id=tid AND column_id=cid AND ducklake_column_tag.key=overwritten_tag return batch_query; } +string DuckLakeMetadataManager::WriteNewViewColumnTags(const vector &new_tags) { + if (new_tags.empty()) { + return {}; + } + string tags_list; + for (auto &tag : new_tags) { + if (!tags_list.empty()) { + tags_list += ", "; + } + tags_list += + StringUtil::Format("(%d, %s, %s)", tag.view_id.index, SQLString(tag.column_name), SQLString(tag.key)); + } + + string batch_query = StringUtil::Format(R"( +WITH overwritten_tags(vid, col, k) AS ( +VALUES %s +) +UPDATE {METADATA_CATALOG}.ducklake_view_column_tag +SET end_snapshot = {SNAPSHOT_ID} +FROM overwritten_tags +WHERE view_id=vid AND ducklake_view_column_tag.column_name=overwritten_tags.col AND + ducklake_view_column_tag.key=overwritten_tags.k AND end_snapshot IS NULL +;)", + tags_list); + + string new_tag_query; + for (auto &tag : new_tags) { + if (!new_tag_query.empty()) { + new_tag_query += ", "; + } + new_tag_query += StringUtil::Format("(%d, %s, {SNAPSHOT_ID}, NULL, %s, %s)", tag.view_id.index, + SQLString(tag.column_name), SQLString(tag.key), tag.value.ToSQLString()); + } + + new_tag_query = "INSERT INTO {METADATA_CATALOG}.ducklake_view_column_tag VALUES " + new_tag_query + ";"; + batch_query += new_tag_query; + return batch_query; +} + struct ColumnStatsSQL { string contains_null; string contains_nan; @@ -4522,7 +4610,8 @@ WHERE table_id IN (%s);)", } // delete any views, schemas, macros, etc that are no longer referenced - tables_to_delete_from = {"ducklake_schema", "ducklake_view", "ducklake_tag", "ducklake_macro"}; + tables_to_delete_from = {"ducklake_schema", "ducklake_view", "ducklake_view_column_tag", "ducklake_tag", + "ducklake_macro"}; for (auto &delete_tbl : tables_to_delete_from) { auto result = transaction.Query(StringUtil::Format(R"( DELETE FROM {METADATA_CATALOG}.%s diff --git a/src/storage/ducklake_schema_entry.cpp b/src/storage/ducklake_schema_entry.cpp index 10840d41b9f..a29f275e6d7 100644 --- a/src/storage/ducklake_schema_entry.cpp +++ b/src/storage/ducklake_schema_entry.cpp @@ -194,6 +194,42 @@ optional_ptr DuckLakeSchemaEntry::CreateType(CatalogTransaction tr throw NotImplementedException("DuckLake does not support user-defined types"); } +namespace { + +bool TryApplySetColumnCommentToTable(DuckLakeTransaction &transaction, CatalogTransaction catalog_transaction, + DuckLakeSchemaEntry &schema, SetColumnCommentInfo &alter) { + auto table_entry = schema.GetEntry(catalog_transaction, CatalogType::TABLE_ENTRY, alter.name); + if (!table_entry || table_entry->type != CatalogType::TABLE_ENTRY) { + return false; + } + auto &table = table_entry->Cast(); + auto new_table = table.Alter(transaction, alter); + transaction.AlterEntry(table, std::move(new_table)); + return true; +} + +void ApplySetColumnCommentToTable(DuckLakeTransaction &transaction, CatalogTransaction catalog_transaction, + DuckLakeSchemaEntry &schema, SetColumnCommentInfo &alter, + const char *not_a_table_message) { + if (!TryApplySetColumnCommentToTable(transaction, catalog_transaction, schema, alter)) { + throw BinderException(not_a_table_message, alter.name); + } +} + +void ApplySetColumnCommentToView(DuckLakeTransaction &transaction, CatalogTransaction catalog_transaction, + DuckLakeSchemaEntry &schema, SetColumnCommentInfo &alter, + const char *not_a_view_message) { + auto view_entry = schema.GetEntry(catalog_transaction, CatalogType::VIEW_ENTRY, alter.name); + if (!view_entry || view_entry->type != CatalogType::VIEW_ENTRY) { + throw BinderException(not_a_view_message, alter.name); + } + auto &view = view_entry->Cast(); + auto new_view = view.Alter(transaction, alter); + transaction.AlterEntry(view, std::move(new_view)); +} + +} // namespace + void DuckLakeSchemaEntry::Alter(CatalogTransaction catalog_transaction, AlterInfo &info) { auto &context = catalog_transaction.GetContext(); auto &transaction = DuckLakeTransaction::Get(context, catalog); @@ -266,13 +302,16 @@ void DuckLakeSchemaEntry::Alter(CatalogTransaction catalog_transaction, AlterInf } case AlterType::SET_COLUMN_COMMENT: { auto &alter = info.Cast(); - auto table_entry = GetEntry(catalog_transaction, CatalogType::TABLE_ENTRY, alter.name); - if (table_entry->type != CatalogType::TABLE_ENTRY) { - throw BinderException("Cannot comment on columns for entry %s - it is not a table", alter.name); + if (alter.catalog_entry_type == CatalogType::VIEW_ENTRY) { + ApplySetColumnCommentToView(transaction, catalog_transaction, *this, alter, + "Cannot comment on columns for entry %s - it is not a view"); + } else if (alter.catalog_entry_type == CatalogType::TABLE_ENTRY) { + ApplySetColumnCommentToTable(transaction, catalog_transaction, *this, alter, + "Cannot comment on columns for entry %s - it is not a table"); + } else if (!TryApplySetColumnCommentToTable(transaction, catalog_transaction, *this, alter)) { + ApplySetColumnCommentToView(transaction, catalog_transaction, *this, alter, + "Cannot comment on columns for entry %s - could not find table or view"); } - auto &table = table_entry->Cast(); - auto new_table = table.Alter(transaction, alter); - transaction.AlterEntry(table, std::move(new_table)); break; } default: diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp index 6a7f3ae064c..a7cc2a5a899 100644 --- a/src/storage/ducklake_transaction.cpp +++ b/src/storage/ducklake_transaction.cpp @@ -830,6 +830,13 @@ void GetTransactionViewChanges(reference view_entry, TransactionCh } break; } + case LocalChangeType::SET_COLUMN_COMMENT: { + auto view_id = view.GetViewId(); + if (!IsTransactionLocal(view_id)) { + changes.altered_views.insert(view_id); + } + break; + } case LocalChangeType::NONE: case LocalChangeType::CREATED: case LocalChangeType::RENAMED: { @@ -1506,6 +1513,7 @@ struct NewTableInfo { vector new_partition_keys; vector new_tags; vector new_column_tags; + vector new_view_column_tags; vector dropped_columns; vector new_columns; vector new_inlined_data_tables; @@ -1871,12 +1879,17 @@ void DuckLakeTransaction::GetNewViewInfo(DuckLakeCommitState &commit_state, Duck } // count comment operations for deduplication idx_t view_comment_count = 0; + map view_column_comment_count; for (idx_t view_idx = 0; view_idx < views.size(); view_idx++) { - if (views[view_idx].get().GetLocalChange().type == LocalChangeType::SET_COMMENT) { + auto lc = views[view_idx].get().GetLocalChange(); + if (lc.type == LocalChangeType::SET_COMMENT) { view_comment_count++; + } else if (lc.type == LocalChangeType::SET_COLUMN_COMMENT && !lc.view_column_comment_name.empty()) { + view_column_comment_count[lc.view_column_comment_name]++; } } idx_t view_comment_remaining = view_comment_count; + map view_column_comment_remaining(std::move(view_column_comment_count)); // traverse in reverse order for (idx_t view_idx = views.size(); view_idx > 0; view_idx--) { auto &view = views[view_idx - 1].get(); @@ -1895,6 +1908,29 @@ void DuckLakeTransaction::GetNewViewInfo(DuckLakeCommitState &commit_state, Duck transaction_changes.altered_views.insert(view.GetViewId()); break; } + case LocalChangeType::SET_COLUMN_COMMENT: { + auto local_change = view.GetLocalChange(); + if (local_change.view_column_comment_name.empty()) { + throw InternalException("SET_COLUMN_COMMENT on view without view_column_comment_name"); + } + auto &rem = view_column_comment_remaining[local_change.view_column_comment_name]; + rem--; + if (rem > 0) { + break; + } + DuckLakeViewColumnTagInfo comment_info; + comment_info.view_id = commit_state.GetViewId(view); + comment_info.column_name = local_change.view_column_comment_name; + comment_info.key = "comment"; + auto create_info_ptr = view.GetInfo(); + auto &view_info = create_info_ptr->Cast(); + auto cit = view_info.column_comments_map.find(local_change.view_column_comment_name); + comment_info.value = cit != view_info.column_comments_map.end() ? cit->second : Value(); + result.new_view_column_tags.push_back(std::move(comment_info)); + + transaction_changes.altered_views.insert(view.GetViewId()); + break; + } case LocalChangeType::NONE: case LocalChangeType::CREATED: case LocalChangeType::RENAMED: { @@ -2358,6 +2394,7 @@ string DuckLakeTransaction::CommitChanges(DuckLakeCommitState &commit_state, batch_queries += metadata_manager->WriteNewViews(result.new_views); batch_queries += metadata_manager->WriteNewTags(result.new_tags); batch_queries += metadata_manager->WriteNewColumnTags(result.new_column_tags); + batch_queries += metadata_manager->WriteNewViewColumnTags(result.new_view_column_tags); batch_queries += metadata_manager->WriteDroppedColumns(result.dropped_columns); batch_queries += metadata_manager->WriteNewColumns(result.new_columns); batch_queries += metadata_manager->WriteNewInlinedTables(commit_snapshot, result.new_inlined_data_tables); @@ -3145,6 +3182,7 @@ void DuckLakeTransaction::AlterEntryInternal(DuckLakeViewEntry &view, unique_ptr break; } case LocalChangeType::SET_COMMENT: + case LocalChangeType::SET_COLUMN_COMMENT: break; default: throw NotImplementedException("Alter type not supported in DuckLakeTransaction::AlterEntry"); diff --git a/src/storage/ducklake_view_entry.cpp b/src/storage/ducklake_view_entry.cpp index 7165ab03aa0..631db4d19e5 100644 --- a/src/storage/ducklake_view_entry.cpp +++ b/src/storage/ducklake_view_entry.cpp @@ -1,12 +1,17 @@ #include "storage/ducklake_view_entry.hpp" +#include "storage/ducklake_transaction.hpp" #include "duckdb/parser/parsed_data/create_view_info.hpp" +#include "duckdb/parser/parsed_data/comment_on_column_info.hpp" #include "duckdb/parser/parser.hpp" #include "duckdb/parser/parsed_data/alter_info.hpp" #include "duckdb/parser/parsed_data/alter_table_info.hpp" #include "duckdb/parser/keyword_helper.hpp" +#include "duckdb/common/exception/binder_exception.hpp" #include "duckdb/common/string_util.hpp" #include "common/ducklake_util.hpp" +#include + namespace duckdb { DuckLakeViewEntry::DuckLakeViewEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateViewInfo &info, @@ -50,6 +55,25 @@ unique_ptr DuckLakeViewEntry::AlterEntry(ClientContext &context, A } } +unique_ptr DuckLakeViewEntry::Alter(DuckLakeTransaction &transaction, SetColumnCommentInfo &info) { + auto context = transaction.context.lock(); + if (!context) { + throw InternalException("Alter view column comment: missing client context"); + } + BindView(*context); + auto view_columns = GetColumnInfo(); + if (view_columns) { + auto &names = view_columns->names; + if (std::find(names.begin(), names.end(), info.column_name) == names.end()) { + throw BinderException("View \"%s\" does not have a column with name \"%s\"", name, info.column_name); + } + } + auto create_info = GetInfo(); + auto &view_info = create_info->Cast(); + view_info.column_comments_map[info.column_name] = info.comment_value; + return make_uniq(*this, view_info, LocalChange::SetViewColumnComment(info.column_name)); +} + unique_ptr DuckLakeViewEntry::GetInfo() const { auto info = ViewCatalogEntry::GetInfo(); auto &view_info = info->Cast(); diff --git a/test/sql/attach/attach_ducklake_version.test b/test/sql/attach/attach_ducklake_version.test index 5a6016af7c7..adf5d2b12f2 100644 --- a/test/sql/attach/attach_ducklake_version.test +++ b/test/sql/attach/attach_ducklake_version.test @@ -82,7 +82,7 @@ ATTACH 'ducklake:{TEST_DIR}/ducklake_version_10.db' AS ducklake(DATA_PATH '{TEST query I SELECT value FROM __ducklake_metadata_ducklake.ducklake_metadata WHERE key = 'version' ---- -1.1-dev1 +1.1-dev2 query I SELECT * FROM ducklake.t1 @@ -92,14 +92,14 @@ SELECT * FROM ducklake.t1 statement ok DETACH ducklake -# after migration, re-attach without version should succeed (now at 1.1-dev1) +# after migration, re-attach without version should succeed (now at 1.1-dev2) statement ok ATTACH 'ducklake:{TEST_DIR}/ducklake_version_10.db' AS ducklake(DATA_PATH '{TEST_DIR}/ducklake_version_10_files') query I SELECT value FROM __ducklake_metadata_ducklake.ducklake_metadata WHERE key = 'version' ---- -1.1-dev1 +1.1-dev2 query I SELECT * FROM ducklake.t1 @@ -109,20 +109,20 @@ SELECT * FROM ducklake.t1 statement ok DETACH ducklake -# cannot downgrade: attach a 1.1-dev1 catalog with DUCKLAKE_VERSION '1.0' +# cannot downgrade: attach a 1.1-dev2 catalog with DUCKLAKE_VERSION '1.0' statement error ATTACH 'ducklake:{TEST_DIR}/ducklake_version_10.db' AS ducklake(DATA_PATH '{TEST_DIR}/ducklake_version_10_files', DUCKLAKE_VERSION '1.0') ---- Cannot downgrade -# create a new database with default version (1.1-dev1) +# create a new database with default version (1.1-dev2) statement ok ATTACH 'ducklake:{TEST_DIR}/ducklake_version_default.db' AS ducklake(DATA_PATH '{TEST_DIR}/ducklake_version_default_files') query I SELECT value FROM __ducklake_metadata_ducklake.ducklake_metadata WHERE key = 'version' ---- -1.1-dev1 +1.1-dev2 statement ok CREATE TABLE ducklake.t2(j INT); @@ -145,7 +145,7 @@ ATTACH 'ducklake:{TEST_DIR}/ducklake_version_default.db' AS ducklake(DATA_PATH ' query I SELECT value FROM __ducklake_metadata_ducklake.ducklake_metadata WHERE key = 'version' ---- -1.1-dev1 +1.1-dev2 query I SELECT * FROM ducklake.t2 diff --git a/test/sql/comments/comment_on_view_column.test b/test/sql/comments/comment_on_view_column.test new file mode 100644 index 00000000000..55cc78bef52 --- /dev/null +++ b/test/sql/comments/comment_on_view_column.test @@ -0,0 +1,51 @@ +# name: test/sql/comments/comment_on_view_column.test +# description: COMMENT ON COLUMN for DuckLake views persists and shows in duckdb_columns() +# group: [comments] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{BASE_TEST_NAME}.db + +test-env DATA_PATH {TEST_DIR} + + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/ducklake_view_col_comment_files') + +statement ok +CREATE VIEW ducklake.v AS SELECT 1 AS col_a + +query I +SELECT count(*) FROM duckdb_columns() +WHERE table_name = 'v' AND column_name = 'col_a' AND schema_name = 'main' AND database_name = 'ducklake' AND comment IS NULL +---- +1 + +statement ok +COMMENT ON COLUMN ducklake.v.col_a IS 'view column note' + +query I +SELECT count(*) FROM duckdb_columns() +WHERE table_name = 'v' AND column_name = 'col_a' AND schema_name = 'main' AND database_name = 'ducklake' AND comment = 'view column note' +---- +1 + +statement ok +USE memory + +statement ok +DETACH ducklake + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/ducklake_view_col_comment_files') + +statement ok +USE ducklake + +query I +SELECT count(*) FROM duckdb_columns() +WHERE table_name = 'v' AND column_name = 'col_a' AND schema_name = 'main' AND database_name = 'ducklake' AND comment = 'view column note' +---- +1 diff --git a/test/sql/comments/comment_on_view_column_conflict.test b/test/sql/comments/comment_on_view_column_conflict.test new file mode 100644 index 00000000000..b62ec3bc73f --- /dev/null +++ b/test/sql/comments/comment_on_view_column_conflict.test @@ -0,0 +1,46 @@ +# name: test/sql/comments/comment_on_view_column_conflict.test +# description: Concurrent COMMENT ON COLUMN on the same view column conflicts +# group: [comments] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/ducklake_view_col_comment_cf/') + +statement ok +CREATE VIEW ducklake.vcf AS SELECT 100 AS k + +statement ok +SET immediate_transaction_mode=true + +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +COMMENT ON COLUMN ducklake.vcf.k IS 'a' + +statement ok con2 +COMMENT ON COLUMN ducklake.vcf.k IS 'b' + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- +another transaction has altered it + +query I +SELECT comment FROM duckdb_columns() WHERE table_name = 'vcf' AND column_name = 'k' AND database_name = 'ducklake' +---- +a diff --git a/test/sql/comments/comment_on_view_column_invalid.test b/test/sql/comments/comment_on_view_column_invalid.test new file mode 100644 index 00000000000..cbf634153de --- /dev/null +++ b/test/sql/comments/comment_on_view_column_invalid.test @@ -0,0 +1,23 @@ +# name: test/sql/comments/comment_on_view_column_invalid.test +# description: COMMENT ON COLUMN fails for a non-existent view column +# group: [comments] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/ducklake_view_col_comment_bad/') + +statement ok +CREATE VIEW ducklake.vbad AS SELECT 1 AS only_col + +statement error +COMMENT ON COLUMN ducklake.vbad.nope IS 'x' +---- +Binder Error diff --git a/test/sql/comments/comment_on_view_column_update.test b/test/sql/comments/comment_on_view_column_update.test new file mode 100644 index 00000000000..9d340298345 --- /dev/null +++ b/test/sql/comments/comment_on_view_column_update.test @@ -0,0 +1,35 @@ +# name: test/sql/comments/comment_on_view_column_update.test +# description: Updating a view column comment expires the prior ducklake_view_column_tag row +# group: [comments] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/ducklake_view_col_comment_upd/') + +statement ok +CREATE VIEW ducklake.vw AS SELECT 42 AS x + +statement ok +COMMENT ON COLUMN ducklake.vw.x IS 'first' + +statement ok +COMMENT ON COLUMN ducklake.vw.x IS 'second' + +query III +SELECT column_name, key, value FROM __ducklake_metadata_ducklake.ducklake_view_column_tag WHERE end_snapshot IS NULL ORDER BY key +---- +x comment second + +query IIII +SELECT column_name, key, value, end_snapshot IS NOT NULL FROM __ducklake_metadata_ducklake.ducklake_view_column_tag ORDER BY begin_snapshot +---- +x comment first true +x comment second false diff --git a/test/sql/comments/drop_view_expires_tags.test b/test/sql/comments/drop_view_expires_tags.test index ba489c7a1b4..810bf702d2a 100644 --- a/test/sql/comments/drop_view_expires_tags.test +++ b/test/sql/comments/drop_view_expires_tags.test @@ -1,5 +1,5 @@ # name: test/sql/comments/drop_view_expires_tags.test -# description: Verify DROP VIEW expires associated tags in ducklake_tag +# description: Verify DROP VIEW expires ducklake_tag (view comment) and ducklake_view_column_tag (column comments). # group: [comments] require ducklake @@ -12,27 +12,45 @@ statement ok ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{TEST_DIR}/drop_view_expires_tags/') statement ok -CREATE VIEW ducklake.v AS SELECT 1 +CREATE VIEW ducklake.v AS SELECT 1 AS c statement ok COMMENT ON VIEW ducklake.v IS 'my view comment' -# verify the tag is active +statement ok +COMMENT ON COLUMN ducklake.v.c IS 'my col comment' + +# verify both tag rows are active query II SELECT key, value FROM __ducklake_metadata_ducklake.ducklake_tag WHERE end_snapshot IS NULL; ---- comment my view comment +query II +SELECT key, value FROM __ducklake_metadata_ducklake.ducklake_view_column_tag WHERE end_snapshot IS NULL; +---- +comment my col comment + statement ok DROP VIEW ducklake.v -# after DROP VIEW, the tag should be expired +# after DROP VIEW, no active rows in either tag table (DropViews expires ducklake_tag + ducklake_view_column_tag) query I SELECT count(*) FROM __ducklake_metadata_ducklake.ducklake_tag WHERE end_snapshot IS NULL; ---- 0 +query I +SELECT count(*) FROM __ducklake_metadata_ducklake.ducklake_view_column_tag WHERE end_snapshot IS NULL; +---- +0 + query I SELECT count(*) FROM __ducklake_metadata_ducklake.ducklake_tag WHERE end_snapshot IS NOT NULL; ---- 1 + +query I +SELECT count(*) FROM __ducklake_metadata_ducklake.ducklake_view_column_tag WHERE end_snapshot IS NOT NULL; +---- +1 diff --git a/test/sql/compaction/expire_snapshots_schema.test b/test/sql/compaction/expire_snapshots_schema.test index a9967984d77..61c97ce50da 100644 --- a/test/sql/compaction/expire_snapshots_schema.test +++ b/test/sql/compaction/expire_snapshots_schema.test @@ -49,7 +49,7 @@ SELECT COUNT(*) FROM metadata.ducklake_schema 1 # all traces of the schema are gone -foreach tbl ducklake_view ducklake_table ducklake_column ducklake_table_stats ducklake_table_column_stats ducklake_data_file ducklake_delete_file +foreach tbl ducklake_view ducklake_view_column_tag ducklake_table ducklake_column ducklake_table_stats ducklake_table_column_stats ducklake_data_file ducklake_delete_file query I SELECT COUNT(*) FROM metadata.{tbl}