From 3fa5ba250037245fa41a01f9f3e9ecda0999649f Mon Sep 17 00:00:00 2001 From: Alex-Monahan Date: Thu, 30 Apr 2026 20:31:58 -0700 Subject: [PATCH 01/12] CREATE and CTAS SORTED BY and PARTITIONED BY --- .../ducklake_compaction_functions.cpp | 15 +- .../ducklake_compaction_functions.hpp | 4 + src/include/storage/ducklake_catalog.hpp | 3 + src/include/storage/ducklake_insert.hpp | 27 +- .../storage/ducklake_partition_data.hpp | 9 + src/include/storage/ducklake_schema_entry.hpp | 8 +- src/include/storage/ducklake_table_entry.hpp | 13 +- src/storage/ducklake_catalog.cpp | 11 + src/storage/ducklake_insert.cpp | 126 +++-- src/storage/ducklake_schema_entry.cpp | 35 +- src/storage/ducklake_table_entry.cpp | 96 ++-- src/storage/ducklake_transaction.cpp | 12 + .../create_table_inline_basic.test | 311 ++++++++++++ .../create_table_inline_ctas.test | 248 ++++++++++ ...e_table_inline_ctas_sort_verification.test | 88 ++++ .../create_table_inline_errors.test | 96 ++++ .../create_table_inline_transactions.test | 453 ++++++++++++++++++ .../create_table_inline_with_inlining.test | 215 +++++++++ 18 files changed, 1695 insertions(+), 75 deletions(-) create mode 100644 test/sql/create_table_inline_partition_sort/create_table_inline_basic.test create mode 100644 test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test create mode 100644 test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test create mode 100644 test/sql/create_table_inline_partition_sort/create_table_inline_errors.test create mode 100644 test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test create mode 100644 test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test diff --git a/src/functions/ducklake_compaction_functions.cpp b/src/functions/ducklake_compaction_functions.cpp index 92ad52f05f4..dcd89b363be 100644 --- a/src/functions/ducklake_compaction_functions.cpp +++ b/src/functions/ducklake_compaction_functions.cpp @@ -47,15 +47,16 @@ vector DuckLakeCompactor::ParseSortOrders(const DuckLakeSort &sort_ } //! Binds ORDER BY expressions directly using ExpressionBinder. -vector DuckLakeCompactor::BindSortOrders(Binder &binder, DuckLakeTableEntry &table, idx_t table_index, +vector DuckLakeCompactor::BindSortOrders(Binder &binder, const ColumnList &columns, + const string &table_name, TableIndex table_index, vector &pre_bound_orders) { - auto &columns = table.GetColumns(); auto column_names = columns.GetColumnNames(); auto column_types = columns.GetColumnTypes(); // Create a child binder with the table columns in scope auto child_binder = Binder::CreateBinder(binder.context, &binder); - child_binder->bind_context.AddGenericBinding(table_index, table.name, column_names, column_types); + // AddGenericBinding takes the raw idx_t binding id, not DuckLake's TableIndex wrapper. + child_binder->bind_context.AddGenericBinding(table_index.index, table_name, column_names, column_types); // Bind each ORDER BY expression directly vector orders; @@ -68,6 +69,12 @@ vector DuckLakeCompactor::BindSortOrders(Binder &binder, DuckL return orders; } +vector DuckLakeCompactor::BindSortOrders(Binder &binder, DuckLakeTableEntry &table, + idx_t table_index, + vector &pre_bound_orders) { + return BindSortOrders(binder, table.GetColumns(), table.name, TableIndex(table_index), pre_bound_orders); +} + //===--------------------------------------------------------------------===// // Compaction Operator //===--------------------------------------------------------------------===// @@ -384,7 +391,7 @@ unique_ptr DuckLakeCompactor::InsertSort(Binder &binder, unique } // Validate all column references in sort expressions exist in the table - DuckLakeTableEntry::ValidateSortExpressionColumns(table, pre_bound_orders); + DuckLakeTableEntry::ValidateSortExpressionColumns(table.GetColumns(), pre_bound_orders); // Resolve types for the input plan (could be LogicalGet or LogicalProjection) plan->ResolveOperatorTypes(); diff --git a/src/include/functions/ducklake_compaction_functions.hpp b/src/include/functions/ducklake_compaction_functions.hpp index ef69944ad52..94db42bc1f6 100644 --- a/src/include/functions/ducklake_compaction_functions.hpp +++ b/src/include/functions/ducklake_compaction_functions.hpp @@ -93,6 +93,10 @@ class DuckLakeCompactor { static vector ParseSortOrders(const DuckLakeSort &sort_data); static vector BindSortOrders(Binder &binder, DuckLakeTableEntry &table, idx_t table_index, vector &pre_bound_orders); + //! Overload that takes (columns, table_name) directly so CTAS planning can sort-bind before any + //! DuckLakeTableEntry exists. Behaves identically to the entry-taking overload. + static vector BindSortOrders(Binder &binder, const ColumnList &columns, const string &table_name, + TableIndex table_index, vector &pre_bound_orders); private: ClientContext &context; diff --git a/src/include/storage/ducklake_catalog.hpp b/src/include/storage/ducklake_catalog.hpp index f3d5cb014ab..a2b69e56b43 100644 --- a/src/include/storage/ducklake_catalog.hpp +++ b/src/include/storage/ducklake_catalog.hpp @@ -28,6 +28,7 @@ class DuckLakeFieldData; struct DuckLakeFileListEntry; struct DuckLakeConfigOption; struct DeleteFileMap; +struct BoundCreateTableInfo; class LogicalGet; //! Cache entry for DuckLake table statistics @@ -135,6 +136,8 @@ class DuckLakeCatalog : public Catalog { optional_ptr CreateSchema(CatalogTransaction transaction, CreateSchemaInfo &info) override; + ErrorData SupportsCreateTable(BoundCreateTableInfo &info) override; + void ScanSchemas(ClientContext &context, std::function callback) override; optional_ptr LookupSchema(CatalogTransaction transaction, const EntryLookupInfo &schema_lookup, diff --git a/src/include/storage/ducklake_insert.hpp b/src/include/storage/ducklake_insert.hpp index c591763ca50..4bd5ad493d3 100644 --- a/src/include/storage/ducklake_insert.hpp +++ b/src/include/storage/ducklake_insert.hpp @@ -15,13 +15,14 @@ #include "storage/ducklake_stats.hpp" #include "common/ducklake_data_file.hpp" #include "storage/ducklake_field_data.hpp" +#include "storage/ducklake_partition_data.hpp" +#include "storage/ducklake_sort_data.hpp" namespace duckdb { class DuckLakeCatalog; class DuckLakeSchemaEntry; class DuckLakeTableEntry; class DuckLakeFieldData; -struct DuckLakePartition; struct DuckLakeCopyOptions; struct DuckLakeCopyInput; @@ -41,15 +42,19 @@ class DuckLakeInsertGlobalState : public GlobalSinkState { class DuckLakeInsert : public PhysicalOperator { public: - //! INSERT INTO + //! INSERT INTO an existing table. DuckLakeInsert(PhysicalPlan &physical_plan, const vector &types, DuckLakeTableEntry &table, optional_idx partition_id, string encryption_key); - //! CREATE TABLE AS + //! CREATE TABLE AS - the table is created in GetGlobalSinkState. Any inline + //! PARTITIONED BY / SORTED BY clauses are pre-built into ctas_partition_data / ctas_sort_data at planning + //! time so the physical_copy upstream of this operator can hive-partition the write with a partition_id + //! that matches what gets attached to the table entry at sink-state-init. DuckLakeInsert(PhysicalPlan &physical_plan, const vector &types, SchemaCatalogEntry &schema, unique_ptr info, string table_uuid, string table_data_path, - string encryption_key); + unique_ptr ctas_partition_data, unique_ptr ctas_sort_data, + optional_idx partition_id, string encryption_key); - //! The table to insert into + //! The table to insert into (only set for INSERT INTO; nullptr for CTAS until GetGlobalSinkState resolves) optional_ptr table; //! Table schema, in case of CREATE TABLE AS optional_ptr schema; @@ -59,6 +64,11 @@ class DuckLakeInsert : public PhysicalOperator { string table_uuid; //! The table data path, in case of CREATE TABLE AS string table_data_path; + //! Pre-built partition spec for CTAS (allocated at planning time so the physical write carries the + //! correct partition_id). + unique_ptr ctas_partition_data; + //! Pre-built sort spec for CTAS (same lifecycle as ctas_partition_data). + unique_ptr ctas_sort_data; //! The partition id we are writing into (if any) optional_idx partition_id; //! The encryption key used for writing the Parquet files @@ -140,8 +150,13 @@ struct DuckLakeCopyOptions { struct DuckLakeCopyInput { explicit DuckLakeCopyInput(ClientContext &context, DuckLakeTableEntry &table, const string &hive_partition = ""); + //! CTAS-flavored: take the field-id mapping and (optional) partition spec from the planning-time-built + //! spec rather than from a DuckLakeTableEntry that hasn't been created yet. field_data is required + //! because the parquet writer always needs it; partition_data is null when CREATE TABLE AS has no + //! inline PARTITIONED BY clause. DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns, - const string &data_path_p); + const string &data_path_p, DuckLakeFieldData &field_data, + optional_ptr partition_data = nullptr); DuckLakeCatalog &catalog; optional_ptr partition_data; diff --git a/src/include/storage/ducklake_partition_data.hpp b/src/include/storage/ducklake_partition_data.hpp index 7fa7c34f94b..f65300a56ff 100644 --- a/src/include/storage/ducklake_partition_data.hpp +++ b/src/include/storage/ducklake_partition_data.hpp @@ -20,12 +20,21 @@ enum class DuckLakeTransformType { IDENTITY, BUCKET, YEAR, MONTH, DAY, HOUR }; struct DuckLakeTransform { DuckLakeTransformType type; idx_t bucket_count = 0; // only for BUCKET + + bool operator==(const DuckLakeTransform &other) const { + return type == other.type && bucket_count == other.bucket_count; + } }; struct DuckLakePartitionField { idx_t partition_key_index = 0; FieldIndex field_id; DuckLakeTransform transform; + + bool operator==(const DuckLakePartitionField &other) const { + return partition_key_index == other.partition_key_index && field_id == other.field_id && + transform == other.transform; + } }; struct DuckLakePartition { diff --git a/src/include/storage/ducklake_schema_entry.hpp b/src/include/storage/ducklake_schema_entry.hpp index d50f8e637c9..3257444acd8 100644 --- a/src/include/storage/ducklake_schema_entry.hpp +++ b/src/include/storage/ducklake_schema_entry.hpp @@ -10,6 +10,8 @@ #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp" #include "storage/ducklake_catalog_set.hpp" +#include "storage/ducklake_partition_data.hpp" +#include "storage/ducklake_sort_data.hpp" namespace duckdb { class DuckLakeTransaction; @@ -32,8 +34,12 @@ class DuckLakeSchemaEntry : public SchemaCatalogEntry { } public: + //! Create a DuckLakeTableEntry and register it with the transaction. + //! When prebuilt_partition_data / prebuilt_sort_data are used by CTAS when partitioned or sorted optional_ptr CreateTableExtended(CatalogTransaction transaction, BoundCreateTableInfo &info, - string table_uuid, string table_data_path); + string table_uuid, string table_data_path, + unique_ptr prebuilt_partition_data = nullptr, + unique_ptr prebuilt_sort_data = nullptr); unique_ptr GetInfo() const override; optional_ptr CreateTable(CatalogTransaction transaction, BoundCreateTableInfo &info) override; optional_ptr CreateFunction(CatalogTransaction transaction, CreateFunctionInfo &info) override; diff --git a/src/include/storage/ducklake_table_entry.hpp b/src/include/storage/ducklake_table_entry.hpp index 0f97e664a2f..8dc63131f21 100644 --- a/src/include/storage/ducklake_table_entry.hpp +++ b/src/include/storage/ducklake_table_entry.hpp @@ -122,8 +122,17 @@ class DuckLakeTableEntry : public TableCatalogEntry { virtual_column_map_t GetVirtualColumns() const override; vector GetRowIdColumns() const override; - //! Validates that all column references in sort expressions exist in the table - static void ValidateSortExpressionColumns(DuckLakeTableEntry &table, const vector &orders); + //! Validates that all column references in sort expressions exist in the column list. + //! Takes (columns) directly so it can run at CTAS planning time before a DuckLakeTableEntry exists. + static void ValidateSortExpressionColumns(const ColumnList &columns, const vector &orders); + + //! Build a DuckLakePartition from raw partition expressions (allocates a transaction-local id). + static unique_ptr BuildPartitionData(DuckLakeTransaction &transaction, const ColumnList &columns, + DuckLakeFieldData &field_data, + const vector> &partition_keys); + //! Build a DuckLakeSort from a vector of OrderByNode (allocates a transaction-local id). + static unique_ptr BuildSortData(DuckLakeTransaction &transaction, const ColumnList &columns, + const vector &orders); private: unique_ptr AlterTable(DuckLakeTransaction &transaction, RenameTableInfo &info); diff --git a/src/storage/ducklake_catalog.cpp b/src/storage/ducklake_catalog.cpp index c2c7e1f8a5a..ec77b4bfed4 100644 --- a/src/storage/ducklake_catalog.cpp +++ b/src/storage/ducklake_catalog.cpp @@ -10,6 +10,7 @@ #include "duckdb/parser/parsed_data/create_table_info.hpp" #include "duckdb/parser/parsed_data/create_view_info.hpp" #include "duckdb/parser/parsed_data/drop_info.hpp" +#include "duckdb/planner/parsed_data/bound_create_table_info.hpp" #include "duckdb/storage/database_size.hpp" #include "storage/ducklake_initializer.hpp" #include "storage/ducklake_schema_entry.hpp" @@ -155,6 +156,16 @@ optional_ptr DuckLakeCatalog::CreateSchema(CatalogTransaction tran return result; } +ErrorData DuckLakeCatalog::SupportsCreateTable(BoundCreateTableInfo &info) { + auto &base = info.Base().Cast(); + if (!base.options.empty()) { + return ErrorData( + ExceptionType::CATALOG, + StringUtil::Format("WITH clause is not supported for tables in a %s catalog", GetCatalogType())); + } + return ErrorData(); +} + void DuckLakeCatalog::DropSchema(ClientContext &context, DropInfo &info) { auto schema = GetSchema(GetCatalogTransaction(context), info.name, info.if_not_found); if (!schema) { diff --git a/src/storage/ducklake_insert.cpp b/src/storage/ducklake_insert.cpp index ba3cc8bcf84..81f4375d70d 100644 --- a/src/storage/ducklake_insert.cpp +++ b/src/storage/ducklake_insert.cpp @@ -42,10 +42,13 @@ DuckLakeInsert::DuckLakeInsert(PhysicalPlan &physical_plan, const vector &types, SchemaCatalogEntry &schema, unique_ptr info, string table_uuid_p, - string table_data_path_p, string encryption_key_p) + string table_data_path_p, unique_ptr ctas_partition_data_p, + unique_ptr ctas_sort_data_p, optional_idx partition_id, + string encryption_key_p) : PhysicalOperator(physical_plan, PhysicalOperatorType::EXTENSION, types, 1), table(nullptr), schema(&schema), info(std::move(info)), table_uuid(std::move(table_uuid_p)), table_data_path(std::move(table_data_path_p)), - encryption_key(std::move(encryption_key_p)) { + ctas_partition_data(std::move(ctas_partition_data_p)), ctas_sort_data(std::move(ctas_sort_data_p)), + partition_id(partition_id), encryption_key(std::move(encryption_key_p)) { } //===--------------------------------------------------------------------===// @@ -58,12 +61,18 @@ DuckLakeInsertGlobalState::DuckLakeInsertGlobalState(DuckLakeTableEntry &table) unique_ptr DuckLakeInsert::GetGlobalSinkState(ClientContext &context) const { optional_ptr table_ptr; if (info) { - // CREATE TABLE AS - create the table + // CREATE TABLE AS - materialize the table entry now, attaching a *clone* of the partition / sort spec + // we built at planning time so the on-disk partition_id and the in-memory partition_data agree. + // We clone (rather than std::move) so the operator's prebuilt spec stays intact across executions. + auto partition_clone = ctas_partition_data ? make_uniq(*ctas_partition_data) : nullptr; + auto sort_clone = ctas_sort_data ? make_uniq(*ctas_sort_data) : nullptr; + auto &catalog = schema->catalog; auto &ducklake_schema = schema.get_mutable()->Cast(); auto transaction = catalog.GetCatalogTransaction(context); - table_ptr = &ducklake_schema.CreateTableExtended(transaction, *info, table_uuid, table_data_path) - ->Cast(); + auto created = ducklake_schema.CreateTableExtended(transaction, *info, table_uuid, table_data_path, + std::move(partition_clone), std::move(sort_clone)); + table_ptr = &created->Cast(); } else { // INSERT INTO table_ptr = table; @@ -344,8 +353,11 @@ DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeTableEntry } DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns, - const string &data_path_p) + const string &data_path_p, DuckLakeFieldData &field_data_p, + optional_ptr partition_data_p) : catalog(schema.ParentCatalog().Cast()), columns(columns), data_path(data_path_p) { + field_data = &field_data_p; + partition_data = partition_data_p; schema_id = schema.GetSchemaId(); encryption_key = catalog.GenerateEncryptionKey(context); } @@ -735,40 +747,43 @@ static void ResolveColumnRefs(unique_ptr &expr) { ExpressionIterator::EnumerateChildren(*expr, [](unique_ptr &child) { ResolveColumnRefs(child); }); } -static optional_ptr PlanInsertSort(ClientContext &context, PhysicalPlanGenerator &planner, - PhysicalOperator &plan, DuckLakeTableEntry &table, - optional_ptr sort_data) { - // Parse the sort expressions from the sort_data +static optional_ptr PlanInsertSortFromColumns(ClientContext &context, PhysicalPlanGenerator &planner, + PhysicalOperator &plan, const ColumnList &columns, + const string &table_name, + optional_ptr sort_data) { + // Shared between the INSERT path (called with an existing table's columns/name) and the CTAS path + // (called with the planning-time CreateTableInfo's columns and the to-be-created table's name). auto pre_bound_orders = DuckLakeCompactor::ParseSortOrders(*sort_data); if (pre_bound_orders.empty()) { return nullptr; } + DuckLakeTableEntry::ValidateSortExpressionColumns(columns, pre_bound_orders); - // Validate all column references in sort expressions exist in the table - DuckLakeTableEntry::ValidateSortExpressionColumns(table, pre_bound_orders); - - // Bind the ORDER BY expressions auto binder = Binder::CreateBinder(context); - idx_t table_index = 0; - auto orders = DuckLakeCompactor::BindSortOrders(*binder, table, table_index, pre_bound_orders); + TableIndex table_index(0); + auto orders = DuckLakeCompactor::BindSortOrders(*binder, columns, table_name, table_index, pre_bound_orders); // Convert BoundColumnRefExpression to BoundReferenceExpression for physical plan for (auto &order : orders) { ResolveColumnRefs(order.expression); } - // Create identity projection map vector projection_map; for (idx_t i = 0; i < plan.types.size(); i++) { projection_map.push_back(i); } - auto &order_op = planner.Make(plan.types, std::move(orders), std::move(projection_map), plan.estimated_cardinality); order_op.children.push_back(plan); return &order_op; } +static optional_ptr PlanInsertSort(ClientContext &context, PhysicalPlanGenerator &planner, + PhysicalOperator &plan, DuckLakeTableEntry &table, + optional_ptr sort_data) { + return PlanInsertSortFromColumns(context, planner, plan, table.GetColumns(), table.name, sort_data); +} + PhysicalOperator &DuckLakeCatalog::PlanInsert(ClientContext &context, PhysicalPlanGenerator &planner, LogicalInsert &op, optional_ptr plan) { if (op.return_chunk) { @@ -782,10 +797,10 @@ PhysicalOperator &DuckLakeCatalog::PlanInsert(ClientContext &context, PhysicalPl } auto &ducklake_table = op.table.Cast(); - // Sort data according to the table's SET SORTED BY configuration + // Sort wrap (when SET SORTED BY is configured and sort_on_insert is enabled). auto sort_data = ducklake_table.GetSortData(); - auto &ducklake_schema_for_sort = ducklake_table.ParentSchema().Cast(); - bool sort_on_insert = GetConfigOption("sort_on_insert", ducklake_schema_for_sort.GetSchemaId(), + auto &ducklake_schema = ducklake_table.ParentSchema().Cast(); + bool sort_on_insert = GetConfigOption("sort_on_insert", ducklake_schema.GetSchemaId(), ducklake_table.GetTableId(), "true") == "true"; if (sort_data && sort_on_insert) { auto sorted_plan = PlanInsertSort(context, planner, *plan, ducklake_table, sort_data); @@ -794,17 +809,13 @@ PhysicalOperator &DuckLakeCatalog::PlanInsert(ClientContext &context, PhysicalPl } } + // Data-inlining wrap. When sort_on_insert=false and inlining is enabled, sort AFTER the inline operator + // so overflow rows reach Parquet sorted; inlined rows skip the sort (they live in metadata, not Parquet). optional_ptr inline_data; - idx_t data_inlining_row_limit = GetInliningLimit(context, ducklake_table); if (data_inlining_row_limit > 0) { plan = planner.Make(*plan, data_inlining_row_limit); inline_data = plan->Cast(); - - // When sort_on_insert=false but inlining is enabled, add sorting AFTER - // the inline data operator. Data that exceeds the inlining limit passes - // through to parquet files and must be sorted. Data that is inlined - // (absorbed by DuckLakeInlineData) never reaches this sort operator. if (sort_data && !sort_on_insert) { auto sorted_plan = PlanInsertSort(context, planner, *plan, ducklake_table, sort_data); if (sorted_plan) { @@ -812,6 +823,7 @@ PhysicalOperator &DuckLakeCatalog::PlanInsert(ClientContext &context, PhysicalPl } } } + DuckLakeCopyInput copy_input(context, ducklake_table); auto &physical_copy = DuckLakeInsert::PlanCopyForInsert(context, planner, copy_input, plan); auto &insert = DuckLakeInsert::PlanInsert(context, planner, ducklake_table, std::move(copy_input.encryption_key)); @@ -828,8 +840,53 @@ PhysicalOperator &DuckLakeCatalog::PlanCreateTableAs(ClientContext &context, Phy auto &columns = create_info.columns; auto &duck_transaction = DuckLakeTransaction::Get(context, *this); auto &duck_schema = op.schema.Cast(); + // FIXME: if table already exists and we are doing CREATE IF NOT EXISTS - skip + + // CTAS planning time: the table entry does NOT exist yet (it will be materialized at sink-state-init by + // DuckLakeInsert::GetGlobalSinkState). However the physical write we're + // about to build needs the partition spec + field-id mapping right now so the parquet files land in the + // right hive directories with the right partition_id stamped on them. + // + // The split: we build field_data + partition_data + sort_data SYNTHETICALLY here from the raw inline + // clauses on `op.info`. We hand the partition_data / sort_data to the DuckLakeInsert operator. + + idx_t column_id = 1; + auto field_data = DuckLakeFieldData::FromColumns(columns, column_id); + unique_ptr partition_data; + if (!create_info.partition_keys.empty()) { + partition_data = + DuckLakeTableEntry::BuildPartitionData(duck_transaction, columns, *field_data, create_info.partition_keys); + } + + unique_ptr sort_data; + if (!create_info.sort_keys.empty()) { + // FIXME: TODO: The syntax needs to be enabled in DuckDB upstream + // CREATE TABLE AS ... SORTED BY (e) accepts only raw expressions; wrap each in an OrderByNode using the + // same defaults the DuckDB transformer produces for a bare expression list (ASCENDING + ORDER_DEFAULT). + // Matches the ALTER TABLE ... SET SORTED BY (e) path. + vector orders; + orders.reserve(create_info.sort_keys.size()); + for (auto &expr : create_info.sort_keys) { + orders.emplace_back(OrderType::ASCENDING, OrderByNullType::ORDER_DEFAULT, expr->Copy()); + } + sort_data = DuckLakeTableEntry::BuildSortData(duck_transaction, columns, orders); + } + reference root = plan; + + // Sort wrap (mirrors PlanInsert's sort_on_insert wrap). For CTAS we don't have a table_id yet so the + // table-level sort_on_insert override doesn't apply; we honor the schema-level setting (defaults to true). + bool sort_on_insert = + GetConfigOption("sort_on_insert", duck_schema.GetSchemaId(), TableIndex(), "true") == "true"; + if (sort_data && sort_on_insert) { + auto sorted_plan = + PlanInsertSortFromColumns(context, planner, root.get(), columns, create_info.table, sort_data.get()); + if (sorted_plan) { + root = *sorted_plan; + } + } + optional_ptr inline_data; idx_t data_inlining_row_limit = DataInliningRowLimit(context, duck_schema.GetSchemaId(), TableIndex()); auto &metadata_manager = duck_transaction.GetMetadataManager(); @@ -837,17 +894,24 @@ PhysicalOperator &DuckLakeCatalog::PlanCreateTableAs(ClientContext &context, Phy root = planner.Make(root.get(), data_inlining_row_limit); inline_data = root.get().Cast(); } - for (auto &col : op.info->Base().columns.Logical()) { + for (auto &col : columns.Logical()) { DuckLakeTypes::CheckSupportedType(col.Type()); } auto table_uuid = duck_transaction.GenerateUUID(); auto table_data_path = duck_schema.DataPath() + DuckLakeCatalog::GeneratePathFromName(table_uuid, create_info.table); - DuckLakeCopyInput copy_input(context, duck_schema, columns, table_data_path); + DuckLakeCopyInput copy_input(context, duck_schema, columns, table_data_path, *field_data, partition_data.get()); auto &physical_copy = DuckLakeInsert::PlanCopyForInsert(context, planner, copy_input, root.get()); - auto &insert = planner.Make(op.types, op.schema, std::move(op.info), std::move(table_uuid), - std::move(table_data_path), std::move(copy_input.encryption_key)); + + optional_idx insert_partition_id; + if (partition_data) { + insert_partition_id = partition_data->partition_id; + } + + auto &insert = planner.Make( + op.types, op.schema, std::move(op.info), std::move(table_uuid), std::move(table_data_path), + std::move(partition_data), std::move(sort_data), insert_partition_id, std::move(copy_input.encryption_key)); if (inline_data) { inline_data->insert = insert.Cast(); } diff --git a/src/storage/ducklake_schema_entry.cpp b/src/storage/ducklake_schema_entry.cpp index 10840d41b9f..b7b03fc962b 100644 --- a/src/storage/ducklake_schema_entry.cpp +++ b/src/storage/ducklake_schema_entry.cpp @@ -4,6 +4,7 @@ #include "duckdb/parser/parsed_data/comment_on_column_info.hpp" #include "duckdb/parser/parsed_data/create_view_info.hpp" #include "duckdb/parser/parsed_data/drop_info.hpp" +#include "duckdb/parser/result_modifier.hpp" #include "duckdb/planner/parsed_data/bound_create_table_info.hpp" #include "storage/ducklake_catalog.hpp" #include "storage/ducklake_table_entry.hpp" @@ -61,9 +62,10 @@ bool DuckLakeSchemaEntry::HandleCreateConflict(CatalogTransaction transaction, C return true; } -optional_ptr DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, - BoundCreateTableInfo &info, string table_uuid, - string table_data_path) { +optional_ptr +DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCreateTableInfo &info, string table_uuid, + string table_data_path, unique_ptr prebuilt_partition_data, + unique_ptr prebuilt_sort_data) { auto &duck_transaction = transaction.transaction->Cast(); auto &base_info = info.Base(); // check if we have an existing entry with this name @@ -85,6 +87,33 @@ optional_ptr DuckLakeSchemaEntry::CreateTableExtended(CatalogTrans auto table_entry = make_uniq(ParentCatalog(), *this, base_info, table_id, std::move(table_uuid), std::move(table_data_path), std::move(field_data), column_id, std::move(inlined_tables), LocalChangeType::CREATED); + // Two routes for partition / sort data: + // (A) CTAS: partition_data and sort_data were pre-built at planning time so the planner-built physical + // write could embed the right partition_id into the data files. Attach them as-is - rebuilding here + // would allocate new ids that wouldn't match the ids baked into the on-disk files. + // (B) Plain CREATE TABLE (non-CTAS): rebuild from the inline clauses on info.Base().partition_keys / + // sort_keys. There's no planning-time write to coordinate with, so the id allocated here is the only id. + if (prebuilt_partition_data) { + table_entry->SetPartitionData(std::move(prebuilt_partition_data)); + } else if (!base_info.partition_keys.empty()) { + table_entry->SetPartitionData(DuckLakeTableEntry::BuildPartitionData( + duck_transaction, table_entry->GetColumns(), table_entry->GetFieldData(), base_info.partition_keys)); + } + if (prebuilt_sort_data) { + table_entry->SetSortData(std::move(prebuilt_sort_data)); + } else if (!base_info.sort_keys.empty()) { + // TODO: FIXME: Needs a fix in upstream DuckDB then a fix here + // CREATE TABLE ... SORTED BY (e) accepts only raw expressions (no ASC/DESC/NULLS modifiers). + // Wrap each in an OrderByNode using the same defaults the DuckDB transformer produces for a bare + // expression list (ASCENDING + ORDER_DEFAULT) - matches the ALTER TABLE ... SET SORTED BY (e) path. + vector orders; + orders.reserve(base_info.sort_keys.size()); + for (auto &expr : base_info.sort_keys) { + orders.emplace_back(OrderType::ASCENDING, OrderByNullType::ORDER_DEFAULT, expr->Copy()); + } + table_entry->SetSortData( + DuckLakeTableEntry::BuildSortData(duck_transaction, table_entry->GetColumns(), orders)); + } auto result = table_entry.get(); duck_transaction.CreateEntry(std::move(table_entry)); return result; diff --git a/src/storage/ducklake_table_entry.cpp b/src/storage/ducklake_table_entry.cpp index 0903f800de4..628b23a1b56 100644 --- a/src/storage/ducklake_table_entry.cpp +++ b/src/storage/ducklake_table_entry.cpp @@ -422,7 +422,7 @@ string GetPartitionColumnName(ColumnRefExpression &colref) { return colref.GetColumnName(); } -void DuckLakeTableEntry::ValidateSortExpressionColumns(DuckLakeTableEntry &table, const vector &orders) { +void DuckLakeTableEntry::ValidateSortExpressionColumns(const ColumnList &columns, const vector &orders) { vector missing_columns; for (auto &order : orders) { ParsedExpressionIterator::VisitExpression( @@ -432,7 +432,7 @@ void DuckLakeTableEntry::ValidateSortExpressionColumns(DuckLakeTableEntry &table "Unexpected qualified column reference - only unqualified columns are supported"); } string column_name = colref.GetColumnName(); - if (!table.ColumnExists(column_name)) { + if (!columns.ColumnExists(column_name)) { if (std::find(missing_columns.begin(), missing_columns.end(), column_name) == missing_columns.end()) { missing_columns.push_back(column_name); @@ -453,7 +453,8 @@ void DuckLakeTableEntry::ValidateSortExpressionColumns(DuckLakeTableEntry &table } } -DuckLakePartitionField GetPartitionField(DuckLakeTableEntry &table, ParsedExpression &expr) { +DuckLakePartitionField GetPartitionField(const ColumnList &columns, DuckLakeFieldData &field_data, + ParsedExpression &expr) { string column_name; DuckLakePartitionField field; @@ -521,28 +522,68 @@ DuckLakePartitionField GetPartitionField(DuckLakeTableEntry &table, ParsedExpres "Unsupported partition key %s - only identity columns and year/month/day/hour/bucket are supported", expr.ToString()); } - if (!table.ColumnExists(column_name)) { + if (!columns.ColumnExists(column_name)) { throw CatalogException("Unexpected partition key - column \"%s\" does not exist", column_name); } - auto &col = table.GetColumn(column_name); + auto &col = columns.GetColumn(column_name); PhysicalIndex column_index(col.StorageOid()); - auto &field_id = table.GetFieldData().GetByRootIndex(column_index); + auto &field_id = field_data.GetByRootIndex(column_index); field.field_id = field_id.GetFieldIndex(); return field; } -unique_ptr DuckLakeTableEntry::AlterTable(DuckLakeTransaction &transaction, SetPartitionedByInfo &info) { - auto create_info = GetInfo(); - auto &table_info = create_info->Cast(); - // create a complete copy of this table with the partition info added +static bool PartitionFieldsMatch(optional_ptr current_partition, + const DuckLakePartition &requested_partition) { + if (!current_partition) { + return requested_partition.fields.empty(); + } + if (current_partition->fields.size() != requested_partition.fields.size()) { + return false; + } + for (idx_t i = 0; i < current_partition->fields.size(); i++) { + if (!(current_partition->fields[i] == requested_partition.fields[i])) { + return false; + } + } + return true; +} + +unique_ptr +DuckLakeTableEntry::BuildPartitionData(DuckLakeTransaction &transaction, const ColumnList &columns, + DuckLakeFieldData &field_data, + const vector> &partition_keys) { + // Always returns a non-null DuckLakePartition. Empty partition_keys yields a partition with no fields, + // which is the shape RESET PARTITIONED BY (and SET PARTITIONED BY () with empty list) needs to drop the + // existing partition spec at commit time. CTAS callers that mean "no PARTITIONED BY clause at all" should + // short-circuit and not call this helper. auto partition_data = make_uniq(); partition_data->partition_id = transaction.GetLocalCatalogId(); - for (idx_t expr_idx = 0; expr_idx < info.partition_keys.size(); expr_idx++) { - auto &expr = *info.partition_keys[expr_idx]; - auto partition_field = GetPartitionField(*this, expr); + for (idx_t expr_idx = 0; expr_idx < partition_keys.size(); expr_idx++) { + auto &expr = *partition_keys[expr_idx]; + auto partition_field = GetPartitionField(columns, field_data, expr); + // Reject duplicate partition keys: two expressions that resolve to the same (field_id, transform). + // `(year(ts), month(ts))` is fine - same column, different transforms - but `(a, a)` is not. + for (auto &existing : partition_data->fields) { + // partition_key_index is intentionally excluded here: it gets assigned below, after the + // duplicate check. Two fields are duplicates if they resolve to the same (field_id, transform). + if (existing.field_id == partition_field.field_id && existing.transform == partition_field.transform) { + throw BinderException("Duplicate partition key: expression \"%s\" matches an earlier partition key", + expr.ToString()); + } + } partition_field.partition_key_index = expr_idx; partition_data->fields.push_back(partition_field); } + return partition_data; +} + +unique_ptr DuckLakeTableEntry::AlterTable(DuckLakeTransaction &transaction, SetPartitionedByInfo &info) { + auto create_info = GetInfo(); + auto &table_info = create_info->Cast(); + auto partition_data = BuildPartitionData(transaction, GetColumns(), GetFieldData(), info.partition_keys); + if (PartitionFieldsMatch(GetPartitionData(), *partition_data)) { + return nullptr; + } auto new_entry = make_uniq(*this, table_info, std::move(partition_data)); return std::move(new_entry); @@ -1239,24 +1280,16 @@ unique_ptr DuckLakeTableEntry::AlterTable(DuckLakeTransaction &tra return std::move(new_entry); } -unique_ptr DuckLakeTableEntry::AlterTable(DuckLakeTransaction &transaction, SetSortedByInfo &info) { - auto create_info = GetInfo(); - auto &table_info = create_info->Cast(); - - if (info.orders.empty()) { - // RESET SORTED BY - clear sort data - auto new_entry = make_uniq(*this, table_info, unique_ptr()); - return std::move(new_entry); +unique_ptr DuckLakeTableEntry::BuildSortData(DuckLakeTransaction &transaction, const ColumnList &columns, + const vector &orders) { + if (orders.empty()) { + return nullptr; } - - // Validate all column references in all sort expressions - ValidateSortExpressionColumns(*this, info.orders); - + ValidateSortExpressionColumns(columns, orders); auto sort_data = make_uniq(); sort_data->sort_id = transaction.GetLocalCatalogId(); - for (idx_t order_node_idx = 0; order_node_idx < info.orders.size(); order_node_idx++) { - auto &order_node = info.orders[order_node_idx]; - + for (idx_t order_node_idx = 0; order_node_idx < orders.size(); order_node_idx++) { + auto &order_node = orders[order_node_idx]; DuckLakeSortField sort_field; sort_field.sort_key_index = order_node_idx; sort_field.expression = order_node.expression->ToString(); @@ -1265,7 +1298,14 @@ unique_ptr DuckLakeTableEntry::AlterTable(DuckLakeTransaction &tra sort_field.null_order = order_node.null_order; sort_data->fields.push_back(sort_field); } + return sort_data; +} + +unique_ptr DuckLakeTableEntry::AlterTable(DuckLakeTransaction &transaction, SetSortedByInfo &info) { + auto create_info = GetInfo(); + auto &table_info = create_info->Cast(); + auto sort_data = BuildSortData(transaction, GetColumns(), info.orders); auto new_entry = make_uniq(*this, table_info, std::move(sort_data)); return std::move(new_entry); } diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp index 728d1a8c0a7..6bc5e6c2575 100644 --- a/src/storage/ducklake_transaction.cpp +++ b/src/storage/ducklake_transaction.cpp @@ -1751,6 +1751,18 @@ void DuckLakeTransaction::GetNewTableInfo(DuckLakeCommitState &commit_state, Duc inlined_entry.uuid = latest_table.GetTableUUID(); inlined_entry.columns = latest_table.GetTableColumns(); result.new_inlined_data_tables.push_back(std::move(inlined_entry)); + + // CREATE TABLE ... PARTITIONED BY / SORTED BY + if (local_change.type == LocalChangeType::CREATED) { + if (table.GetPartitionData()) { + auto partition_key = GetNewPartitionKey(commit_state, table); + result.new_partition_keys.push_back(std::move(partition_key)); + } + if (table.GetSortData()) { + auto sort_key = GetNewSortKey(commit_state, table); + result.new_sort_keys.push_back(std::move(sort_key)); + } + } break; } default: diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test b/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test new file mode 100644 index 00000000000..38b97dd29f4 --- /dev/null +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test @@ -0,0 +1,311 @@ +# name: test/sql/create_table_inline_partition_sort/create_table_inline_basic.test +# description: CREATE TABLE with inline PARTITIONED BY / SORTED BY clauses +# group: [create_table_inline_partition_sort] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_basic', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 0) + +statement ok +USE ducklake + +# Test 1: CREATE TABLE with inline PARTITIONED BY only (single column) +statement ok +CREATE TABLE t_part (i INTEGER, v VARCHAR) PARTITIONED BY (i) + +statement ok +INSERT INTO t_part VALUES (1, 'a'), (2, 'b'), (1, 'c') + +query II +SELECT i, v FROM t_part ORDER BY i, v +---- +1 a +1 c +2 b + +# verify partition metadata was persisted at CREATE time (no ALTER ran) +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') +---- +1 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') +---- +1 + +# verify the on-disk Parquet files landed in hive-style partition-specific directories +# (DATA_INLINING_ROW_LIMIT 0 forces INSERT to write Parquet directly; one file per partition value) +query I +SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') +---- +2 + +query I +SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') ORDER BY 1 +---- +i=1 +i=2 + +# Test 2: CREATE TABLE with inline SORTED BY only (single column) +statement ok +CREATE TABLE t_sort (i INTEGER, v VARCHAR) SORTED BY (i) + +statement ok +INSERT INTO t_sort VALUES (3, 'x'), (1, 'y'), (2, 'z') + +# t_sort has a single file (DATA_INLINING_ROW_LIMIT 0, no partitioning, one INSERT batch). +# SORTED BY (i) means rows are i-ascending in the file - SELECT without ORDER BY returns the +# file's row order directly, which IS the sort verification. +query II +SELECT i, v FROM t_sort +---- +1 y +2 z +3 x + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort') +---- +1 + +query II +SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort') ORDER BY sort_key_index +---- +0 i + +# verify the inline path persisted the same defaults a bare ALTER TABLE ... SET SORTED BY (col) would +# (the metadata writer collapses OrderType::ASCENDING -> "ASC" and OrderByNullType::ORDER_DEFAULT -> "NULLS_LAST") +query III +SELECT sort_direction, null_order, dialect FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort') +---- +ASC NULLS_LAST duckdb + +# Test 3: CREATE TABLE with both clauses, PARTITIONED first +statement ok +CREATE TABLE t_both1 (a INTEGER, b INTEGER, v VARCHAR) PARTITIONED BY (a) SORTED BY (b) + +statement ok +INSERT INTO t_both1 VALUES (1, 30, 'p'), (2, 10, 'q'), (1, 20, 'r') + +# t_both1 is PARTITIONED BY (a), so each value of a lives in its own file. SORTED BY (b) means +# within each partition's file rows are b-ascending. Filter to one partition at a time and verify +# row order without ORDER BY - that order IS the on-disk row order of the partition's file. +query III +SELECT a, b, v FROM t_both1 WHERE a = 1 +---- +1 20 r +1 30 p + +# on-disk: one Parquet file per distinct partition value (a=1 and a=2), each in its own a=N directory +query I +SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both1') ORDER BY 1 +---- +a=1 +a=2 + +# Test 4: CREATE TABLE with both clauses, SORTED first (verify either-order grammar) +statement ok +CREATE TABLE t_both2 (a INTEGER, b INTEGER) SORTED BY (b) PARTITIONED BY (a) + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both2') +---- +1 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both2') +---- +1 + +# Test 5: multi-column partition + multi-column sort +statement ok +CREATE TABLE t_multi (a INTEGER, b INTEGER, c INTEGER) PARTITIONED BY (a, b) SORTED BY (c, a) + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') +---- +2 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') +---- +2 + +statement ok +INSERT INTO t_multi VALUES (1, 1, 30), (1, 1, 10), (1, 2, 20), (2, 1, 5) + +# t_multi is PARTITIONED BY (a, b), so each (a, b) combination lives in its own file. SORTED BY +# (c, a) means rows within a partition's file are (c, a)-ascending. +query III +SELECT a, b, c FROM t_multi WHERE a = 1 AND b = 1 +---- +1 1 10 +1 1 30 + +# multi-column hive-style nesting: a=N/b=M/ +query II +SELECT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) AS a_dir, regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1) AS b_dir +FROM ducklake_metadata.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') +ORDER BY a_dir, b_dir +---- +a=1 b=1 +a=1 b=2 +a=2 b=1 + +# the a directory must come *before* the b directory in the path (PARTITIONED BY (a, b) ordering) +query I +SELECT count(*) FROM ducklake_metadata.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') + AND regexp_matches(path, 'a=[0-9]+/b=[0-9]+') +---- +3 + +# Test 6: PARTITIONED BY with a transform function +statement ok +CREATE TABLE t_transform (ts TIMESTAMP, v INTEGER) PARTITIONED BY (year(ts)) + +query II +SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_transform') +---- +0 year + +statement ok +INSERT INTO t_transform VALUES (TIMESTAMP '2023-06-01', 1), (TIMESTAMP '2023-09-15', 2), (TIMESTAMP '2024-03-10', 3) + +# year() transform produces hive directories named with the partition key NAME (year, not ts) +query I +SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_transform') ORDER BY 1 +---- +year=2023 +year=2024 + +# Test 7: PARTITIONED BY only writes partition metadata, NOT sort metadata (asymmetric) +statement ok +CREATE TABLE t_only_part (a INTEGER) PARTITIONED BY (a) + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part') +---- +1 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part') +---- +0 + +statement ok +INSERT INTO t_only_part VALUES (10), (20), (10) + +query I +SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part') ORDER BY 1 +---- +a=10 +a=20 + +# Test 8: SORTED BY only writes sort metadata, NOT partition metadata (asymmetric) +statement ok +CREATE TABLE t_only_sort (a INTEGER) SORTED BY (a) + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_sort') +---- +0 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_sort') +---- +1 + +# Test 9: PARTITIONED BY with bucket(N, col) transform happy path +statement ok +CREATE TABLE t_bucket (id BIGINT, v VARCHAR) PARTITIONED BY (bucket(8, id)) + +query II +SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket') +---- +0 bucket(8) + +statement ok +INSERT INTO t_bucket VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h') + +# bucket() transform writes into hive directories named "bucket=N" (modulo bucket count) +# we don't fix specific bucket assignments (hash-dependent) but we assert at least 1 and at most 8 distinct buckets +query I +SELECT count(DISTINCT regexp_extract(path, '.*(bucket=[0-9]+)[/\\].*', 1)) BETWEEN 1 AND 8 FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket') +---- +true + +# every file must live in a bucket=N directory +query I +SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket') AND NOT regexp_matches(path, 'bucket=[0-9]+') +---- +0 + +# Test 10: same-transaction CREATE-inline + ALTER-set must NOT emit duplicate partition rows. +# The reverse-chain commit walk has to dedupe so only the latest (ALTER) wins. +statement ok +BEGIN + +statement ok +CREATE TABLE t_create_then_alter (a INTEGER, b INTEGER) PARTITIONED BY (a) SORTED BY (a) + +statement ok +ALTER TABLE t_create_then_alter SET PARTITIONED BY (b) + +statement ok +ALTER TABLE t_create_then_alter SET SORTED BY (b DESC) + +statement ok +COMMIT + +# exactly one partition_info row (the ALTER's), not two +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') +---- +1 + +# the surviving partition row references column b (the ALTER's choice), not a (the inline create table's). +query III +SELECT pc.partition_key_index, c.column_name, pc.transform +FROM ducklake_metadata.ducklake_partition_column pc +JOIN ducklake_metadata.ducklake_column c + ON pc.table_id = c.table_id AND pc.column_id = c.column_id AND c.end_snapshot IS NULL +WHERE pc.table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') +---- +0 b identity + +# exactly one sort_info row, and its expression is b DESC (the ALTER's), not a +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') +---- +1 + +query III +SELECT expression, sort_direction, null_order FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') +---- +b DESC NULLS_LAST + +# INSERT after the chain: data must land in b=N directories (ALTER won), NOT a=N directories +statement ok +INSERT INTO t_create_then_alter VALUES (1, 100), (2, 100), (3, 200) + +query I +SELECT DISTINCT regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') ORDER BY 1 +---- +b=100 +b=200 + +# none of the files should be under an a=N directory (regression guard for the dedup logic) +query I +SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') AND regexp_matches(path, 'a=[0-9]+/') +---- +0 diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test new file mode 100644 index 00000000000..d93155f7202 --- /dev/null +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test @@ -0,0 +1,248 @@ +# name: test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test +# description: CREATE TABLE AS SELECT with inline PARTITIONED BY / SORTED BY clauses +# group: [create_table_inline_partition_sort] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_ctas', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 0) + +statement ok +USE ducklake + +# Test 1: CTAS with PARTITIONED BY only +statement ok +CREATE TABLE ctas_part PARTITIONED BY (i) AS SELECT range AS i, range::VARCHAR AS v FROM range(5) + +query II +SELECT i, v FROM ctas_part ORDER BY i +---- +0 0 +1 1 +2 2 +3 3 +4 4 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_part') +---- +1 + +# CTAS must hive-partition on the initial write: 5 distinct values from range(5) -> 5 hive directories +query I +SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_part') ORDER BY 1 +---- +i=0 +i=1 +i=2 +i=3 +i=4 + +# Test 2: CTAS with SORTED BY only. +statement ok +CREATE TABLE ctas_sort SORTED BY (j) AS SELECT range AS i, 9 - range AS j FROM range(10) + +# ctas_sort is non-partitioned; CTAS produces a single file. SELECT without ORDER BY returns the +# file's row order directly. SORTED BY (j) means j-ascending; i is forced descending by construction. +query II +SELECT i, j FROM ctas_sort +---- +9 0 +8 1 +7 2 +6 3 +5 4 +4 5 +3 6 +2 7 +1 8 +0 9 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_sort') +---- +1 + +# Test 3: CTAS with both clauses, PARTITIONED first. 30 rows partitioned into 3 groups of 10; +# sort key v = (29 - i) is the inverse of the SELECT's natural order so the sort has real work. +statement ok +CREATE TABLE ctas_both PARTITIONED BY (g) SORTED BY (v) AS +SELECT range % 3 AS g, 29 - range AS v FROM range(30) + +query II +SELECT + (SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both')), + (SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both')) +---- +1 1 + +# 3 distinct partition values -> 3 hive directories +query I +SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both') +---- +3 + +# verify sort actually happened: each partition's file holds rows in v-ascending order +query II +SELECT g, v FROM ctas_both WHERE g = 0 +---- +0 2 +0 5 +0 8 +0 11 +0 14 +0 17 +0 20 +0 23 +0 26 +0 29 + +query II +SELECT g, v FROM ctas_both WHERE g = 1 +---- +1 1 +1 4 +1 7 +1 10 +1 13 +1 16 +1 19 +1 22 +1 25 +1 28 + +query II +SELECT g, v FROM ctas_both WHERE g = 2 +---- +2 0 +2 3 +2 6 +2 9 +2 12 +2 15 +2 18 +2 21 +2 24 +2 27 + +# Test 4: CTAS with SORTED first then PARTITIONED (either-order grammar). Same shape as Test 3 but +statement ok +CREATE TABLE ctas_swap SORTED BY (v) PARTITIONED BY (g) AS +SELECT range % 3 AS g, 29 - range AS v FROM range(30) + +query II +SELECT + (SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap')), + (SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap')) +---- +1 1 + +query I +SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap') +---- +3 + +# partition g=1's file +query II +SELECT g, v FROM ctas_swap WHERE g = 1 +---- +1 1 +1 4 +1 7 +1 10 +1 13 +1 16 +1 19 +1 22 +1 25 +1 28 + +# Test 5: CTAS partitioning by a transform expression +statement ok +CREATE TABLE ctas_transform PARTITIONED BY (year(ts)) AS +SELECT TIMESTAMP '2024-01-01' + INTERVAL (i) YEAR AS ts, i AS v FROM range(3) t(i) + +query II +SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_transform') +---- +0 year + +# CTAS with year() transform must hive-partition on the initial write +query I +SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_transform') ORDER BY 1 +---- +year=2024 +year=2025 +year=2026 + +# Test 6: CTAS with multi-column SORTED BY using string and integer columns +statement ok +CREATE TABLE ctas_multi SORTED BY (a, b) AS +SELECT i AS a, ('tag_' || i) AS b FROM range(4) t(i) + +# ctas_multi is non-partitioned; CTAS produces a single file. SORTED BY (a, b) means rows are +# (a, b)-ascending in that file - SELECT without ORDER BY returns the file's actual row order. +query II +SELECT a, b FROM ctas_multi +---- +0 tag_0 +1 tag_1 +2 tag_2 +3 tag_3 + +query II +SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_multi') ORDER BY sort_key_index +---- +0 a +1 b + +# Test 7: CTAS with a CTE source (regression: planning shape differs from a flat SELECT) +statement ok +CREATE TABLE ctas_cte PARTITIONED BY (g) SORTED BY (v) AS +WITH src AS (SELECT i % 3 AS g, i AS v FROM range(9) t(i)) +SELECT g, v FROM src + +query II +SELECT g, count(*) FROM ctas_cte GROUP BY g ORDER BY g +---- +0 3 +1 3 +2 3 + +# ctas_cte is PARTITIONED BY (g), so each value of g lives in its own file. SORTED BY (v) means +# rows within each partition's file are v-ascending. Filter to one partition; the row order +# returned IS the on-disk file order. +query II +SELECT g, v FROM ctas_cte WHERE g = 0 +---- +0 0 +0 3 +0 6 + +query II +SELECT g, v FROM ctas_cte WHERE g = 1 +---- +1 1 +1 4 +1 7 + +query II +SELECT g, v FROM ctas_cte WHERE g = 2 +---- +2 2 +2 5 +2 8 + +# CTE-sourced CTAS must still partition on the initial write (regression: planner passes the spec through) +query I +SELECT DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_cte') ORDER BY 1 +---- +g=0 +g=1 +g=2 diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test new file mode 100644 index 00000000000..8aaad79b5b0 --- /dev/null +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test @@ -0,0 +1,88 @@ +# name: test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test +# description: Verify SORTED BY actually sorts data during CTAS by inspecting parquet rowgroup statistics. +# group: [create_table_inline_partition_sort] + +# Uses 3000+ rows across multiple 2048-row rowgroups; sort column is the INVERSE of the SELECT's +# natural order so any "sort skipped" regression would leave rowgroups in descending order. + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_ctas_sort_verify', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 0) + +# 2048 is the minimum parquet rowgroup size in DuckDB; pick it so 3000 rows produce >=2 rowgroups. +statement ok +CALL ducklake.set_option('parquet_row_group_size', 2048) + +# Single-threaded + preserved insertion order keeps rowgroup boundaries deterministic, so we can assert +# specific stats_min values per rowgroup rather than an order-tolerant "monotonic" check. +statement ok +SET threads=1 + +statement ok +SET preserve_insertion_order=true + +statement ok +USE ducklake + +# CTAS feeding from range(3000): natural SELECT order is `a` ascending. Sort key `b` is `2999 - a` so its +# natural in-stream order is strictly descending. If SORTED BY (b) is honored at CTAS time, the data is +# resorted to b ascending and the per-rowgroup stats_min/max for `b` will be disjoint and increasing. +statement ok +CREATE TABLE sort_verify SORTED BY (b) AS +SELECT range AS a, (2999 - range) AS b FROM range(3000) + +# logical correctness: data round-trips +query I +SELECT count(*) FROM sort_verify +---- +3000 + +# Confirm we got at least 2 rowgroups (otherwise the per-rowgroup-stats check is meaningless). +query I +SELECT count(DISTINCT row_group_id) >= 2 +FROM parquet_metadata('{DATA_PATH}/inline_ctas_sort_verify/main/sort_verify/*.parquet') +WHERE path_in_schema = 'b' +---- +true + +# ---------------------------------------------------------------------------------------------------------------------- +# Hardcoded per-rowgroup min/max sanity check on `sort_verify`. +# Layout: 3000 rows, parquet_row_group_size=2048, single thread, preserve_insertion_order=true. After SORT BY b ASC: +# ---------------------------------------------------------------------------------------------------------------------- + +# column `b` +query III +SELECT row_group_id, stats_min::INTEGER AS rg_min, stats_max::INTEGER AS rg_max +FROM parquet_metadata('{DATA_PATH}/inline_ctas_sort_verify/main/sort_verify/*.parquet') +WHERE path_in_schema = 'b' +ORDER BY row_group_id +---- +0 0 2047 +1 2048 2999 + +# column `a` +query III +SELECT row_group_id, stats_min::INTEGER AS rg_min, stats_max::INTEGER AS rg_max +FROM parquet_metadata('{DATA_PATH}/inline_ctas_sort_verify/main/sort_verify/*.parquet') +WHERE path_in_schema = 'a' +ORDER BY row_group_id +---- +0 952 2999 +1 0 951 + +# row counts per rowgroup confirm the 2048 + 952 split +query II +SELECT row_group_id, row_group_num_rows +FROM parquet_metadata('{DATA_PATH}/inline_ctas_sort_verify/main/sort_verify/*.parquet') +WHERE path_in_schema = 'b' +ORDER BY row_group_id +---- +0 2048 +1 952 diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test new file mode 100644 index 00000000000..85343564a51 --- /dev/null +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test @@ -0,0 +1,96 @@ +# name: test/sql/create_table_inline_partition_sort/create_table_inline_errors.test +# description: validation errors for inline PARTITIONED BY / SORTED BY on CREATE TABLE / CTAS +# group: [create_table_inline_partition_sort] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_errors', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 0) + +statement ok +USE ducklake + +# PARTITIONED BY references nonexistent column +statement error +CREATE TABLE bad_part (a INTEGER) PARTITIONED BY (nonexistent) +---- +column "nonexistent" does not exist + +# SORTED BY references nonexistent column +statement error +CREATE TABLE bad_sort (a INTEGER) SORTED BY (nonexistent) +---- +:.*nonexistent.* + +# WITH (...) clause is rejected by SupportsCreateTable override (DuckLake has no WITH semantics) +statement error +CREATE TABLE bad_with (a INTEGER) WITH (some_option='x') +---- +WITH clause is not supported for tables in a ducklake catalog + +# unsupported transform function +statement error +CREATE TABLE bad_transform (a INTEGER) PARTITIONED BY (foo(a)) +---- +:.*Unsupported partition function.* + +# malformed bucket() expression - missing args +statement error +CREATE TABLE bad_bucket (a INTEGER) PARTITIONED BY (bucket(a)) +---- +Expected bucket(bucket_count, column) + +# duplicate partition key (same column with same transform) is rejected +statement error +CREATE TABLE bad_dup (a INTEGER) PARTITIONED BY (a, a) +---- +Duplicate partition key + +# CTAS variants: SORTED BY references nonexistent column +statement error +CREATE TABLE bad_ctas SORTED BY (nonexistent) AS SELECT 1 AS a +---- +:.*nonexistent.* + +# CTAS variants: PARTITIONED BY references nonexistent column +statement error +CREATE TABLE bad_ctas_part PARTITIONED BY (nonexistent) AS SELECT 1 AS a +---- +:.*nonexistent.* + +# verify the failing CREATEs left no metadata behind +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info +---- +0 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info +---- +0 + +# A successful CREATE after failures still works (no transaction state corruption) +statement ok +CREATE TABLE ok_after_errors (a INTEGER) PARTITIONED BY (a) + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_after_errors') +---- +1 + +# Same column with DIFFERENT transforms is allowed (e.g. year + month of the same timestamp). +# This must NOT be flagged as a duplicate partition key. +statement ok +CREATE TABLE ok_diff_transforms (ts TIMESTAMP) PARTITIONED BY (year(ts), month(ts)) + +query II +SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_diff_transforms') ORDER BY partition_key_index +---- +0 year +1 month diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test new file mode 100644 index 00000000000..d04656cfc00 --- /dev/null +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test @@ -0,0 +1,453 @@ +# name: test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test +# description: Transaction semantics for inline PARTITIONED BY / SORTED BY on CREATE TABLE / CTAS: +# rollback discards everything (entry, files, transaction-local ids); concurrent CTAS with +# distinct names cannot conflict; concurrent CTAS with the same name produces a commit-time +# conflict; +# group: [create_table_inline_partition_sort] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_tx', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 0) + +statement ok +SET immediate_transaction_mode=true + +statement ok +USE ducklake + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 1: ROLLBACK after a successful inline-partitioned CREATE+INSERT. +# Rollback must discard the table entry, the planning-time-allocated partition_id/sort_id, and the data files. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement ok +CREATE TABLE rollback_part (i INTEGER, v VARCHAR) PARTITIONED BY (i) + +statement ok +INSERT INTO rollback_part VALUES (1, 'a'), (2, 'b') + +# table is visible inside the transaction +query I +SELECT count(*) FROM rollback_part +---- +2 + +statement ok +ROLLBACK + +# table is gone after rollback +statement error +SELECT * FROM rollback_part +---- +:.*not exist.* + +# the next inline CTAS proceeds cleanly - no leftover transaction-local catalog state poisoning subsequent +# planning (regression guard for the planning-time partition_id allocator). +statement ok +CREATE TABLE post_rollback PARTITIONED BY (a) AS SELECT 1 AS a + +query I +SELECT * FROM post_rollback +---- +1 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 2: ROLLBACK after CTAS with both inline clauses. The most invasive shape - exercises the full +# planning-time field_data + partition_data + sort_data construction, the sink-time CreateTableExtended +# branch, the data-file write, and the rollback cleanup. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement ok +CREATE TABLE rollback_ctas PARTITIONED BY (g) SORTED BY (v) AS +SELECT i % 3 AS g, i AS v FROM range(9) t(i) + +query I +SELECT count(*) FROM rollback_ctas +---- +9 + +statement ok +ROLLBACK + +statement error +SELECT * FROM rollback_ctas +---- +:.*not exist.* + +# verify the metadata catalog never saw the rolled-back partition_info / sort_info rows +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rollback_ctas') +---- +0 + +query I +SELECT count(*) FROM ducklake_meta.ducklake_sort_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rollback_ctas') +---- +0 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 3: ROLLBACK + same-name CREATE in a fresh transaction succeeds. The rolled-back table_id / +# partition_id were transaction-local and should not leak into the next transaction's namespace. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement ok +CREATE TABLE name_reuse PARTITIONED BY (a) AS SELECT 1 AS a, 'x' AS v + +statement ok +ROLLBACK + +# Same name, same partition column - works because the rollback wiped the prior attempt entirely. +statement ok +CREATE TABLE name_reuse PARTITIONED BY (a) AS SELECT 99 AS a, 'y' AS v + +query II +SELECT a, v FROM name_reuse +---- +99 y + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 4: Two concurrent transactions both CREATE TABLE foo PARTITIONED BY ... AS SELECT with the SAME +# name. First to commit wins; second errors at COMMIT time. This is a name-collision conflict - the new +# inline clauses don't introduce a different conflict mode. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.conflict_same_name PARTITIONED BY (i) AS SELECT 1 AS i + +statement ok con2 +CREATE TABLE ducklake.conflict_same_name SORTED BY (i) AS SELECT 2 AS i + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + +# con1's table is the surviving one - check its inline partition spec landed +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'conflict_same_name') +---- +1 + +query I +SELECT * FROM ducklake.conflict_same_name +---- +1 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 5: Two concurrent CTAS-with-inline-spec with DIFFERENT names. No conflict - each transaction +# allocates partition_id from its own counter; commit-time remap assigns globally-unique committed ids. +# This proves the planning-time partition_id allocation isn't a serialization point. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.concurrent_a PARTITIONED BY (i) AS SELECT range AS i, 'a' AS src FROM range(5) + +statement ok con2 +CREATE TABLE ducklake.concurrent_b PARTITIONED BY (i) SORTED BY (i) AS SELECT range AS i, 'b' AS src FROM range(7) + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + +# both tables exist and have their own partition_info rows +query II +SELECT + (SELECT count(*) FROM ducklake_meta.ducklake_partition_info + WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_a')), + (SELECT count(*) FROM ducklake_meta.ducklake_partition_info + WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_b')) +---- +1 1 + +# the two tables have DISTINCT committed partition_ids (proves the commit-time remap deduplicates the +# transaction-local ids that started identical at planning time on each connection). +query I +SELECT count(DISTINCT partition_id) FROM ducklake_meta.ducklake_partition_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name IN ('concurrent_a', 'concurrent_b')) +---- +2 + +query II +SELECT count(*), max(i) FROM ducklake.concurrent_a +---- +5 4 + +query II +SELECT count(*), max(i) FROM ducklake.concurrent_b +---- +7 6 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 6: Same-transaction inline CTAS + ALTER SET PARTITIONED BY + COMMIT. Last writer wins (the +# metadata-layer dedup collapses by table_id; final on-disk state references the ALTER's partition column). +# This is regression coverage for the chain-walk behavior change in this branch. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement ok +CREATE TABLE chain_winner PARTITIONED BY (a) SORTED BY (a) AS +SELECT range AS a, range + 100 AS b FROM range(3) + +statement ok +ALTER TABLE chain_winner SET PARTITIONED BY (b) + +statement ok +ALTER TABLE chain_winner SET SORTED BY (b DESC) + +statement ok +COMMIT + +# exactly one partition_info row (the last ALTER's) and one sort_info row (the last ALTER's) +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner') +---- +1 + +query I +SELECT count(*) FROM ducklake_meta.ducklake_sort_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner') +---- +1 + +# the surviving sort row is `b DESC`, not the inline `a ASC` +query III +SELECT expression, sort_direction, null_order FROM ducklake_meta.ducklake_sort_expression +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner') +---- +b DESC NULLS_LAST + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 7: CTAS that errors mid-execution because of a runtime constraint violation. The transaction must +# unwind cleanly - no zombie entry, no partition_info row +# ---------------------------------------------------------------------------------------------------------------------- + +statement error +CREATE TABLE error_ctas PARTITIONED BY (i) AS +SELECT CAST(range AS UTINYINT) AS i FROM range(300) +---- +:.*[Cc]onversion.*out of range.* + +# the failed CTAS must not have partially registered the table +statement error +SELECT * FROM error_ctas +---- +:.*not exist.* + +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'error_ctas') +---- +0 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 8: CTAS that errors at the BINDER stage (unknown column in PARTITIONED BY). This fails in +# PlanCreateTableAs *before* any planning-time partition_id allocation completes, so transaction state +# must be uncorrupted. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement error +CREATE TABLE binder_error PARTITIONED BY (nonexistent) AS SELECT 1 AS i +---- +:.*nonexistent.* + +# subsequent statements in the same transaction still work +statement ok +CREATE TABLE recovers_after_binder_error (i INTEGER) PARTITIONED BY (i) + +statement ok +INSERT INTO recovers_after_binder_error VALUES (42) + +statement ok +COMMIT + +query I +SELECT i FROM recovers_after_binder_error +---- +42 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 9: Long transaction that mixes inline CTAS + plain INSERT into other tables. Verifies the inline +# clauses don't disturb the transaction's broader commit logic. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +CREATE TABLE existing_target (k INTEGER, v VARCHAR) + +statement ok +BEGIN + +statement ok +INSERT INTO existing_target VALUES (1, 'first') + +statement ok +CREATE TABLE long_tx_ctas PARTITIONED BY (g) AS SELECT i AS g, i + 100 AS v FROM range(4) t(i) + +statement ok +INSERT INTO existing_target VALUES (2, 'second') + +statement ok +INSERT INTO long_tx_ctas VALUES (10, 110), (11, 111) + +statement ok +COMMIT + +query II +SELECT k, v FROM existing_target ORDER BY k +---- +1 first +2 second + +query II +SELECT g, count(*) FROM long_tx_ctas GROUP BY g ORDER BY g +---- +0 1 +1 1 +2 1 +3 1 +10 1 +11 1 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 10: Two inline CTAS in one transaction, ROLLBACK. Both tables AND both partition specs unwind. +# Guards against any per-CTAS state that might accumulate beyond the transaction-local catalog set. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement ok +CREATE TABLE rb_a PARTITIONED BY (i) AS SELECT range AS i FROM range(3) + +statement ok +CREATE TABLE rb_b PARTITIONED BY (j) SORTED BY (j) AS SELECT range AS j FROM range(4) + +statement ok +ROLLBACK + +statement error +SELECT * FROM rb_a +---- +:.*not exist.* + +statement error +SELECT * FROM rb_b +---- +:.*not exist.* + +# neither rolled-back table left a partition_info row +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name IN ('rb_a','rb_b')) +---- +0 + +# subsequent CTAS in fresh transaction succeeds and gets fresh ids - no collision with rolled-back local ids +statement ok +CREATE TABLE rb_after PARTITIONED BY (i) AS SELECT 1 AS i + +query I +SELECT * FROM rb_after +---- +1 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 11: Snapshot isolation. con2 begins BEFORE con1's inline-partitioned CTAS commits. con2 must NOT +# observe the new table even though it's already committed in the metadata DB. The partition_info row's +# begin_snapshot is the visibility gate. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.snap_iso PARTITIONED BY (i) AS SELECT 1 AS i + +# con2's snapshot predates the CTAS commit +statement error con2 +SELECT * FROM ducklake.snap_iso +---- +:.*not exist.* + +statement ok con2 +COMMIT + +# new transaction sees it +query I +SELECT * FROM ducklake.snap_iso +---- +1 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 12: Same-transaction INSERT after inline CTAS uses the partition spec. After commit, every data +# file (CTAS-written + INSERT-written) carries a partition_id, and on-disk paths are hive-partitioned. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement ok +CREATE TABLE same_tx_part PARTITIONED BY (g) AS SELECT range AS g, range + 100 AS v FROM range(3) + +statement ok +INSERT INTO same_tx_part VALUES (10, 110), (10, 111), (11, 112) + +statement ok +COMMIT + +# every data file references the partition_id - CTAS-written and INSERT-written alike +query I +SELECT count(*) FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name='same_tx_part') + AND partition_id IS NULL +---- +0 + +query II +SELECT g, count(*) FROM same_tx_part GROUP BY g ORDER BY g +---- +0 1 +1 1 +2 1 +10 2 +11 1 diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test new file mode 100644 index 00000000000..7538a72c6da --- /dev/null +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test @@ -0,0 +1,215 @@ +# name: test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test +# description: CTAS with inline PARTITIONED BY / SORTED BY interacting with the data-inlining row limit. +# When a CTAS's row count is below DATA_INLINING_ROW_LIMIT, rows go to the per-table +# ducklake_inlined_data__ catalog table instead of Parquet. Partition and sort +# metadata must still be recorded; the inline operator coordinates with the about-to-be-created +# table_id at sink-state-init time. Once a flush is called, inlined rows materialize as +# partition-aware Parquet files. (On-disk hive directory verification is intentionally omitted - +# that's covered by create_table_inline_basic.test / create_table_inline_ctas.test.) +# group: [create_table_inline_partition_sort] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + +# DATA_INLINING_ROW_LIMIT is non-zero (the historic default-shaped CTAS path). Pick 1000 so we can write +# both fully-inlined and overflow-to-Parquet cases below. +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_with_inlining', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 1000) + +statement ok +USE ducklake + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 1: CTAS that fits entirely under DATA_INLINING_ROW_LIMIT, with PARTITIONED BY (i). +# Rows go to ducklake_inlined_data__*; ducklake_data_file stays empty for this table. +# Partition metadata is still recorded - the partition_id is allocated at planning time and bound to the +# table entry at sink-state-init regardless of whether the rows landed in Parquet or in the inlined table. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +CREATE TABLE small_part PARTITIONED BY (i) AS +SELECT range AS i, range::VARCHAR AS v FROM range(50) + +# data round-trips (rows readable whether inlined or in Parquet) +query I +SELECT count(*) FROM small_part +---- +50 + +# partition metadata recorded for the inlined CTAS +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +---- +1 + +query II +SELECT partition_key_index, transform FROM ducklake_meta.ducklake_partition_column +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +---- +0 identity + +# zero data_file rows: rows are sitting in the inlined-data catalog table, not on Parquet +query I +SELECT count(*) FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +---- +0 + +# the per-table inlined-data table exists in the metadata catalog +query I +SELECT count(*) FROM ducklake_meta.ducklake_inlined_data_tables +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +---- +1 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 2: CTAS that fits under DATA_INLINING_ROW_LIMIT with both PARTITIONED BY and SORTED BY clauses. +# Both partition and sort metadata land in their respective catalog tables even though the data is inlined. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +CREATE TABLE small_both PARTITIONED BY (g) SORTED BY (v) AS +SELECT i % 4 AS g, i AS v FROM range(20) t(i) + +query I +SELECT count(*) FROM small_both +---- +20 + +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') +---- +1 + +query I +SELECT count(*) FROM ducklake_meta.ducklake_sort_info +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') +---- +1 + +# the sort_expression row carries the planning-time-allocated direction/null_order +query III +SELECT expression, sort_direction, null_order FROM ducklake_meta.ducklake_sort_expression +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') +---- +v ASC NULLS_LAST + +# fully inlined: zero Parquet files +query I +SELECT count(*) FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') +---- +0 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 3: Flushing inlined data after a fully-inlined partitioned CTAS. +# `ducklake_flush_inlined_data` materializes the inlined rows as Parquet files using the table's +# partition spec. Post-flush, ducklake_data_file gains rows that reference the partition_id. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +CALL ducklake_flush_inlined_data('ducklake') + +# at least one data file now exists for small_part, and each carries a partition_id (the inlining-then-flush +# path hands the spec through correctly) +query I +SELECT count(*) > 0 FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +---- +true + +query I +SELECT count(*) FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') + AND partition_id IS NULL +---- +0 + +# data still visible after flush +query I +SELECT count(*) FROM small_part +---- +50 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 4: CTAS that OVERFLOWS DATA_INLINING_ROW_LIMIT (1000). 5000 rows -> the operator routes everything +# above the limit to Parquet. The partition spec applies to the Parquet writes; the sort spec applies to +# the row stream BEFORE the inline operator (sort_on_insert=true default). +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +CREATE TABLE overflow_part PARTITIONED BY (g) SORTED BY (v) AS +SELECT i % 8 AS g, i AS v FROM range(5000) t(i) + +query I +SELECT count(*) FROM overflow_part +---- +5000 + +# partition + sort metadata persist +query II +SELECT + (SELECT count(*) FROM ducklake_meta.ducklake_partition_info + WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')), + (SELECT count(*) FROM ducklake_meta.ducklake_sort_info + WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')) +---- +1 1 + +# overflow path: at least one Parquet file was written, and each has a non-null partition_id +query I +SELECT count(*) > 0 FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part') +---- +true + +query I +SELECT count(*) FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part') + AND partition_id IS NULL +---- +0 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 5: Flushing the overflow_part table (which had data both inlined and on Parquet). After flush, all +# rows should be on Parquet with non-null partition_id, and the inlined-data table count for it goes to 0 +# (or the inlined-data tables are cleared per-table by the flush). +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +CALL ducklake_flush_inlined_data('ducklake') + +# overflow_part: zero rows in any inlined-data table after flush +query I +SELECT count(*) FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part') + AND partition_id IS NULL +---- +0 + +# the row count is preserved end-to-end (inlined → flushed → Parquet) +query I +SELECT count(*) FROM overflow_part +---- +5000 + +# small_both: same checks - the partition+sort spec persists through inlining + flush +query I +SELECT count(*) FROM small_both +---- +20 + +query II +SELECT g, count(*) FROM small_both GROUP BY g ORDER BY g +---- +0 5 +1 5 +2 5 +3 5 From 164b4ff6dc344820529c73292713034b9e5bce06 Mon Sep 17 00:00:00 2001 From: Alex-Monahan Date: Mon, 4 May 2026 21:56:01 -0700 Subject: [PATCH 02/12] Fix Postgres bigint overflow in CTAS PARTITIONED BY commit retry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GetNewPartitionKey was mutating partition_data->partition_id from the local TRANSACTION_LOCAL_ID_START sentinel to the freshly-allocated committed id at commit time. On a metadata-write conflict (concurrent CTAS), CommitChanges retries with a fresh commit_state. The retry would read the already-mutated committed id as if it were the local id and register the wrong key in committed_partition_ids — RemapPartitionId would then leak the unmodified local sentinel (2^63) into the data-file SQL. Postgres bigint rejects 2^63 (signed int64 max is 2^63 - 1); SQLite silently truncates, which masked the bug on the SQLite metadata backend. The mutation never had any consumer: every reader of partition_data->partition_id runs at plan time of a current or future statement, and future statements load fresh state from metadata via the persisted partition_key.id. Removing the mutation makes GetNewPartitionKey idempotent across retries. Apply the same removal to GetNewSortKey for symmetry — sort_id is currently harmless because no RemapSortId step exists, but the mutation pattern is the same latent foot-gun. --- src/storage/ducklake_transaction.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp index 6bc5e6c2575..13df5788c42 100644 --- a/src/storage/ducklake_transaction.cpp +++ b/src/storage/ducklake_transaction.cpp @@ -1393,7 +1393,13 @@ DuckLakePartitionInfo DuckLakeTransaction::GetNewPartitionKey(DuckLakeCommitStat auto local_partition_id = partition_data->partition_id; auto partition_id = commit_state.commit_snapshot.next_catalog_id++; partition_key.id = partition_id; - partition_data->partition_id = partition_id; + // Do NOT mutate partition_data->partition_id here. On commit retry (concurrent metadata-write + // conflicts), GetNewPartitionKey runs again with a fresh commit_state. If the in-memory partition + // data carried the previous attempt's committed id, local_partition_id above would be wrong and + // committed_partition_ids would never contain the actual local sentinel that data files were + // stamped with — RemapPartitionId would then leak TRANSACTION_LOCAL_ID_START (= 2^63) into the + // SQL, which Postgres bigint rejects (SQLite silently truncates). Keeping the in-memory value as + // the local id makes GetNewPartitionKey idempotent across retries. for (auto &field : partition_data->fields) { DuckLakePartitionFieldInfo partition_field; partition_field.partition_key_index = field.partition_key_index; @@ -1442,7 +1448,10 @@ DuckLakeSortInfo DuckLakeTransaction::GetNewSortKey(DuckLakeCommitState &commit_ auto sort_id = commit_state.commit_snapshot.next_catalog_id++; sort_key.id = sort_id; - sort_data->sort_id = sort_id; + // Do NOT mutate sort_data->sort_id here — same retry-idempotency reasoning as GetNewPartitionKey. + // No downstream reader observes the mutated value (data files don't reference sort_id), so this + // is harmless today, but removing it keeps both code paths symmetric and prevents the same bug + // from being reintroduced if anyone adds a RemapSortId step later. for (auto &field : sort_data->fields) { DuckLakeSortFieldInfo sort_field; sort_field.sort_key_index = field.sort_key_index; From 251593aac8c51ce1a02eb61e1aee975668746917 Mon Sep 17 00:00:00 2001 From: Alex-Monahan Date: Fri, 1 May 2026 13:18:09 -0700 Subject: [PATCH 03/12] Support table-scoped config options via CREATE TABLE / CTAS WITH (...) Allow CREATE TABLE and CREATE TABLE AS SELECT statements to set per-table DuckLake config options through the existing WITH (key=value, ...) clause. Each key/value pair is validated using the same path as ducklake_set_option and persisted into ducklake_metadata at commit time, scoped to the new table. Options also take effect immediately for any data written within the same transaction (e.g. CTAS row group sizing) and survive a rename within that transaction. --- src/functions/ducklake_set_option.cpp | 30 ++-- .../functions/ducklake_table_functions.hpp | 5 + src/include/storage/ducklake_insert.hpp | 17 ++- .../storage/ducklake_metadata_manager.hpp | 1 + src/include/storage/ducklake_table_entry.hpp | 9 ++ src/storage/ducklake_catalog.cpp | 8 +- src/storage/ducklake_insert.cpp | 56 +++++-- src/storage/ducklake_metadata_manager.cpp | 19 +++ src/storage/ducklake_schema_entry.cpp | 33 ++++ src/storage/ducklake_table_entry.cpp | 2 + src/storage/ducklake_transaction.cpp | 18 +++ .../create_table_inline_errors.test | 4 +- .../settings/create_table_with_options.test | 144 ++++++++++++++++++ .../create_table_with_options_rollback.test | 59 +++++++ ...create_table_with_options_transaction.test | 94 ++++++++++++ 15 files changed, 466 insertions(+), 33 deletions(-) create mode 100644 test/sql/settings/create_table_with_options.test create mode 100644 test/sql/settings/create_table_with_options_rollback.test create mode 100644 test/sql/settings/create_table_with_options_transaction.test diff --git a/src/functions/ducklake_set_option.cpp b/src/functions/ducklake_set_option.cpp index f4910846e4b..83771a1edf7 100644 --- a/src/functions/ducklake_set_option.cpp +++ b/src/functions/ducklake_set_option.cpp @@ -15,17 +15,12 @@ struct DuckLakeSetOptionData : public TableFunctionData { DuckLakeConfigOption option; }; -static unique_ptr DuckLakeSetOptionBind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - auto &catalog = DuckLakeBaseMetadataFunction::GetCatalog(context, input.inputs[0]); - DuckLakeConfigOption config_option; - auto &option = config_option.option.key; - auto &value = config_option.option.value; - - option = StringUtil::Lower(StringValue::Get(input.inputs[1])); - auto &val = input.inputs[2]; +DuckLakeTag ValidateDuckLakeConfigOption(ClientContext &context, const string &option_key, const Value &val) { + DuckLakeTag result; + auto option = StringUtil::Lower(option_key); + result.key = option; + auto &value = result.value; - // read the option if (option == "parquet_compression") { auto codec = val.DefaultCastAs(LogicalType::VARCHAR).GetValue(); vector supported_algorithms {"uncompressed", "snappy", "gzip", "zstd", "brotli", "lz4", "lz4_raw"}; @@ -82,9 +77,8 @@ static unique_ptr DuckLakeSetOptionBind(ClientContext &context, Ta } else if (option == "delete_older_than" || option == "expire_older_than") { auto interval_value = val.ToString(); if (!interval_value.empty()) { - // Let's verify this is actually an interval - interval_t result; - if (!Interval::FromString(val.ToString(), result)) { + interval_t interval_result; + if (!Interval::FromString(val.ToString(), interval_result)) { throw BinderException("%s is not a valid interval value.", option); } } @@ -103,6 +97,16 @@ static unique_ptr DuckLakeSetOptionBind(ClientContext &context, Ta } else { throw NotImplementedException("Unsupported option %s", option); } + return result; +} + +static unique_ptr DuckLakeSetOptionBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto &catalog = DuckLakeBaseMetadataFunction::GetCatalog(context, input.inputs[0]); + DuckLakeConfigOption config_option; + config_option.option = + ValidateDuckLakeConfigOption(context, StringValue::Get(input.inputs[1]), input.inputs[2]); + auto &option = config_option.option.key; // read the scope string schema; diff --git a/src/include/functions/ducklake_table_functions.hpp b/src/include/functions/ducklake_table_functions.hpp index 3a6814d6ee7..571c8d73687 100644 --- a/src/include/functions/ducklake_table_functions.hpp +++ b/src/include/functions/ducklake_table_functions.hpp @@ -15,6 +15,11 @@ namespace duckdb { class DuckLakeCatalog; struct DuckLakeSnapshotInfo; +struct DuckLakeTag; + +//! Validate and canonicalize a single (key, value) DuckLake config option. +//! Shared by `CALL ducklake.set_option(...)` and CREATE TABLE / CTAS `WITH (...)`. +DuckLakeTag ValidateDuckLakeConfigOption(ClientContext &context, const string &option_key, const Value &val); class DuckLakeTableFunctionUtil { public: diff --git a/src/include/storage/ducklake_insert.hpp b/src/include/storage/ducklake_insert.hpp index 4bd5ad493d3..e2c33c173e6 100644 --- a/src/include/storage/ducklake_insert.hpp +++ b/src/include/storage/ducklake_insert.hpp @@ -14,6 +14,7 @@ #include "duckdb/common/index_vector.hpp" #include "storage/ducklake_stats.hpp" #include "common/ducklake_data_file.hpp" +#include "common/ducklake_options.hpp" #include "storage/ducklake_field_data.hpp" #include "storage/ducklake_partition_data.hpp" #include "storage/ducklake_sort_data.hpp" @@ -22,6 +23,7 @@ namespace duckdb { class DuckLakeCatalog; class DuckLakeSchemaEntry; class DuckLakeTableEntry; +class ParsedExpression; class DuckLakeFieldData; struct DuckLakeCopyOptions; struct DuckLakeCopyInput; @@ -153,10 +155,18 @@ struct DuckLakeCopyInput { //! CTAS-flavored: take the field-id mapping and (optional) partition spec from the planning-time-built //! spec rather than from a DuckLakeTableEntry that hasn't been created yet. field_data is required //! because the parquet writer always needs it; partition_data is null when CREATE TABLE AS has no - //! inline PARTITIONED BY clause. + //! inline PARTITIONED BY clause. `create_with_options` is the raw `WITH (...)` clause from the parser; + //! it is validated and surfaced through `with_options` so the parquet write honors it. DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns, const string &data_path_p, DuckLakeFieldData &field_data, - optional_ptr partition_data = nullptr); + optional_ptr partition_data = nullptr, + const case_insensitive_map_t> &create_with_options = {}); + + //! Look up an effective DuckLake config option for this write: returns the WITH override if one is + //! present in `with_options`, otherwise falls through to the catalog's table/schema/global lookup. + //! Used by PlanCopyForInsert so a CTAS write whose new table_id is not yet allocated still picks + //! up the user's WITH (...) values. + bool GetEffectiveOption(const string &key, string &out) const; DuckLakeCatalog &catalog; optional_ptr partition_data; @@ -168,6 +178,9 @@ struct DuckLakeCopyInput { TableIndex table_id; InsertVirtualColumns virtual_columns = InsertVirtualColumns::NONE; optional_idx get_table_index; + //! CREATE TABLE / CTAS WITH (...) options that override same-key catalog options for this write. + //! Empty for plain INSERT. Validated at plan time before reaching here. + option_map_t with_options; }; } // namespace duckdb diff --git a/src/include/storage/ducklake_metadata_manager.hpp b/src/include/storage/ducklake_metadata_manager.hpp index 90206e1f3ce..5192794220c 100644 --- a/src/include/storage/ducklake_metadata_manager.hpp +++ b/src/include/storage/ducklake_metadata_manager.hpp @@ -174,6 +174,7 @@ class DuckLakeMetadataManager { virtual string WriteNewPartitionKeys(DuckLakeSnapshot commit_snapshot, const vector &new_partitions); virtual string WriteNewSortKeys(DuckLakeSnapshot commit_snapshot, const vector &new_sorts); + virtual string WriteNewTableConfigOptions(const vector &new_options); virtual string WriteDroppedColumns(const vector &dropped_columns); virtual string WriteNewColumns(const vector &new_columns); virtual string WriteNewTags(const vector &new_tags); diff --git a/src/include/storage/ducklake_table_entry.hpp b/src/include/storage/ducklake_table_entry.hpp index 8dc63131f21..d1d2c082db8 100644 --- a/src/include/storage/ducklake_table_entry.hpp +++ b/src/include/storage/ducklake_table_entry.hpp @@ -84,6 +84,13 @@ class DuckLakeTableEntry : public TableCatalogEntry { optional_ptr GetFieldId(FieldIndex field_index) const; void SetPartitionData(unique_ptr partition_data); void SetSortData(unique_ptr sort_data); + //! Pending table-scoped config options collected from a CREATE TABLE / CTAS WITH (...) clause. + void SetPendingTableOptions(vector options) { + pending_table_options = std::move(options); + } + const vector &GetPendingTableOptions() const { + return pending_table_options; + } shared_ptr GetTableStats(ClientContext &context); shared_ptr GetTableStats(DuckLakeTransaction &transaction); idx_t GetNetDataFileRowCount(DuckLakeTransaction &transaction); @@ -187,6 +194,8 @@ class DuckLakeTableEntry : public TableCatalogEntry { unique_ptr sort_data; // only set for REMOVED_COLUMN unique_ptr changed_fields; + // table-scoped config options collected from CREATE TABLE / CTAS WITH (...), pending until commit + vector pending_table_options; }; } // namespace duckdb diff --git a/src/storage/ducklake_catalog.cpp b/src/storage/ducklake_catalog.cpp index ec77b4bfed4..2ed27012ce3 100644 --- a/src/storage/ducklake_catalog.cpp +++ b/src/storage/ducklake_catalog.cpp @@ -157,12 +157,8 @@ optional_ptr DuckLakeCatalog::CreateSchema(CatalogTransaction tran } ErrorData DuckLakeCatalog::SupportsCreateTable(BoundCreateTableInfo &info) { - auto &base = info.Base().Cast(); - if (!base.options.empty()) { - return ErrorData( - ExceptionType::CATALOG, - StringUtil::Format("WITH clause is not supported for tables in a %s catalog", GetCatalogType())); - } + // DuckLake handles PARTITIONED BY, SORTED BY, and WITH (...) itself in DuckLakeSchemaEntry, + // so suppress the base-class rejection of all three. return ErrorData(); } diff --git a/src/storage/ducklake_insert.cpp b/src/storage/ducklake_insert.cpp index 81f4375d70d..ac12159b77d 100644 --- a/src/storage/ducklake_insert.cpp +++ b/src/storage/ducklake_insert.cpp @@ -10,11 +10,13 @@ #include "storage/ducklake_geo_stats.hpp" #include "common/ducklake_types.hpp" #include "functions/ducklake_compaction_functions.hpp" +#include "functions/ducklake_table_functions.hpp" #include "duckdb/catalog/catalog_entry/copy_function_catalog_entry.hpp" #include "duckdb/execution/operator/order/physical_order.hpp" #include "duckdb/execution/operator/projection/physical_projection.hpp" #include "duckdb/execution/operator/scan/physical_table_scan.hpp" +#include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/parser/parser.hpp" #include "duckdb/planner/binder.hpp" #include "duckdb/planner/expression_binder.hpp" @@ -350,16 +352,43 @@ DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeTableEntry schema_id = table.ParentSchema().Cast().GetSchemaId(); table_id = table.GetTableId(); encryption_key = catalog.GenerateEncryptionKey(context); + // Surface CREATE TABLE / CTAS WITH (...) options for transaction-local tables: the catalog + // is not mutated for transaction-local ids, so the only source of truth is the table entry. + for (auto &tag : table.GetPendingTableOptions()) { + with_options[tag.key] = tag.value; + } } DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns, const string &data_path_p, DuckLakeFieldData &field_data_p, - optional_ptr partition_data_p) + optional_ptr partition_data_p, + const case_insensitive_map_t> &create_with_options) : catalog(schema.ParentCatalog().Cast()), columns(columns), data_path(data_path_p) { field_data = &field_data_p; partition_data = partition_data_p; schema_id = schema.GetSchemaId(); encryption_key = catalog.GenerateEncryptionKey(context); + // Validate CREATE TABLE AS ... WITH (key=value, ...) and surface as per-write overrides so the + // parquet files land with the requested compression / row group size, even though the new table_id + // won't be allocated until DuckLakeInsert::GetGlobalSinkState (long after this plan is built). + for (auto &option : create_with_options) { + if (!option.second || option.second->GetExpressionClass() != ExpressionClass::CONSTANT) { + throw BinderException( + "WITH option \"%s\" must be a constant expression in DuckLake CREATE TABLE / CTAS", option.first); + } + auto &constant_expr = option.second->Cast(); + auto tag = ValidateDuckLakeConfigOption(context, option.first, constant_expr.value); + with_options[tag.key] = tag.value; + } +} + +bool DuckLakeCopyInput::GetEffectiveOption(const string &key, string &out) const { + auto it = with_options.find(key); + if (it != with_options.end()) { + out = it->second; + return true; + } + return catalog.TryGetConfigOption(key, out, schema_id, table_id); } static void StripTrailingSeparator(FileSystem &fs, string &path) { @@ -502,32 +531,38 @@ DuckLakeCopyOptions DuckLakeInsert::GetCopyOptions(ClientContext &context, DuckL auto &schema_id = copy_input.schema_id; auto &table_id = copy_input.table_id; string parquet_compression; - if (catalog.TryGetConfigOption("parquet_compression", parquet_compression, schema_id, table_id)) { + if (copy_input.GetEffectiveOption("parquet_compression", parquet_compression)) { info->options["compression"].emplace_back(parquet_compression); } string parquet_version; - if (catalog.TryGetConfigOption("parquet_version", parquet_version, schema_id, table_id)) { + if (copy_input.GetEffectiveOption("parquet_version", parquet_version)) { info->options["parquet_version"].emplace_back(parquet_version); } string parquet_compression_level; - if (catalog.TryGetConfigOption("parquet_compression_level", parquet_compression_level, schema_id, table_id)) { + if (copy_input.GetEffectiveOption("parquet_compression_level", parquet_compression_level)) { info->options["compression_level"].emplace_back(parquet_compression_level); } string row_group_size; - if (catalog.TryGetConfigOption("parquet_row_group_size", row_group_size, schema_id, table_id)) { + if (copy_input.GetEffectiveOption("parquet_row_group_size", row_group_size)) { info->options["row_group_size"].emplace_back(row_group_size); } string row_group_size_bytes; - if (catalog.TryGetConfigOption("parquet_row_group_size_bytes", row_group_size_bytes, schema_id, table_id)) { + if (copy_input.GetEffectiveOption("parquet_row_group_size_bytes", row_group_size_bytes)) { info->options["row_group_size_bytes"].emplace_back(row_group_size_bytes + " bytes"); } string per_thread_output_str; bool per_thread_output = false; - if (catalog.TryGetConfigOption("per_thread_output", per_thread_output_str, schema_id, table_id)) { + if (copy_input.GetEffectiveOption("per_thread_output", per_thread_output_str)) { per_thread_output = per_thread_output_str == "true"; } - idx_t target_file_size = catalog.GetConfigOption("target_file_size", schema_id, table_id, - DuckLakeCatalog::DEFAULT_TARGET_FILE_SIZE); + string target_file_size_str; + idx_t target_file_size = DuckLakeCatalog::DEFAULT_TARGET_FILE_SIZE; + if (copy_input.GetEffectiveOption("target_file_size", target_file_size_str)) { + target_file_size = Value(target_file_size_str).GetValue(); + } else { + target_file_size = + catalog.GetConfigOption("target_file_size", schema_id, table_id, target_file_size); + } // Always use native parquet geometry for writing info->options["geoparquet_version"].emplace_back("NONE"); @@ -901,7 +936,8 @@ PhysicalOperator &DuckLakeCatalog::PlanCreateTableAs(ClientContext &context, Phy auto table_data_path = duck_schema.DataPath() + DuckLakeCatalog::GeneratePathFromName(table_uuid, create_info.table); - DuckLakeCopyInput copy_input(context, duck_schema, columns, table_data_path, *field_data, partition_data.get()); + DuckLakeCopyInput copy_input(context, duck_schema, columns, table_data_path, *field_data, partition_data.get(), + create_info.options); auto &physical_copy = DuckLakeInsert::PlanCopyForInsert(context, planner, copy_input, root.get()); optional_idx insert_partition_id; diff --git a/src/storage/ducklake_metadata_manager.cpp b/src/storage/ducklake_metadata_manager.cpp index 4dd5163a6ae..aeed0a15e20 100644 --- a/src/storage/ducklake_metadata_manager.cpp +++ b/src/storage/ducklake_metadata_manager.cpp @@ -4573,6 +4573,25 @@ WHERE {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < end_snapshot OR end_s return table_sizes; } +string DuckLakeMetadataManager::WriteNewTableConfigOptions(const vector &new_options) { + if (new_options.empty()) { + return string(); + } + // fresh committed table-id from LocalChangeType::CREATED only — no UPSERT needed + string query = "INSERT INTO {METADATA_CATALOG}.ducklake_metadata VALUES "; + for (idx_t i = 0; i < new_options.size(); i++) { + auto &opt = new_options[i]; + D_ASSERT(opt.table_id.IsValid() && !opt.table_id.IsTransactionLocal()); + if (i > 0) { + query += ", "; + } + query += StringUtil::Format("(%s, %s, 'table', %d)", SQLString(opt.option.key), SQLString(opt.option.value), + opt.table_id.index); + } + query += ";\n"; + return query; +} + void DuckLakeMetadataManager::SetConfigOption(const DuckLakeConfigOption &option) { // check if the option already exists auto &option_key = option.option.key; diff --git a/src/storage/ducklake_schema_entry.cpp b/src/storage/ducklake_schema_entry.cpp index b7b03fc962b..875ce91fb0e 100644 --- a/src/storage/ducklake_schema_entry.cpp +++ b/src/storage/ducklake_schema_entry.cpp @@ -5,7 +5,9 @@ #include "duckdb/parser/parsed_data/create_view_info.hpp" #include "duckdb/parser/parsed_data/drop_info.hpp" #include "duckdb/parser/result_modifier.hpp" +#include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/planner/parsed_data/bound_create_table_info.hpp" +#include "functions/ducklake_table_functions.hpp" #include "storage/ducklake_catalog.hpp" #include "storage/ducklake_table_entry.hpp" #include "storage/ducklake_transaction.hpp" @@ -62,6 +64,27 @@ bool DuckLakeSchemaEntry::HandleCreateConflict(CatalogTransaction transaction, C return true; } +static vector ValidateCreateTableOptions(ClientContext &context, BoundCreateTableInfo &info) { + vector result; + auto &base_info = info.Base(); + if (base_info.options.empty()) { + return result; + } + for (auto &option : base_info.options) { + if (!option.second) { + throw BinderException("WITH option \"%s\" requires a value", option.first); + } + if (option.second->GetExpressionClass() != ExpressionClass::CONSTANT) { + throw BinderException( + "WITH option \"%s\" must be a constant expression in DuckLake CREATE TABLE / CTAS", option.first); + } + auto &constant_expr = option.second->Cast(); + auto tag = ValidateDuckLakeConfigOption(context, option.first, constant_expr.value); + result.push_back(std::move(tag)); + } + return result; +} + optional_ptr DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCreateTableInfo &info, string table_uuid, string table_data_path, unique_ptr prebuilt_partition_data, @@ -78,6 +101,8 @@ DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCr throw BinderException("Column name \"%s\" is reserved by DuckLake for internal use", col.Name()); } } + // validate WITH (...) options before any catalog mutation + auto validated_options = ValidateCreateTableOptions(transaction.GetContext(), info); //! get a local table-id auto table_id = TableIndex(duck_transaction.GetLocalCatalogId()); // generate field ids based on the column ids @@ -114,6 +139,14 @@ DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCr table_entry->SetSortData( DuckLakeTableEntry::BuildSortData(duck_transaction, table_entry->GetColumns(), orders)); } + // stash on the entry; same-transaction writers pick these up via DuckLakeCopyInput, and at commit + // time WriteNewTableConfigOptions persists them under the freshly-allocated committed table_id. + // Routing through the entry (instead of mutating catalog options under the transaction-local id) + // keeps the catalog free of dead entries on rollback, matching how new_tables / local_changes + // already scope transaction state. + if (!validated_options.empty()) { + table_entry->SetPendingTableOptions(validated_options); + } auto result = table_entry.get(); duck_transaction.CreateEntry(std::move(table_entry)); return result; diff --git a/src/storage/ducklake_table_entry.cpp b/src/storage/ducklake_table_entry.cpp index 628b23a1b56..c54790c2d6f 100644 --- a/src/storage/ducklake_table_entry.cpp +++ b/src/storage/ducklake_table_entry.cpp @@ -75,6 +75,7 @@ DuckLakeTableEntry::DuckLakeTableEntry(DuckLakeTableEntry &parent, CreateTableIn if (parent.sort_data) { sort_data = make_uniq(*parent.sort_data); } + pending_table_options = parent.pending_table_options; CheckSupportedTypes(); if (local_change.type == LocalChangeType::ADD_COLUMN) { LogicalIndex new_col_idx(columns.LogicalColumnCount() - 1); @@ -99,6 +100,7 @@ DuckLakeTableEntry::DuckLakeTableEntry(DuckLakeTableEntry &parent, CreateTableIn if (parent.sort_data) { sort_data = make_uniq(*parent.sort_data); } + pending_table_options = parent.pending_table_options; CheckSupportedTypes(); auto changed_id = local_change.field_index; diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp index 13df5788c42..f043dff2b48 100644 --- a/src/storage/ducklake_transaction.cpp +++ b/src/storage/ducklake_transaction.cpp @@ -1522,6 +1522,7 @@ struct NewTableInfo { vector new_columns; vector new_inlined_data_tables; vector new_sort_keys; + vector new_table_options; }; struct NewMacroInfo { @@ -1772,6 +1773,18 @@ void DuckLakeTransaction::GetNewTableInfo(DuckLakeCommitState &commit_state, Duc result.new_sort_keys.push_back(std::move(sort_key)); } } + // CREATE TABLE / CTAS WITH (...) options. AlterEntryInternal drops the CREATED entry from + // the transaction-local set on rename, so a CREATE+RENAME chain arrives here as a single + // RENAMED entry with a still-transaction-local id — emit whenever old_table_id is local. + if (old_table_id.IsTransactionLocal()) { + auto &pending_options = table.GetPendingTableOptions(); + for (auto &tag : pending_options) { + DuckLakeConfigOption opt; + opt.option = tag; + opt.table_id = new_table_id; + result.new_table_options.push_back(std::move(opt)); + } + } break; } default: @@ -2383,6 +2396,11 @@ string DuckLakeTransaction::CommitChanges(DuckLakeCommitState &commit_state, batch_queries += metadata_manager->WriteNewColumns(result.new_columns); batch_queries += metadata_manager->WriteNewInlinedTables(commit_snapshot, result.new_inlined_data_tables); batch_queries += metadata_manager->WriteNewSortKeys(commit_snapshot, result.new_sort_keys); + batch_queries += metadata_manager->WriteNewTableConfigOptions(result.new_table_options); + // re-key in-memory options from the local id to the committed id + for (auto &opt : result.new_table_options) { + ducklake_catalog.SetConfigOption(opt); + } new_tables_result = result.new_tables; new_inlined_data_tables_result = result.new_inlined_data_tables; } diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test index 85343564a51..e5bda9a2439 100644 --- a/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test @@ -28,11 +28,11 @@ CREATE TABLE bad_sort (a INTEGER) SORTED BY (nonexistent) ---- :.*nonexistent.* -# WITH (...) clause is rejected by SupportsCreateTable override (DuckLake has no WITH semantics) +# WITH (...) accepts known DuckLake config option keys; unknown keys are rejected during validation statement error CREATE TABLE bad_with (a INTEGER) WITH (some_option='x') ---- -WITH clause is not supported for tables in a ducklake catalog +:.*Unsupported option.* # unsupported transform function statement error diff --git a/test/sql/settings/create_table_with_options.test b/test/sql/settings/create_table_with_options.test new file mode 100644 index 00000000000..f666f195d1e --- /dev/null +++ b/test/sql/settings/create_table_with_options.test @@ -0,0 +1,144 @@ +# name: test/sql/settings/create_table_with_options.test +# description: Test CREATE TABLE / CTAS with WITH (...) table-scoped config options +# group: [settings] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_create_with_options.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_create_with_options_files'); + +# CREATE TABLE with a single WITH option persists to ducklake_metadata +statement ok +CREATE TABLE ducklake.t_compress(i INTEGER) WITH (parquet_compression='zstd'); + +query III +SELECT option_name, value, scope FROM ducklake.options() WHERE option_name='parquet_compression' AND scope='TABLE' AND scope_entry='main.t_compress' +---- +parquet_compression zstd TABLE + +# inserting into the table uses the table-scoped option +statement ok +INSERT INTO ducklake.t_compress SELECT range FROM range(1000) + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_compress/**') +---- +ZSTD + +# CREATE TABLE with multiple WITH options +statement ok +CREATE TABLE ducklake.t_multi(i INTEGER) WITH (parquet_compression='gzip', parquet_row_group_size=512); + +query III +SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_multi' ORDER BY option_name +---- +parquet_compression gzip TABLE +parquet_row_group_size 512 TABLE + +# CTAS with WITH options - validates the option both takes effect and is persisted +statement ok +SET threads=1 + +statement ok +SET preserve_insertion_order=false + +statement ok +CREATE TABLE ducklake.t_ctas WITH (parquet_compression='gzip', parquet_row_group_size=2048) AS SELECT i, 'hello' || i s FROM range(4000) t(i); + +query III +SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_ctas' ORDER BY option_name +---- +parquet_compression gzip TABLE +parquet_row_group_size 2048 TABLE + +# parquet files written by the CTAS use the configured compression +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_ctas/**') +---- +GZIP + +# 4000 rows at row_group_size=2048 produces at least 2 row groups +query I +SELECT COUNT(DISTINCT row_group_id) >= 2 FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_ctas/**') +---- +true + +# unsupported option key raises an error +statement error +CREATE TABLE ducklake.t_bad(i INTEGER) WITH (not_a_real_option='x'); +---- +Unsupported option + +# bad value also fails (parquet_compression rejects unknown codec) +statement error +CREATE TABLE ducklake.t_bad(i INTEGER) WITH (parquet_compression='bogus'); +---- +Unsupported codec + +statement ok +DETACH ducklake + +# options survive a restart +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_create_with_options.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_create_with_options_files'); + +query I +SELECT value FROM ducklake.options() WHERE option_name='parquet_compression' AND scope='TABLE' AND scope_entry='main.t_compress' +---- +zstd + +# CREATE OR REPLACE swaps in new options for the new table +statement ok +CREATE TABLE ducklake.t_repl(i INTEGER) WITH (parquet_compression='zstd'); + +statement ok +CREATE OR REPLACE TABLE ducklake.t_repl(i INTEGER) WITH (parquet_compression='gzip'); + +query I +SELECT value FROM ducklake.options() WHERE option_name='parquet_compression' AND scope='TABLE' AND scope_entry='main.t_repl' +---- +gzip + +# CREATE TABLE IF NOT EXISTS does not mutate options on an existing table +statement ok +CREATE TABLE ducklake.t_ine(i INTEGER) WITH (parquet_compression='zstd'); + +statement ok +CREATE TABLE IF NOT EXISTS ducklake.t_ine(i INTEGER) WITH (parquet_compression='gzip'); + +query I +SELECT value FROM ducklake.options() WHERE option_name='parquet_compression' AND scope='TABLE' AND scope_entry='main.t_ine' +---- +zstd + +# WITH (...) on a CTAS beats a schema-scope option for the same key +statement ok +CREATE SCHEMA ducklake.s_prec + +statement ok +CALL ducklake.set_option('parquet_compression', 'gzip', schema => 's_prec') + +statement ok +CREATE TABLE ducklake.s_prec.t_prec WITH (parquet_compression='zstd') AS SELECT i FROM range(1000) t(i) + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/s_prec/t_prec/**') +---- +ZSTD + +# option keys are case-insensitive and the value is canonicalized to lower +statement ok +CREATE TABLE ducklake.t_case(i INTEGER) WITH (PARQUET_COMPRESSION='GZIP') + +query II +SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_case' +---- +parquet_compression gzip + +# non-constant value is rejected at plan time +statement error +CREATE TABLE ducklake.t_nonconst(i INTEGER) WITH (parquet_compression=upper('zstd')) +---- +must be a constant expression diff --git a/test/sql/settings/create_table_with_options_rollback.test b/test/sql/settings/create_table_with_options_rollback.test new file mode 100644 index 00000000000..0df7ee6dbf6 --- /dev/null +++ b/test/sql/settings/create_table_with_options_rollback.test @@ -0,0 +1,59 @@ +# name: test/sql/settings/create_table_with_options_rollback.test +# description: A rolled-back CREATE TABLE WITH (...) must not leak across transactions. +# group: [settings] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_with_rollback.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_with_rollback_files'); + +statement ok +SET threads=1 + +statement ok +SET preserve_insertion_order=false + +# create a table with a WITH override, then ROLLBACK. The transaction-local table_id used in this +# transaction is reset for the next DuckLakeTransaction (each starts at TRANSACTION_LOCAL_ID_START), +# so a leaked override under that local id would silently contaminate any subsequent transaction +# whose first new table happens to land on the same local id. +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.t_rolled (i INTEGER) WITH (parquet_compression='gzip') + +statement ok +INSERT INTO ducklake.t_rolled SELECT i FROM range(100) t(i) + +statement ok +ROLLBACK + +# nothing about the rolled-back table should remain in metadata +query I +SELECT COUNT(*) FROM ducklake.options() WHERE scope='TABLE' +---- +0 + +# next transaction creates a plain table (no WITH) and writes to it BEFORE commit, so the +# INSERT planner looks up options under the transaction-local id. With the leak, that id +# resolves to the rolled-back gzip override; the parquet write would silently be gzip. +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.t_clean(i INTEGER) + +statement ok +INSERT INTO ducklake.t_clean SELECT i FROM range(2000) t(i) + +statement ok +COMMIT + +# default parquet compression in DuckDB is snappy +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_with_rollback_files/main/t_clean/**') +---- +SNAPPY diff --git a/test/sql/settings/create_table_with_options_transaction.test b/test/sql/settings/create_table_with_options_transaction.test new file mode 100644 index 00000000000..15eb479c475 --- /dev/null +++ b/test/sql/settings/create_table_with_options_transaction.test @@ -0,0 +1,94 @@ +# name: test/sql/settings/create_table_with_options_transaction.test +# description: WITH (...) options on CREATE / CTAS persist correctly across same-transaction ALTER and rename +# group: [settings] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_create_with_options_txn.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_create_with_options_txn_files'); + +statement ok +SET threads=1 + +statement ok +SET preserve_insertion_order=false + +# CTAS + RENAME within the same transaction: options must follow the new name and the parquet +# files written during the CTAS must already honor the configured row group size. +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.t_rename WITH (parquet_compression='gzip', parquet_row_group_size=500) AS SELECT i FROM range(2500) t(i); + +statement ok +ALTER TABLE ducklake.t_rename RENAME TO t_renamed + +statement ok +COMMIT + +query III rowsort +SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_renamed' ORDER BY option_name +---- +parquet_compression gzip TABLE +parquet_row_group_size 500 TABLE + +# no leftover options under the original name +query I +SELECT COUNT(*) FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_rename' +---- +0 + +# data files were written with the configured compression (the CTAS happened before the rename) +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_txn_files/main/**') +---- +GZIP + +# CREATE TABLE + insert + rename within the same transaction +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.t2(i INTEGER) WITH (parquet_compression='zstd'); + +statement ok +INSERT INTO ducklake.t2 SELECT range FROM range(1000); + +statement ok +ALTER TABLE ducklake.t2 RENAME TO t2_after + +statement ok +COMMIT + +query II +SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t2_after' +---- +parquet_compression zstd + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_txn_files/main/t2*/**') +---- +ZSTD + +# CREATE TABLE WITH (...) + same-transaction INSERT must apply the override even though the table +# is still transaction-local (committed table_id has not been allocated yet). DuckLakeCopyInput pulls +# the override from DuckLakeTableEntry::pending_table_options. +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.t_local(i INTEGER) WITH (parquet_compression='gzip') + +statement ok +INSERT INTO ducklake.t_local SELECT i FROM range(1000) t(i) + +statement ok +COMMIT + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_txn_files/main/t_local/**') +---- +GZIP From 83b82f6b850ccb2cec38a69b05e7252c68364f3c Mon Sep 17 00:00:00 2001 From: Alex-Monahan Date: Mon, 4 May 2026 17:44:24 -0700 Subject: [PATCH 04/12] Apply review feedback: rename to options_in_create_with, support folded expressions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Naming pass — every WITH-clause-related identifier now uses options_in_create_with / OptionsInCreateWith, replacing pending_table_options, GetPendingTableOptions, SetPendingTableOptions, with_options, new_table_options, WriteNewTableConfigOptions, ValidateCreateTableOptions, and create_with_options. Architectural cleanup: - Push WITH-clause validation+population into the schema-flavored DuckLakeCopyInput constructor; PlanCreateTableAs no longer mutates copy_input post-construction. - Replace the inlined get_option lambda in PlanCopyForInsert with a method on DuckLakeCopyInput::GetEffectiveOption (the only inlined-helper lambda in DuckLake's src/ tree before this change). - Mirror the partition_data / sort_data copy guard pattern on the alter constructors: only copy options_in_create_with when the parent has any. Functionality: - Accept foldable expressions (constants, getvariable(...), upper('zstd'), etc.) as WITH (...) values, mirroring upstream CREATE SECRET / ATTACH binding. Use ConstantBinder + ExpressionExecutor::EvaluateScalar; reject unbindable expressions (column refs, subqueries) and prepared-statement parameters. - Lift the raw-options-map validation loop into a single shared free function ValidateOptionsInCreateWith; both call sites collapse to one line each. Tests: - Add cases for getvariable() WITH values, function-call folding (upper('zstd')), unbound column-ref rejection, and the explicit CREATE TABLE WITH + same-txn INSERT scenario. --- src/functions/ducklake_set_option.cpp | 32 +++++++++++++++++++ .../functions/ducklake_table_functions.hpp | 10 ++++++ src/include/storage/ducklake_insert.hpp | 15 +++++---- .../storage/ducklake_metadata_manager.hpp | 2 +- src/include/storage/ducklake_table_entry.hpp | 12 +++---- src/storage/ducklake_insert.cpp | 21 ++++-------- src/storage/ducklake_metadata_manager.cpp | 8 ++--- src/storage/ducklake_schema_entry.cpp | 30 +++-------------- src/storage/ducklake_table_entry.cpp | 8 +++-- src/storage/ducklake_transaction.cpp | 12 +++---- .../settings/create_table_with_options.test | 32 +++++++++++++++++-- ...create_table_with_options_transaction.test | 2 +- 12 files changed, 114 insertions(+), 70 deletions(-) diff --git a/src/functions/ducklake_set_option.cpp b/src/functions/ducklake_set_option.cpp index 83771a1edf7..db3a5fd6d57 100644 --- a/src/functions/ducklake_set_option.cpp +++ b/src/functions/ducklake_set_option.cpp @@ -4,8 +4,40 @@ #include "storage/ducklake_table_entry.hpp" #include "storage/ducklake_schema_entry.hpp" +#include "duckdb/execution/expression_executor.hpp" +#include "duckdb/parser/parsed_expression.hpp" +#include "duckdb/planner/binder.hpp" +#include "duckdb/planner/expression_binder/constant_binder.hpp" + namespace duckdb { +vector ValidateOptionsInCreateWith( + ClientContext &context, const case_insensitive_map_t> &options) { + // Bind via ConstantBinder so each value expression must fold to a literal (no column refs, no + // subqueries) — mirrors upstream ATTACH (bind_attach.cpp) and CREATE SECRET (bind_create.cpp). + // `allow_unfoldable=true` on EvaluateScalar lets volatile functions like random() through; that's + // fine here because we only persist the resulting Value, not the expression tree. + // Each expression is copied before binding because CTAS calls this twice (plan time + sink init) + // over the same options map; consuming the map (the upstream pattern) would break the second call. + auto binder = Binder::CreateBinder(context); + ConstantBinder option_binder(*binder, context, "DuckLake WITH option"); + vector result; + result.reserve(options.size()); + for (auto &option : options) { + if (!option.second) { + throw BinderException("WITH option \"%s\" requires a value", option.first); + } + auto expr_copy = option.second->Copy(); + auto bound_expr = option_binder.Bind(expr_copy); + if (bound_expr->HasParameter()) { + throw ParameterNotResolvedException(); + } + auto value = ExpressionExecutor::EvaluateScalar(context, *bound_expr, true); + result.push_back(ValidateDuckLakeConfigOption(context, option.first, value)); + } + return result; +} + struct DuckLakeSetOptionData : public TableFunctionData { DuckLakeSetOptionData(Catalog &catalog, DuckLakeConfigOption option_p) : catalog(catalog), option(std::move(option_p)) { diff --git a/src/include/functions/ducklake_table_functions.hpp b/src/include/functions/ducklake_table_functions.hpp index 571c8d73687..5aeb385ca5b 100644 --- a/src/include/functions/ducklake_table_functions.hpp +++ b/src/include/functions/ducklake_table_functions.hpp @@ -14,6 +14,7 @@ namespace duckdb { class DuckLakeCatalog; +class ParsedExpression; struct DuckLakeSnapshotInfo; struct DuckLakeTag; @@ -21,6 +22,15 @@ struct DuckLakeTag; //! Shared by `CALL ducklake.set_option(...)` and CREATE TABLE / CTAS `WITH (...)`. DuckLakeTag ValidateDuckLakeConfigOption(ClientContext &context, const string &option_key, const Value &val); +//! Bind, fold, and validate every entry in a CREATE TABLE / CTAS `WITH (...)` options map. +//! Each value expression must fold to a literal under ConstantBinder (constants, `getvariable(...)`, +//! `upper('zstd')`, etc.); each key/value is then run through ValidateDuckLakeConfigOption. +//! Empty input → empty output. Throws BinderException on any unbindable expression or unknown key. +//! The input map is left intact — copies each expression before binding because CTAS calls this +//! both at plan time (PlanCreateTableAs) and again during sink-state-init (CreateTableExtended). +vector ValidateOptionsInCreateWith( + ClientContext &context, const case_insensitive_map_t> &options); + class DuckLakeTableFunctionUtil { public: // Conform timestamp to ISO-8601 extended format with optional fractional seconds and timezone offset, e.g.: diff --git a/src/include/storage/ducklake_insert.hpp b/src/include/storage/ducklake_insert.hpp index e2c33c173e6..f1b9a4b76fc 100644 --- a/src/include/storage/ducklake_insert.hpp +++ b/src/include/storage/ducklake_insert.hpp @@ -155,17 +155,18 @@ struct DuckLakeCopyInput { //! CTAS-flavored: take the field-id mapping and (optional) partition spec from the planning-time-built //! spec rather than from a DuckLakeTableEntry that hasn't been created yet. field_data is required //! because the parquet writer always needs it; partition_data is null when CREATE TABLE AS has no - //! inline PARTITIONED BY clause. `create_with_options` is the raw `WITH (...)` clause from the parser; - //! it is validated and surfaced through `with_options` so the parquet write honors it. + //! inline PARTITIONED BY clause. `options_in_create_with` is the raw `WITH (...)` clause from the + //! parser; it is validated and surfaced through the `options_in_create_with` map so the parquet + //! write honors it. DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns, const string &data_path_p, DuckLakeFieldData &field_data, optional_ptr partition_data = nullptr, - const case_insensitive_map_t> &create_with_options = {}); + const case_insensitive_map_t> &options_in_create_with = {}); //! Look up an effective DuckLake config option for this write: returns the WITH override if one is - //! present in `with_options`, otherwise falls through to the catalog's table/schema/global lookup. - //! Used by PlanCopyForInsert so a CTAS write whose new table_id is not yet allocated still picks - //! up the user's WITH (...) values. + //! present in `options_in_create_with`, otherwise falls through to the catalog's table/schema/global + //! lookup. Used by PlanCopyForInsert so a CTAS write whose new table_id is not yet allocated still + //! picks up the user's WITH (...) values. bool GetEffectiveOption(const string &key, string &out) const; DuckLakeCatalog &catalog; @@ -180,7 +181,7 @@ struct DuckLakeCopyInput { optional_idx get_table_index; //! CREATE TABLE / CTAS WITH (...) options that override same-key catalog options for this write. //! Empty for plain INSERT. Validated at plan time before reaching here. - option_map_t with_options; + option_map_t options_in_create_with; }; } // namespace duckdb diff --git a/src/include/storage/ducklake_metadata_manager.hpp b/src/include/storage/ducklake_metadata_manager.hpp index 5192794220c..ccfed486648 100644 --- a/src/include/storage/ducklake_metadata_manager.hpp +++ b/src/include/storage/ducklake_metadata_manager.hpp @@ -174,7 +174,7 @@ class DuckLakeMetadataManager { virtual string WriteNewPartitionKeys(DuckLakeSnapshot commit_snapshot, const vector &new_partitions); virtual string WriteNewSortKeys(DuckLakeSnapshot commit_snapshot, const vector &new_sorts); - virtual string WriteNewTableConfigOptions(const vector &new_options); + virtual string WriteOptionsInCreateWith(const vector &options_in_create_with); virtual string WriteDroppedColumns(const vector &dropped_columns); virtual string WriteNewColumns(const vector &new_columns); virtual string WriteNewTags(const vector &new_tags); diff --git a/src/include/storage/ducklake_table_entry.hpp b/src/include/storage/ducklake_table_entry.hpp index d1d2c082db8..5cd952aa669 100644 --- a/src/include/storage/ducklake_table_entry.hpp +++ b/src/include/storage/ducklake_table_entry.hpp @@ -84,12 +84,12 @@ class DuckLakeTableEntry : public TableCatalogEntry { optional_ptr GetFieldId(FieldIndex field_index) const; void SetPartitionData(unique_ptr partition_data); void SetSortData(unique_ptr sort_data); - //! Pending table-scoped config options collected from a CREATE TABLE / CTAS WITH (...) clause. - void SetPendingTableOptions(vector options) { - pending_table_options = std::move(options); + //! Table-scoped config options collected from a CREATE TABLE / CTAS WITH (...) clause. + void SetOptionsInCreateWith(vector options) { + options_in_create_with = std::move(options); } - const vector &GetPendingTableOptions() const { - return pending_table_options; + const vector &GetOptionsInCreateWith() const { + return options_in_create_with; } shared_ptr GetTableStats(ClientContext &context); shared_ptr GetTableStats(DuckLakeTransaction &transaction); @@ -195,7 +195,7 @@ class DuckLakeTableEntry : public TableCatalogEntry { // only set for REMOVED_COLUMN unique_ptr changed_fields; // table-scoped config options collected from CREATE TABLE / CTAS WITH (...), pending until commit - vector pending_table_options; + vector options_in_create_with; }; } // namespace duckdb diff --git a/src/storage/ducklake_insert.cpp b/src/storage/ducklake_insert.cpp index ac12159b77d..5e5da2f8c46 100644 --- a/src/storage/ducklake_insert.cpp +++ b/src/storage/ducklake_insert.cpp @@ -16,7 +16,6 @@ #include "duckdb/execution/operator/order/physical_order.hpp" #include "duckdb/execution/operator/projection/physical_projection.hpp" #include "duckdb/execution/operator/scan/physical_table_scan.hpp" -#include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/parser/parser.hpp" #include "duckdb/planner/binder.hpp" #include "duckdb/planner/expression_binder.hpp" @@ -354,15 +353,15 @@ DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeTableEntry encryption_key = catalog.GenerateEncryptionKey(context); // Surface CREATE TABLE / CTAS WITH (...) options for transaction-local tables: the catalog // is not mutated for transaction-local ids, so the only source of truth is the table entry. - for (auto &tag : table.GetPendingTableOptions()) { - with_options[tag.key] = tag.value; + for (auto &tag : table.GetOptionsInCreateWith()) { + options_in_create_with[tag.key] = tag.value; } } DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns, const string &data_path_p, DuckLakeFieldData &field_data_p, optional_ptr partition_data_p, - const case_insensitive_map_t> &create_with_options) + const case_insensitive_map_t> &options_in_create_with) : catalog(schema.ParentCatalog().Cast()), columns(columns), data_path(data_path_p) { field_data = &field_data_p; partition_data = partition_data_p; @@ -371,20 +370,14 @@ DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry // Validate CREATE TABLE AS ... WITH (key=value, ...) and surface as per-write overrides so the // parquet files land with the requested compression / row group size, even though the new table_id // won't be allocated until DuckLakeInsert::GetGlobalSinkState (long after this plan is built). - for (auto &option : create_with_options) { - if (!option.second || option.second->GetExpressionClass() != ExpressionClass::CONSTANT) { - throw BinderException( - "WITH option \"%s\" must be a constant expression in DuckLake CREATE TABLE / CTAS", option.first); - } - auto &constant_expr = option.second->Cast(); - auto tag = ValidateDuckLakeConfigOption(context, option.first, constant_expr.value); - with_options[tag.key] = tag.value; + for (auto &tag : ValidateOptionsInCreateWith(context, options_in_create_with)) { + this->options_in_create_with[tag.key] = tag.value; } } bool DuckLakeCopyInput::GetEffectiveOption(const string &key, string &out) const { - auto it = with_options.find(key); - if (it != with_options.end()) { + auto it = options_in_create_with.find(key); + if (it != options_in_create_with.end()) { out = it->second; return true; } diff --git a/src/storage/ducklake_metadata_manager.cpp b/src/storage/ducklake_metadata_manager.cpp index aeed0a15e20..b624f38a8c3 100644 --- a/src/storage/ducklake_metadata_manager.cpp +++ b/src/storage/ducklake_metadata_manager.cpp @@ -4573,14 +4573,14 @@ WHERE {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < end_snapshot OR end_s return table_sizes; } -string DuckLakeMetadataManager::WriteNewTableConfigOptions(const vector &new_options) { - if (new_options.empty()) { +string DuckLakeMetadataManager::WriteOptionsInCreateWith(const vector &options_in_create_with) { + if (options_in_create_with.empty()) { return string(); } // fresh committed table-id from LocalChangeType::CREATED only — no UPSERT needed string query = "INSERT INTO {METADATA_CATALOG}.ducklake_metadata VALUES "; - for (idx_t i = 0; i < new_options.size(); i++) { - auto &opt = new_options[i]; + for (idx_t i = 0; i < options_in_create_with.size(); i++) { + auto &opt = options_in_create_with[i]; D_ASSERT(opt.table_id.IsValid() && !opt.table_id.IsTransactionLocal()); if (i > 0) { query += ", "; diff --git a/src/storage/ducklake_schema_entry.cpp b/src/storage/ducklake_schema_entry.cpp index 875ce91fb0e..06856717a4f 100644 --- a/src/storage/ducklake_schema_entry.cpp +++ b/src/storage/ducklake_schema_entry.cpp @@ -5,7 +5,6 @@ #include "duckdb/parser/parsed_data/create_view_info.hpp" #include "duckdb/parser/parsed_data/drop_info.hpp" #include "duckdb/parser/result_modifier.hpp" -#include "duckdb/parser/expression/constant_expression.hpp" #include "duckdb/planner/parsed_data/bound_create_table_info.hpp" #include "functions/ducklake_table_functions.hpp" #include "storage/ducklake_catalog.hpp" @@ -64,27 +63,6 @@ bool DuckLakeSchemaEntry::HandleCreateConflict(CatalogTransaction transaction, C return true; } -static vector ValidateCreateTableOptions(ClientContext &context, BoundCreateTableInfo &info) { - vector result; - auto &base_info = info.Base(); - if (base_info.options.empty()) { - return result; - } - for (auto &option : base_info.options) { - if (!option.second) { - throw BinderException("WITH option \"%s\" requires a value", option.first); - } - if (option.second->GetExpressionClass() != ExpressionClass::CONSTANT) { - throw BinderException( - "WITH option \"%s\" must be a constant expression in DuckLake CREATE TABLE / CTAS", option.first); - } - auto &constant_expr = option.second->Cast(); - auto tag = ValidateDuckLakeConfigOption(context, option.first, constant_expr.value); - result.push_back(std::move(tag)); - } - return result; -} - optional_ptr DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCreateTableInfo &info, string table_uuid, string table_data_path, unique_ptr prebuilt_partition_data, @@ -102,7 +80,7 @@ DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCr } } // validate WITH (...) options before any catalog mutation - auto validated_options = ValidateCreateTableOptions(transaction.GetContext(), info); + auto options_in_create_with = ValidateOptionsInCreateWith(transaction.GetContext(), base_info.options); //! get a local table-id auto table_id = TableIndex(duck_transaction.GetLocalCatalogId()); // generate field ids based on the column ids @@ -140,12 +118,12 @@ DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCr DuckLakeTableEntry::BuildSortData(duck_transaction, table_entry->GetColumns(), orders)); } // stash on the entry; same-transaction writers pick these up via DuckLakeCopyInput, and at commit - // time WriteNewTableConfigOptions persists them under the freshly-allocated committed table_id. + // time WriteOptionsInCreateWith persists them under the freshly-allocated committed table_id. // Routing through the entry (instead of mutating catalog options under the transaction-local id) // keeps the catalog free of dead entries on rollback, matching how new_tables / local_changes // already scope transaction state. - if (!validated_options.empty()) { - table_entry->SetPendingTableOptions(validated_options); + if (!options_in_create_with.empty()) { + table_entry->SetOptionsInCreateWith(options_in_create_with); } auto result = table_entry.get(); duck_transaction.CreateEntry(std::move(table_entry)); diff --git a/src/storage/ducklake_table_entry.cpp b/src/storage/ducklake_table_entry.cpp index c54790c2d6f..e45c6852806 100644 --- a/src/storage/ducklake_table_entry.cpp +++ b/src/storage/ducklake_table_entry.cpp @@ -75,7 +75,9 @@ DuckLakeTableEntry::DuckLakeTableEntry(DuckLakeTableEntry &parent, CreateTableIn if (parent.sort_data) { sort_data = make_uniq(*parent.sort_data); } - pending_table_options = parent.pending_table_options; + if (!parent.options_in_create_with.empty()) { + options_in_create_with = parent.options_in_create_with; + } CheckSupportedTypes(); if (local_change.type == LocalChangeType::ADD_COLUMN) { LogicalIndex new_col_idx(columns.LogicalColumnCount() - 1); @@ -100,7 +102,9 @@ DuckLakeTableEntry::DuckLakeTableEntry(DuckLakeTableEntry &parent, CreateTableIn if (parent.sort_data) { sort_data = make_uniq(*parent.sort_data); } - pending_table_options = parent.pending_table_options; + if (!parent.options_in_create_with.empty()) { + options_in_create_with = parent.options_in_create_with; + } CheckSupportedTypes(); auto changed_id = local_change.field_index; diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp index f043dff2b48..1d1ec739ffb 100644 --- a/src/storage/ducklake_transaction.cpp +++ b/src/storage/ducklake_transaction.cpp @@ -1522,7 +1522,7 @@ struct NewTableInfo { vector new_columns; vector new_inlined_data_tables; vector new_sort_keys; - vector new_table_options; + vector options_in_create_with; }; struct NewMacroInfo { @@ -1777,12 +1777,12 @@ void DuckLakeTransaction::GetNewTableInfo(DuckLakeCommitState &commit_state, Duc // the transaction-local set on rename, so a CREATE+RENAME chain arrives here as a single // RENAMED entry with a still-transaction-local id — emit whenever old_table_id is local. if (old_table_id.IsTransactionLocal()) { - auto &pending_options = table.GetPendingTableOptions(); - for (auto &tag : pending_options) { + auto &options_in_create_with = table.GetOptionsInCreateWith(); + for (auto &tag : options_in_create_with) { DuckLakeConfigOption opt; opt.option = tag; opt.table_id = new_table_id; - result.new_table_options.push_back(std::move(opt)); + result.options_in_create_with.push_back(std::move(opt)); } } break; @@ -2396,9 +2396,9 @@ string DuckLakeTransaction::CommitChanges(DuckLakeCommitState &commit_state, batch_queries += metadata_manager->WriteNewColumns(result.new_columns); batch_queries += metadata_manager->WriteNewInlinedTables(commit_snapshot, result.new_inlined_data_tables); batch_queries += metadata_manager->WriteNewSortKeys(commit_snapshot, result.new_sort_keys); - batch_queries += metadata_manager->WriteNewTableConfigOptions(result.new_table_options); + batch_queries += metadata_manager->WriteOptionsInCreateWith(result.options_in_create_with); // re-key in-memory options from the local id to the committed id - for (auto &opt : result.new_table_options) { + for (auto &opt : result.options_in_create_with) { ducklake_catalog.SetConfigOption(opt); } new_tables_result = result.new_tables; diff --git a/test/sql/settings/create_table_with_options.test b/test/sql/settings/create_table_with_options.test index f666f195d1e..2f7c05848b4 100644 --- a/test/sql/settings/create_table_with_options.test +++ b/test/sql/settings/create_table_with_options.test @@ -137,8 +137,34 @@ SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_ ---- parquet_compression gzip -# non-constant value is rejected at plan time +# foldable expressions (function calls) are accepted and folded to a literal +statement ok +CREATE TABLE ducklake.t_fold(i INTEGER) WITH (parquet_compression=upper('zstd')) + +query II +SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_fold' +---- +parquet_compression zstd + +# session variables are read at bind time via getvariable(...) and feed the WITH clause +statement ok +SET VARIABLE an_option = (SELECT 'zstd') + +statement ok +CREATE OR REPLACE TABLE ducklake.t_var WITH (parquet_compression=getvariable('an_option')) AS FROM range(42) + +query II +SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_var' +---- +parquet_compression zstd + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_var/**') +---- +ZSTD + +# expressions that cannot fold to a literal (e.g. a column reference with no FROM) are rejected statement error -CREATE TABLE ducklake.t_nonconst(i INTEGER) WITH (parquet_compression=upper('zstd')) +CREATE TABLE ducklake.t_bad_col(i INTEGER) WITH (parquet_compression=some_unknown_column) ---- -must be a constant expression +some_unknown_column diff --git a/test/sql/settings/create_table_with_options_transaction.test b/test/sql/settings/create_table_with_options_transaction.test index 15eb479c475..9bd9fc24e98 100644 --- a/test/sql/settings/create_table_with_options_transaction.test +++ b/test/sql/settings/create_table_with_options_transaction.test @@ -75,7 +75,7 @@ ZSTD # CREATE TABLE WITH (...) + same-transaction INSERT must apply the override even though the table # is still transaction-local (committed table_id has not been allocated yet). DuckLakeCopyInput pulls -# the override from DuckLakeTableEntry::pending_table_options. +# the override from DuckLakeTableEntry::options_in_create_with. statement ok BEGIN From 3b4fe7f46f6a8728d47bd76a3e85fe0e8a67db52 Mon Sep 17 00:00:00 2001 From: Alex-Monahan Date: Mon, 4 May 2026 17:50:10 -0700 Subject: [PATCH 05/12] formatting fixes --- src/functions/ducklake_set_option.cpp | 7 +++---- src/include/functions/ducklake_table_functions.hpp | 4 ++-- src/storage/ducklake_insert.cpp | 3 +-- src/storage/ducklake_transaction.cpp | 4 ++-- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/functions/ducklake_set_option.cpp b/src/functions/ducklake_set_option.cpp index db3a5fd6d57..eda3cd0eccd 100644 --- a/src/functions/ducklake_set_option.cpp +++ b/src/functions/ducklake_set_option.cpp @@ -11,8 +11,8 @@ namespace duckdb { -vector ValidateOptionsInCreateWith( - ClientContext &context, const case_insensitive_map_t> &options) { +vector ValidateOptionsInCreateWith(ClientContext &context, + const case_insensitive_map_t> &options) { // Bind via ConstantBinder so each value expression must fold to a literal (no column refs, no // subqueries) — mirrors upstream ATTACH (bind_attach.cpp) and CREATE SECRET (bind_create.cpp). // `allow_unfoldable=true` on EvaluateScalar lets volatile functions like random() through; that's @@ -136,8 +136,7 @@ static unique_ptr DuckLakeSetOptionBind(ClientContext &context, Ta vector &return_types, vector &names) { auto &catalog = DuckLakeBaseMetadataFunction::GetCatalog(context, input.inputs[0]); DuckLakeConfigOption config_option; - config_option.option = - ValidateDuckLakeConfigOption(context, StringValue::Get(input.inputs[1]), input.inputs[2]); + config_option.option = ValidateDuckLakeConfigOption(context, StringValue::Get(input.inputs[1]), input.inputs[2]); auto &option = config_option.option.key; // read the scope diff --git a/src/include/functions/ducklake_table_functions.hpp b/src/include/functions/ducklake_table_functions.hpp index 5aeb385ca5b..639136061e9 100644 --- a/src/include/functions/ducklake_table_functions.hpp +++ b/src/include/functions/ducklake_table_functions.hpp @@ -28,8 +28,8 @@ DuckLakeTag ValidateDuckLakeConfigOption(ClientContext &context, const string &o //! Empty input → empty output. Throws BinderException on any unbindable expression or unknown key. //! The input map is left intact — copies each expression before binding because CTAS calls this //! both at plan time (PlanCreateTableAs) and again during sink-state-init (CreateTableExtended). -vector ValidateOptionsInCreateWith( - ClientContext &context, const case_insensitive_map_t> &options); +vector ValidateOptionsInCreateWith(ClientContext &context, + const case_insensitive_map_t> &options); class DuckLakeTableFunctionUtil { public: diff --git a/src/storage/ducklake_insert.cpp b/src/storage/ducklake_insert.cpp index 5e5da2f8c46..e260b37e403 100644 --- a/src/storage/ducklake_insert.cpp +++ b/src/storage/ducklake_insert.cpp @@ -553,8 +553,7 @@ DuckLakeCopyOptions DuckLakeInsert::GetCopyOptions(ClientContext &context, DuckL if (copy_input.GetEffectiveOption("target_file_size", target_file_size_str)) { target_file_size = Value(target_file_size_str).GetValue(); } else { - target_file_size = - catalog.GetConfigOption("target_file_size", schema_id, table_id, target_file_size); + target_file_size = catalog.GetConfigOption("target_file_size", schema_id, table_id, target_file_size); } // Always use native parquet geometry for writing diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp index 1d1ec739ffb..35f929d4b3f 100644 --- a/src/storage/ducklake_transaction.cpp +++ b/src/storage/ducklake_transaction.cpp @@ -736,8 +736,8 @@ Connection &DuckLakeTransaction::GetConnection() { bool DuckLakeTransaction::SchemaChangesMade() const { return !new_tables.empty() || !dropped_tables.empty() || new_schemas || !dropped_schemas.empty() || - !dropped_views.empty() || !renamed_views.empty() || !new_macros.empty() || - !dropped_scalar_macros.empty() || !dropped_table_macros.empty(); + !dropped_views.empty() || !renamed_views.empty() || !new_macros.empty() || !dropped_scalar_macros.empty() || + !dropped_table_macros.empty(); } bool DuckLakeTransaction::ChangesMade() const { From 624417e99a9c24600728cf226c2804c0d8aee3b7 Mon Sep 17 00:00:00 2001 From: Alex-Monahan Date: Tue, 5 May 2026 13:18:02 -0700 Subject: [PATCH 06/12] Add combined PARTITIONED BY + SORTED BY + WITH coverage to create_table_with_options.test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two new test cases — one plain CREATE TABLE, one CTAS — exercise all three clauses in the same statement. Verifies: - Parser accepts the three-clause combination (no syntax conflicts). - WITH options (parquet_compression, parquet_row_group_size) persist to ducklake_metadata under the new table_id. - PARTITIONED BY produces hive 'i=N/' directories in the data path; for 20 distinct values we see 20 partition directories. - WITH compression overrides the default (gzip on the plain path, zstd on the CTAS path) and is observed in parquet_metadata(). SORT correctness is exercised here only by not-erroring; full SORT verification lives in test/sql/sorted_table/* and test/sql/create_table_inline_partition_sort/*_sort_verification.test. --- .../settings/create_table_with_options.test | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/test/sql/settings/create_table_with_options.test b/test/sql/settings/create_table_with_options.test index 2f7c05848b4..94bb532a0bb 100644 --- a/test/sql/settings/create_table_with_options.test +++ b/test/sql/settings/create_table_with_options.test @@ -168,3 +168,64 @@ statement error CREATE TABLE ducklake.t_bad_col(i INTEGER) WITH (parquet_compression=some_unknown_column) ---- some_unknown_column + +# All three clauses together: PARTITIONED BY + SORTED BY + WITH on plain CREATE TABLE. +# Verifies the parser accepts the combination, the WITH options are persisted, and a follow-up +# INSERT uses all three (compression from WITH, hive layout from PARTITIONED BY). +statement ok +CREATE TABLE ducklake.t_combo(i INTEGER, s VARCHAR) +PARTITIONED BY (i) +SORTED BY (s) +WITH (parquet_compression='gzip', parquet_row_group_size=2048) + +query III rowsort +SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_combo' ORDER BY option_name +---- +parquet_compression gzip TABLE +parquet_row_group_size 2048 TABLE + +statement ok +INSERT INTO ducklake.t_combo SELECT i, 'row' || i FROM range(20) t(i) + +# follow-up INSERT honors WITH compression +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**') +---- +GZIP + +# PARTITIONED BY (i) produces hive-style i=N/ directories, one per distinct i (20 here) +query I +SELECT COUNT(DISTINCT regexp_extract(file_name, 'i=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**') +---- +20 + +# SORTED BY data ordering is exercised here only via not-erroring; full SORT correctness lives in +# test/sql/sorted_table/* and test/sql/create_table_inline_partition_sort/*_sort_verification.test. + +# All three clauses together on a CTAS — verifies plan-time PARTITIONED BY layout, SORTED BY data +# ordering, and WITH compression all coexist on a CREATE TABLE AS SELECT. +statement ok +CREATE TABLE ducklake.t_combo_ctas +PARTITIONED BY (i) +SORTED BY (s) +WITH (parquet_compression='zstd', parquet_row_group_size=2048) +AS SELECT range AS i, 'row' || range AS s FROM range(20) + +query III rowsort +SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_combo_ctas' ORDER BY option_name +---- +parquet_compression zstd TABLE +parquet_row_group_size 2048 TABLE + +# CTAS-written parquet files use the WITH compression +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**') +---- +ZSTD + +# rows landed in their hive partitions (one parquet directory per i value) +query I +SELECT COUNT(DISTINCT regexp_extract(file_name, 'i=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**') +---- +20 + From 63b467869d72e157a3c9262e4296cf15e879ab10 Mon Sep 17 00:00:00 2001 From: Alex-Monahan Date: Tue, 5 May 2026 13:29:31 -0700 Subject: [PATCH 07/12] Validate SORTED BY in the combined PARTITIONED BY + SORTED BY + WITH tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both the plain-INSERT and CTAS combined-clause tests now hardcode per-rowgroup stats_min/max for the sort key, mirroring the technique in create_table_inline_ctas_sort_verification.test. Layout: 6144 rows, p = i // 3072 (two partitions), b = 6143 - i (input is b DESCENDING). With parquet_row_group_size=2048 from WITH and preserve_insertion_order=true, each partition file has exactly 2 rowgroups. After SORTED BY (b), per-rowgroup b stats are disjoint and ascending: Partition p=0 (b ∈ [3072, 6143]): rowgroup 0: b ∈ [3072, 5119] rowgroup 1: b ∈ [5120, 6143] Partition p=1 (b ∈ [0, 3071]): rowgroup 0: b ∈ [0, 2047] rowgroup 1: b ∈ [2048, 3071] Without sort, rowgroup 0 of each file would carry the full input range of b (descending), so the disjoint-ascending shape is a tight regression test for 'sort actually reordered rows on disk' on both code paths. --- .../settings/create_table_with_options.test | 88 +++++++++++++++---- 1 file changed, 69 insertions(+), 19 deletions(-) diff --git a/test/sql/settings/create_table_with_options.test b/test/sql/settings/create_table_with_options.test index 94bb532a0bb..4adf367c99b 100644 --- a/test/sql/settings/create_table_with_options.test +++ b/test/sql/settings/create_table_with_options.test @@ -169,13 +169,26 @@ CREATE TABLE ducklake.t_bad_col(i INTEGER) WITH (parquet_compression=some_unknow ---- some_unknown_column +# preserve_insertion_order=true keeps rowgroup boundaries deterministic so we can hardcode +# per-rowgroup stats_min/max below to verify the sort actually reordered rows on disk. +statement ok +SET preserve_insertion_order=true + # All three clauses together: PARTITIONED BY + SORTED BY + WITH on plain CREATE TABLE. -# Verifies the parser accepts the combination, the WITH options are persisted, and a follow-up -# INSERT uses all three (compression from WITH, hive layout from PARTITIONED BY). +# Verifies the parser accepts the combination, the WITH options are persisted, the partition layout +# matches PARTITIONED BY, the WITH compression takes effect, and SORTED BY actually reorders rows +# (per-rowgroup b stats are disjoint and ascending after sort). +# +# Layout (6144 rows, parquet_row_group_size=2048, single thread, preserve_insertion_order=true): +# PARTITIONED BY (p): p = i // 3072 ∈ {0, 1} → 2 hive directories (p=0/, p=1/) +# SORTED BY (b): b = 6143 - i → input is b DESCENDING; after sort, b ASCENDING +# Rowgroups per partition: 3072 rows ÷ 2048 = 2 rowgroups (sizes 2048, 1024) +# Partition p=0: b ∈ [3072, 6143] → rg 0: b ∈ [3072,5119] rg 1: b ∈ [5120,6143] +# Partition p=1: b ∈ [0, 3071] → rg 0: b ∈ [0,2047] rg 1: b ∈ [2048,3071] statement ok -CREATE TABLE ducklake.t_combo(i INTEGER, s VARCHAR) -PARTITIONED BY (i) -SORTED BY (s) +CREATE TABLE ducklake.t_combo(p INTEGER, b INTEGER) +PARTITIONED BY (p) +SORTED BY (b) WITH (parquet_compression='gzip', parquet_row_group_size=2048) query III rowsort @@ -185,31 +198,49 @@ parquet_compression gzip TABLE parquet_row_group_size 2048 TABLE statement ok -INSERT INTO ducklake.t_combo SELECT i, 'row' || i FROM range(20) t(i) +INSERT INTO ducklake.t_combo SELECT i // 3072 AS p, 6143 - i AS b FROM range(6144) t(i) # follow-up INSERT honors WITH compression query I -SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**') +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**') WHERE path_in_schema = 'b' ---- GZIP -# PARTITIONED BY (i) produces hive-style i=N/ directories, one per distinct i (20 here) +# PARTITIONED BY (p) produces 2 hive directories query I -SELECT COUNT(DISTINCT regexp_extract(file_name, 'i=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**') +SELECT COUNT(DISTINCT regexp_extract(file_name, 'p=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**') ---- -20 +2 -# SORTED BY data ordering is exercised here only via not-erroring; full SORT correctness lives in -# test/sql/sorted_table/* and test/sql/create_table_inline_partition_sort/*_sort_verification.test. +# parquet_row_group_size=2048 from WITH produces 2 rowgroups per partition file (4 total) +query I +SELECT COUNT(*) FROM (SELECT DISTINCT file_name, row_group_id FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**')) +---- +4 + +# SORTED BY (b) reordered the rows: per-rowgroup b stats are disjoint and ascending within each +# partition file. (Input was b DESCENDING; without sort, rg 0's b would span the full input range.) +query IIII +SELECT regexp_extract(file_name, 'p=(\d+)', 1) AS partition, + row_group_id, stats_min::INTEGER AS rg_min, stats_max::INTEGER AS rg_max +FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**') +WHERE path_in_schema = 'b' +ORDER BY partition, row_group_id +---- +0 0 3072 5119 +0 1 5120 6143 +1 0 0 2047 +1 1 2048 3071 # All three clauses together on a CTAS — verifies plan-time PARTITIONED BY layout, SORTED BY data # ordering, and WITH compression all coexist on a CREATE TABLE AS SELECT. +# Same data layout as t_combo (6144 rows, p=i//3072, b=6143-i) but compression=zstd this time. statement ok CREATE TABLE ducklake.t_combo_ctas -PARTITIONED BY (i) -SORTED BY (s) +PARTITIONED BY (p) +SORTED BY (b) WITH (parquet_compression='zstd', parquet_row_group_size=2048) -AS SELECT range AS i, 'row' || range AS s FROM range(20) +AS SELECT i // 3072 AS p, 6143 - i AS b FROM range(6144) t(i) query III rowsort SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_combo_ctas' ORDER BY option_name @@ -219,13 +250,32 @@ parquet_row_group_size 2048 TABLE # CTAS-written parquet files use the WITH compression query I -SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**') +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**') WHERE path_in_schema = 'b' ---- ZSTD -# rows landed in their hive partitions (one parquet directory per i value) +# 2 hive partition directories query I -SELECT COUNT(DISTINCT regexp_extract(file_name, 'i=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**') +SELECT COUNT(DISTINCT regexp_extract(file_name, 'p=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**') +---- +2 + +# 2 rowgroups per partition file (4 total) +query I +SELECT COUNT(*) FROM (SELECT DISTINCT file_name, row_group_id FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**')) +---- +4 + +# SORTED BY (b) on the CTAS path: same per-rowgroup hardcoded stats as t_combo. +query IIII +SELECT regexp_extract(file_name, 'p=(\d+)', 1) AS partition, + row_group_id, stats_min::INTEGER AS rg_min, stats_max::INTEGER AS rg_max +FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**') +WHERE path_in_schema = 'b' +ORDER BY partition, row_group_id ---- -20 +0 0 3072 5119 +0 1 5120 6143 +1 0 0 2047 +1 1 2048 3071 From 962a8bb0d98917407e480256c1086e5ff2e03ffd Mon Sep 17 00:00:00 2001 From: Alex-Monahan Date: Tue, 5 May 2026 13:32:49 -0700 Subject: [PATCH 08/12] Remove unnecessary comment --- test/sql/settings/create_table_with_options.test | 7 ------- 1 file changed, 7 deletions(-) diff --git a/test/sql/settings/create_table_with_options.test b/test/sql/settings/create_table_with_options.test index 4adf367c99b..b71b67e3cf4 100644 --- a/test/sql/settings/create_table_with_options.test +++ b/test/sql/settings/create_table_with_options.test @@ -178,13 +178,6 @@ SET preserve_insertion_order=true # Verifies the parser accepts the combination, the WITH options are persisted, the partition layout # matches PARTITIONED BY, the WITH compression takes effect, and SORTED BY actually reorders rows # (per-rowgroup b stats are disjoint and ascending after sort). -# -# Layout (6144 rows, parquet_row_group_size=2048, single thread, preserve_insertion_order=true): -# PARTITIONED BY (p): p = i // 3072 ∈ {0, 1} → 2 hive directories (p=0/, p=1/) -# SORTED BY (b): b = 6143 - i → input is b DESCENDING; after sort, b ASCENDING -# Rowgroups per partition: 3072 rows ÷ 2048 = 2 rowgroups (sizes 2048, 1024) -# Partition p=0: b ∈ [3072, 6143] → rg 0: b ∈ [3072,5119] rg 1: b ∈ [5120,6143] -# Partition p=1: b ∈ [0, 3071] → rg 0: b ∈ [0,2047] rg 1: b ∈ [2048,3071] statement ok CREATE TABLE ducklake.t_combo(p INTEGER, b INTEGER) PARTITIONED BY (p) From 3ac58257557be58a9d123f92d648eb2aaae62eed Mon Sep 17 00:00:00 2001 From: Alex-Monahan Date: Thu, 4 Jun 2026 11:34:57 -0700 Subject: [PATCH 09/12] Fix bad conflict resolution from github.com web merge The web-based merge of v1.5-variegata (2f205dbb) resolved the conflict in ducklake_transaction.cpp by keeping a 295-line block (NewTableInfo, CancelOrDropField, RenameEmittedEntry, HandleChangedFields, GetNewTableInfo) that upstream had refactored into ducklake_commit_state.hpp / ducklake_transaction_state.cpp, causing duplicate definitions and breaking compilation. Remove the obsolete block and port the one piece the upstream refactor did not carry over: the CREATED-case handling that persists partition and sort metadata for CREATE TABLE ... PARTITIONED BY / SORTED BY, now in DuckLakeTransactionState::GetNewTableInfo. Also includes make-format output limited to code added by this PR. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../ducklake_compaction_functions.cpp | 3 +- src/storage/ducklake_transaction.cpp | 295 ------------------ src/storage/ducklake_transaction_state.cpp | 12 + .../create_table_inline_transactions.test | 3 +- .../create_table_inline_with_inlining.test | 3 +- 5 files changed, 17 insertions(+), 299 deletions(-) diff --git a/src/functions/ducklake_compaction_functions.cpp b/src/functions/ducklake_compaction_functions.cpp index dcf8ecf10c8..f79ae1ddb08 100644 --- a/src/functions/ducklake_compaction_functions.cpp +++ b/src/functions/ducklake_compaction_functions.cpp @@ -69,8 +69,7 @@ vector DuckLakeCompactor::BindSortOrders(Binder &binder, const return orders; } -vector DuckLakeCompactor::BindSortOrders(Binder &binder, DuckLakeTableEntry &table, - idx_t table_index, +vector DuckLakeCompactor::BindSortOrders(Binder &binder, DuckLakeTableEntry &table, idx_t table_index, vector &pre_bound_orders) { return BindSortOrders(binder, table.GetColumns(), table.name, TableIndex(table_index), pre_bound_orders); } diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp index b84febadf5f..a8aeb2f7e0a 100644 --- a/src/storage/ducklake_transaction.cpp +++ b/src/storage/ducklake_transaction.cpp @@ -1130,301 +1130,6 @@ DuckLakeTableInfo DuckLakeTransaction::GetNewTable(DuckLakeCommitState &commit_s return table_entry; } -struct NewTableInfo { - vector new_tables; - vector new_views; - vector new_partition_keys; - vector new_tags; - vector new_column_tags; - vector dropped_columns; - vector new_columns; - vector new_inlined_data_tables; - vector new_sort_keys; -}; - -struct NewMacroInfo { - vector new_macros; -}; - -void CancelOrDropField(TableIndex table_id, FieldIndex field_id, NewTableInfo &result, - unordered_map &txn_added_fields) { - auto it = txn_added_fields.find(field_id.index); - if (it != txn_added_fields.end()) { - auto erased_idx = it->second; - result.new_columns.erase(result.new_columns.begin() + erased_idx); - txn_added_fields.erase(it); - for (auto &entry : txn_added_fields) { - if (entry.second > erased_idx) { - entry.second--; - } - } - return; - } - // Column was not added in the same transaction, we emit a drop. - DuckLakeDroppedColumn dropped_col; - dropped_col.table_id = table_id; - dropped_col.field_id = field_id; - result.dropped_columns.push_back(dropped_col); -} - -template -void RenameEmittedEntry(vector &entries, TableIndex remapped_id, const string &new_name) { - for (auto &entry : entries) { - if (entry.id == remapped_id) { - entry.name = new_name; - return; - } - } -} - -void HandleChangedFields(TableIndex table_id, const ColumnChangeInfo &change_info, NewTableInfo &result, - unordered_map &txn_added_fields) { - for (auto &dropped_field_id : change_info.dropped_fields) { - CancelOrDropField(table_id, dropped_field_id, result, txn_added_fields); - } - for (auto &new_col_info : change_info.new_fields) { - DuckLakeNewColumn new_column; - new_column.table_id = table_id; - new_column.column_info = new_col_info.column_info; - new_column.parent_idx = new_col_info.parent_idx; - txn_added_fields[new_col_info.column_info.id.index] = result.new_columns.size(); - result.new_columns.push_back(std::move(new_column)); - } -} - -void DuckLakeTransaction::GetNewTableInfo(DuckLakeCommitState &commit_state, DuckLakeCatalogSet &catalog_set, - reference table_entry, NewTableInfo &result, - TransactionChangeInformation &transaction_changes) { - // iterate over the table chain in reverse order when committing - // the latest entry is the root entry - but we need to commit starting from the first entry written - // gather all tables - vector> tables; - while (true) { - tables.push_back(table_entry.get().Cast()); - if (!table_entry.get().HasChild()) { - break; - } - table_entry = table_entry.get().Child(); - } - - // Maps from field index to the number of alter operations for that field. - map field_alter_count; - // Number of table comment operations. - idx_t comment_count = 0; - // Maps from field index to the number of column comment operations for that field. - map column_comment_count; - for (idx_t table_idx = 0; table_idx < tables.size(); table_idx++) { - auto &table = tables[table_idx].get(); - auto local_change = table.GetLocalChange(); - switch (local_change.type) { - case LocalChangeType::SET_NULL: - case LocalChangeType::DROP_NULL: - case LocalChangeType::RENAME_COLUMN: - case LocalChangeType::SET_DEFAULT: - field_alter_count[local_change.field_index]++; - break; - case LocalChangeType::SET_COMMENT: - comment_count++; - break; - case LocalChangeType::SET_COLUMN_COMMENT: - column_comment_count[local_change.field_index]++; - break; - default: - break; - } - } - - // Maps from field index to the number of alter operations remaining for that field. - map field_alter_remaining(std::move(field_alter_count)); - // Number of table comment operations remaining. - idx_t comment_remaining = comment_count; - // Maps from field index to the number of column comment operations remaining for that field. - map column_comment_remaining(std::move(column_comment_count)); - - // Used to decide whether a column is a newly added column in this transaction. - // Maps from field_index.index to the index of the column in result.new_columns. - unordered_map txn_added_fields; - - // traverse in reverse order - bool column_schema_change = false; - for (idx_t table_idx = tables.size(); table_idx > 0; table_idx--) { - auto &table = tables[table_idx - 1].get(); - auto local_change = table.GetLocalChange(); - auto table_id = table.GetTableId(); - switch (local_change.type) { - case LocalChangeType::SET_PARTITION_KEY: { - auto partition_key = GetNewPartitionKey(commit_state, table); - result.new_partition_keys.push_back(std::move(partition_key)); - - transaction_changes.altered_tables.insert(table_id); - transaction_changes.altered_tables_with_schema_version_changes.insert(table_id); - column_schema_change = true; - break; - } - case LocalChangeType::SET_SORT_KEY: { - auto sort_key = GetNewSortKey(commit_state, table); - result.new_sort_keys.push_back(std::move(sort_key)); - transaction_changes.altered_tables.insert(table_id); - break; - } - case LocalChangeType::SET_COMMENT: { - comment_remaining--; - if (comment_remaining > 0) { - break; - } - DuckLakeTagInfo comment_info; - comment_info.id = commit_state.GetTableId(table).index; - comment_info.key = "comment"; - comment_info.value = table.comment; - result.new_tags.push_back(std::move(comment_info)); - - transaction_changes.altered_tables.insert(table_id); - break; - } - case LocalChangeType::SET_COLUMN_COMMENT: { - auto &col_comment_rem = column_comment_remaining[local_change.field_index]; - col_comment_rem--; - if (col_comment_rem > 0) { - break; - } - DuckLakeColumnTagInfo comment_info; - comment_info.table_id = commit_state.GetTableId(table); - comment_info.field_index = local_change.field_index; - comment_info.key = "comment"; - comment_info.value = table.GetColumnByFieldId(local_change.field_index).Comment(); - result.new_column_tags.push_back(std::move(comment_info)); - - transaction_changes.altered_tables.insert(table_id); - break; - } - case LocalChangeType::SET_NULL: - case LocalChangeType::DROP_NULL: - case LocalChangeType::RENAME_COLUMN: - case LocalChangeType::SET_DEFAULT: { - auto &remaining = field_alter_remaining[local_change.field_index]; - remaining--; - // This is an older thus superseded entry for a field that is altered for multiple times in this - // transaction, so we skip it; the newest entry will emit the definitive value. - if (remaining > 0) { - break; - } - - auto committed_table_id = commit_state.GetTableId(table); - - // Cancel the previous txn-local add for this field, or drop the committed column. - CancelOrDropField(committed_table_id, local_change.field_index, result, txn_added_fields); - - // Insert the new column with the updated info and register it. - DuckLakeNewColumn new_col; - new_col.table_id = committed_table_id; - new_col.column_info = table.GetColumnInfo(local_change.field_index); - txn_added_fields[local_change.field_index.index] = result.new_columns.size(); - result.new_columns.push_back(std::move(new_col)); - - transaction_changes.altered_tables.insert(table_id); - transaction_changes.altered_tables_with_schema_version_changes.insert(table_id); - if (local_change.type == LocalChangeType::RENAME_COLUMN) { - column_schema_change = true; - // persist updated sort expressions (column name was updated in the table entry) - if (table.GetSortData()) { - auto sort_key = GetNewSortKey(commit_state, table); - result.new_sort_keys.push_back(std::move(sort_key)); - } - } - break; - } - case LocalChangeType::REMOVE_COLUMN: - case LocalChangeType::CHANGE_COLUMN_TYPE: { - // drop the indicated column - // note that in case of nested types we might be dropping multiple columns here - HandleChangedFields(commit_state.GetTableId(table), table.GetChangedFields(), result, txn_added_fields); - transaction_changes.altered_tables_with_schema_version_changes.insert(table_id); - column_schema_change = true; - break; - } - case LocalChangeType::ADD_COLUMN: { - // insert the new column - DuckLakeNewColumn new_col; - new_col.table_id = commit_state.GetTableId(table); - new_col.column_info = table.GetAddColumnInfo(); - txn_added_fields[new_col.column_info.id.index] = result.new_columns.size(); - result.new_columns.push_back(std::move(new_col)); - - transaction_changes.altered_tables.insert(table.GetTableId()); - transaction_changes.altered_tables_with_schema_version_changes.insert(table_id); - column_schema_change = true; - break; - } - case LocalChangeType::NONE: - case LocalChangeType::CREATED: - case LocalChangeType::RENAMED: { - auto old_table_id = table.GetTableId(); - if (local_change.type == LocalChangeType::RENAMED && old_table_id.IsTransactionLocal()) { - auto existing = commit_state.committed_tables.find(old_table_id); - if (existing != commit_state.committed_tables.end()) { - // If we are rename a table in the same transaction it was created, we need to patch it - RenameEmittedEntry(result.new_tables, existing->second, table.name); - break; - } - } - auto new_table = GetNewTable(commit_state, table); - auto new_table_id = new_table.id; - result.new_tables.push_back(std::move(new_table)); - - // remap the table in the commit state - commit_state.committed_tables.emplace(old_table_id, new_table_id); - - // create an inlined data table entry with the latest columns - // (uses tables.front() to get the most up-to-date schema including any ALTER TABLE changes) - auto &latest_table = tables.front().get(); - DuckLakeTableInfo inlined_entry; - inlined_entry.id = new_table_id; - inlined_entry.schema_id = latest_table.ParentSchema().Cast().GetSchemaId(); - inlined_entry.uuid = latest_table.GetTableUUID(); - inlined_entry.columns = latest_table.GetTableColumns(); - result.new_inlined_data_tables.push_back(std::move(inlined_entry)); - - // CREATE TABLE ... PARTITIONED BY / SORTED BY - if (local_change.type == LocalChangeType::CREATED) { - if (table.GetPartitionData()) { - auto partition_key = GetNewPartitionKey(commit_state, table); - result.new_partition_keys.push_back(std::move(partition_key)); - } - if (table.GetSortData()) { - auto sort_key = GetNewSortKey(commit_state, table); - result.new_sort_keys.push_back(std::move(sort_key)); - } - } - break; - } - default: - throw NotImplementedException("Unsupported transaction local change"); - } - } - if (column_schema_change) { - // we changed the column definitions of an existing table - we need to create a new inlined data table - // (if data inlining is enabled) - // for newly created tables this is already handled in the CREATED case above - auto &table = tables.front().get(); - auto committed_id = commit_state.GetTableId(table); - bool already_added = false; - for (auto &entry : result.new_inlined_data_tables) { - if (entry.id == committed_id) { - already_added = true; - break; - } - } - if (!already_added) { - DuckLakeTableInfo table_entry; - table_entry.id = committed_id; - table_entry.schema_id = table.ParentSchema().Cast().GetSchemaId(); - table_entry.uuid = table.GetTableUUID(); - table_entry.columns = table.GetTableColumns(); - result.new_inlined_data_tables.push_back(std::move(table_entry)); - } - } -} - DuckLakeViewInfo DuckLakeTransaction::GetNewView(DuckLakeCommitState &commit_state, DuckLakeViewEntry &view) { auto &schema = view.ParentSchema().Cast(); DuckLakeViewInfo view_entry; diff --git a/src/storage/ducklake_transaction_state.cpp b/src/storage/ducklake_transaction_state.cpp index 2b708c3ad82..2cade30015d 100644 --- a/src/storage/ducklake_transaction_state.cpp +++ b/src/storage/ducklake_transaction_state.cpp @@ -1017,6 +1017,18 @@ void DuckLakeTransactionState::GetNewTableInfo(DuckLakeCommitState &commit_state inlined_entry.uuid = latest_table.GetTableUUID(); inlined_entry.columns = latest_table.GetTableColumns(); result.new_inlined_data_tables.push_back(std::move(inlined_entry)); + + // CREATE TABLE ... PARTITIONED BY / SORTED BY + if (local_change.type == LocalChangeType::CREATED) { + if (table.GetPartitionData()) { + auto partition_key = DuckLakeTransaction::GetNewPartitionKey(commit_state, table); + result.new_partition_keys.push_back(std::move(partition_key)); + } + if (table.GetSortData()) { + auto sort_key = DuckLakeTransaction::GetNewSortKey(commit_state, table); + result.new_sort_keys.push_back(std::move(sort_key)); + } + } break; } default: diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test index d04656cfc00..06372b84d36 100644 --- a/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test @@ -1,9 +1,10 @@ # name: test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test # description: Transaction semantics for inline PARTITIONED BY / SORTED BY on CREATE TABLE / CTAS: +# group: [create_table_inline_partition_sort] + # rollback discards everything (entry, files, transaction-local ids); concurrent CTAS with # distinct names cannot conflict; concurrent CTAS with the same name produces a commit-time # conflict; -# group: [create_table_inline_partition_sort] require ducklake diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test index 7538a72c6da..cd5cb78026d 100644 --- a/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test @@ -1,12 +1,13 @@ # name: test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test # description: CTAS with inline PARTITIONED BY / SORTED BY interacting with the data-inlining row limit. +# group: [create_table_inline_partition_sort] + # When a CTAS's row count is below DATA_INLINING_ROW_LIMIT, rows go to the per-table # ducklake_inlined_data__ catalog table instead of Parquet. Partition and sort # metadata must still be recorded; the inline operator coordinates with the about-to-be-created # table_id at sink-state-init time. Once a flush is called, inlined rows materialize as # partition-aware Parquet files. (On-disk hive directory verification is intentionally omitted - # that's covered by create_table_inline_basic.test / create_table_inline_ctas.test.) -# group: [create_table_inline_partition_sort] require ducklake From 19ded6a6cb08c711e4e36dd7f4d57fca025f8d0a Mon Sep 17 00:00:00 2001 From: Alex-Monahan Date: Thu, 4 Jun 2026 14:27:59 -0700 Subject: [PATCH 10/12] Refactor inline partition/sort tests to avoid multi-table metadata scans The Quack RPC catalog test runner does not support multiple streaming scans in a single query. Resolve table_ids into SQL variables with SET VARIABLE first, then reference them via getvariable() so each metadata query scans only one metadata table. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../create_table_inline_basic.test | 106 ++++++++++++------ .../create_table_inline_ctas.test | 67 +++++++---- .../create_table_inline_errors.test | 12 +- .../create_table_inline_transactions.test | 69 +++++++++--- .../create_table_inline_with_inlining.test | 54 +++++---- 5 files changed, 213 insertions(+), 95 deletions(-) diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test b/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test index 38b97dd29f4..d008d5e4548 100644 --- a/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test @@ -30,26 +30,31 @@ SELECT i, v FROM t_part ORDER BY i, v 1 c 2 b +# resolve the table_id in a separate statement - some metadata catalogs (e.g. Quack +# ones) cannot scan two of their tables in a single query +statement ok +SET VARIABLE t_part_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') + # verify partition metadata was persisted at CREATE time (no ALTER ran) query I -SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('t_part_id') ---- 1 query I -SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') +SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = getvariable('t_part_id') ---- 1 # verify the on-disk Parquet files landed in hive-style partition-specific directories # (DATA_INLINING_ROW_LIMIT 0 forces INSERT to write Parquet directly; one file per partition value) query I -SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') +SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_part_id') ---- 2 query I -SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') ORDER BY 1 +SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_part_id') ORDER BY 1 ---- i=1 i=2 @@ -71,20 +76,23 @@ SELECT i, v FROM t_sort 2 z 3 x +statement ok +SET VARIABLE t_sort_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort') + query I -SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort') +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('t_sort_id') ---- 1 query II -SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort') ORDER BY sort_key_index +SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = getvariable('t_sort_id') ORDER BY sort_key_index ---- 0 i # verify the inline path persisted the same defaults a bare ALTER TABLE ... SET SORTED BY (col) would # (the metadata writer collapses OrderType::ASCENDING -> "ASC" and OrderByNullType::ORDER_DEFAULT -> "NULLS_LAST") query III -SELECT sort_direction, null_order, dialect FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort') +SELECT sort_direction, null_order, dialect FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = getvariable('t_sort_id') ---- ASC NULLS_LAST duckdb @@ -104,9 +112,12 @@ SELECT a, b, v FROM t_both1 WHERE a = 1 1 20 r 1 30 p +statement ok +SET VARIABLE t_both1_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both1') + # on-disk: one Parquet file per distinct partition value (a=1 and a=2), each in its own a=N directory query I -SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both1') ORDER BY 1 +SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_both1_id') ORDER BY 1 ---- a=1 a=2 @@ -115,13 +126,16 @@ a=2 statement ok CREATE TABLE t_both2 (a INTEGER, b INTEGER) SORTED BY (b) PARTITIONED BY (a) +statement ok +SET VARIABLE t_both2_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both2') + query I -SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both2') +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('t_both2_id') ---- 1 query I -SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both2') +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('t_both2_id') ---- 1 @@ -129,13 +143,16 @@ SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELE statement ok CREATE TABLE t_multi (a INTEGER, b INTEGER, c INTEGER) PARTITIONED BY (a, b) SORTED BY (c, a) +statement ok +SET VARIABLE t_multi_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') + query I -SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') +SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = getvariable('t_multi_id') ---- 2 query I -SELECT count(*) FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') +SELECT count(*) FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = getvariable('t_multi_id') ---- 2 @@ -143,7 +160,7 @@ statement ok INSERT INTO t_multi VALUES (1, 1, 30), (1, 1, 10), (1, 2, 20), (2, 1, 5) # t_multi is PARTITIONED BY (a, b), so each (a, b) combination lives in its own file. SORTED BY -# (c, a) means rows within a partition's file are (c, a)-ascending. +# (c, a) means rows within a partition's file are (c, a)-ascending. query III SELECT a, b, c FROM t_multi WHERE a = 1 AND b = 1 ---- @@ -154,7 +171,7 @@ SELECT a, b, c FROM t_multi WHERE a = 1 AND b = 1 query II SELECT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) AS a_dir, regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1) AS b_dir FROM ducklake_metadata.ducklake_data_file -WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') +WHERE table_id = getvariable('t_multi_id') ORDER BY a_dir, b_dir ---- a=1 b=1 @@ -164,7 +181,7 @@ a=2 b=1 # the a directory must come *before* the b directory in the path (PARTITIONED BY (a, b) ordering) query I SELECT count(*) FROM ducklake_metadata.ducklake_data_file -WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') +WHERE table_id = getvariable('t_multi_id') AND regexp_matches(path, 'a=[0-9]+/b=[0-9]+') ---- 3 @@ -173,8 +190,11 @@ WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE ta statement ok CREATE TABLE t_transform (ts TIMESTAMP, v INTEGER) PARTITIONED BY (year(ts)) +statement ok +SET VARIABLE t_transform_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_transform') + query II -SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_transform') +SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = getvariable('t_transform_id') ---- 0 year @@ -183,7 +203,7 @@ INSERT INTO t_transform VALUES (TIMESTAMP '2023-06-01', 1), (TIMESTAMP '2023-09- # year() transform produces hive directories named with the partition key NAME (year, not ts) query I -SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_transform') ORDER BY 1 +SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_transform_id') ORDER BY 1 ---- year=2023 year=2024 @@ -192,13 +212,16 @@ year=2024 statement ok CREATE TABLE t_only_part (a INTEGER) PARTITIONED BY (a) +statement ok +SET VARIABLE t_only_part_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part') + query I -SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part') +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('t_only_part_id') ---- 1 query I -SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part') +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('t_only_part_id') ---- 0 @@ -206,7 +229,7 @@ statement ok INSERT INTO t_only_part VALUES (10), (20), (10) query I -SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part') ORDER BY 1 +SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_only_part_id') ORDER BY 1 ---- a=10 a=20 @@ -215,13 +238,16 @@ a=20 statement ok CREATE TABLE t_only_sort (a INTEGER) SORTED BY (a) +statement ok +SET VARIABLE t_only_sort_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_sort') + query I -SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_sort') +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('t_only_sort_id') ---- 0 query I -SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_sort') +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('t_only_sort_id') ---- 1 @@ -229,8 +255,11 @@ SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELE statement ok CREATE TABLE t_bucket (id BIGINT, v VARCHAR) PARTITIONED BY (bucket(8, id)) +statement ok +SET VARIABLE t_bucket_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket') + query II -SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket') +SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = getvariable('t_bucket_id') ---- 0 bucket(8) @@ -240,13 +269,13 @@ INSERT INTO t_bucket VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6 # bucket() transform writes into hive directories named "bucket=N" (modulo bucket count) # we don't fix specific bucket assignments (hash-dependent) but we assert at least 1 and at most 8 distinct buckets query I -SELECT count(DISTINCT regexp_extract(path, '.*(bucket=[0-9]+)[/\\].*', 1)) BETWEEN 1 AND 8 FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket') +SELECT count(DISTINCT regexp_extract(path, '.*(bucket=[0-9]+)[/\\].*', 1)) BETWEEN 1 AND 8 FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_bucket_id') ---- true # every file must live in a bucket=N directory query I -SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket') AND NOT regexp_matches(path, 'bucket=[0-9]+') +SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_bucket_id') AND NOT regexp_matches(path, 'bucket=[0-9]+') ---- 0 @@ -267,30 +296,35 @@ ALTER TABLE t_create_then_alter SET SORTED BY (b DESC) statement ok COMMIT +statement ok +SET VARIABLE t_create_then_alter_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') + # exactly one partition_info row (the ALTER's), not two query I -SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('t_create_then_alter_id') ---- 1 +# resolve the column_id of b separately so the check below only scans one metadata table +statement ok +SET VARIABLE t_create_then_alter_b_col = (SELECT column_id FROM ducklake_metadata.ducklake_column WHERE table_id = getvariable('t_create_then_alter_id') AND column_name = 'b' AND end_snapshot IS NULL) + # the surviving partition row references column b (the ALTER's choice), not a (the inline create table's). query III -SELECT pc.partition_key_index, c.column_name, pc.transform -FROM ducklake_metadata.ducklake_partition_column pc -JOIN ducklake_metadata.ducklake_column c - ON pc.table_id = c.table_id AND pc.column_id = c.column_id AND c.end_snapshot IS NULL -WHERE pc.table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') +SELECT partition_key_index, column_id = getvariable('t_create_then_alter_b_col'), transform +FROM ducklake_metadata.ducklake_partition_column +WHERE table_id = getvariable('t_create_then_alter_id') ---- -0 b identity +0 true identity # exactly one sort_info row, and its expression is b DESC (the ALTER's), not a query I -SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('t_create_then_alter_id') ---- 1 query III -SELECT expression, sort_direction, null_order FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') +SELECT expression, sort_direction, null_order FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = getvariable('t_create_then_alter_id') ---- b DESC NULLS_LAST @@ -299,13 +333,13 @@ statement ok INSERT INTO t_create_then_alter VALUES (1, 100), (2, 100), (3, 200) query I -SELECT DISTINCT regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') ORDER BY 1 +SELECT DISTINCT regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_create_then_alter_id') ORDER BY 1 ---- b=100 b=200 # none of the files should be under an a=N directory (regression guard for the dedup logic) query I -SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') AND regexp_matches(path, 'a=[0-9]+/') +SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_create_then_alter_id') AND regexp_matches(path, 'a=[0-9]+/') ---- 0 diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test index d93155f7202..64519581ffe 100644 --- a/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test @@ -29,14 +29,19 @@ SELECT i, v FROM ctas_part ORDER BY i 3 3 4 4 +# resolve the table_id in a separate statement - some metadata catalogs (e.g. Quack +# ones) cannot scan two of their tables in a single query +statement ok +SET VARIABLE ctas_part_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_part') + query I -SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_part') +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('ctas_part_id') ---- 1 # CTAS must hive-partition on the initial write: 5 distinct values from range(5) -> 5 hive directories query I -SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_part') ORDER BY 1 +SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('ctas_part_id') ORDER BY 1 ---- i=0 i=1 @@ -64,8 +69,11 @@ SELECT i, j FROM ctas_sort 1 8 0 9 +statement ok +SET VARIABLE ctas_sort_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_sort') + query I -SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_sort') +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('ctas_sort_id') ---- 1 @@ -75,16 +83,22 @@ statement ok CREATE TABLE ctas_both PARTITIONED BY (g) SORTED BY (v) AS SELECT range % 3 AS g, 29 - range AS v FROM range(30) -query II -SELECT - (SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both')), - (SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both')) +statement ok +SET VARIABLE ctas_both_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both') + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('ctas_both_id') ---- -1 1 +1 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('ctas_both_id') +---- +1 # 3 distinct partition values -> 3 hive directories query I -SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both') +SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('ctas_both_id') ---- 3 @@ -136,15 +150,21 @@ statement ok CREATE TABLE ctas_swap SORTED BY (v) PARTITIONED BY (g) AS SELECT range % 3 AS g, 29 - range AS v FROM range(30) -query II -SELECT - (SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap')), - (SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap')) +statement ok +SET VARIABLE ctas_swap_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap') + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('ctas_swap_id') ---- -1 1 +1 query I -SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap') +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('ctas_swap_id') +---- +1 + +query I +SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('ctas_swap_id') ---- 3 @@ -168,14 +188,17 @@ statement ok CREATE TABLE ctas_transform PARTITIONED BY (year(ts)) AS SELECT TIMESTAMP '2024-01-01' + INTERVAL (i) YEAR AS ts, i AS v FROM range(3) t(i) +statement ok +SET VARIABLE ctas_transform_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_transform') + query II -SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_transform') +SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = getvariable('ctas_transform_id') ---- 0 year # CTAS with year() transform must hive-partition on the initial write query I -SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_transform') ORDER BY 1 +SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('ctas_transform_id') ORDER BY 1 ---- year=2024 year=2025 @@ -196,8 +219,11 @@ SELECT a, b FROM ctas_multi 2 tag_2 3 tag_3 +statement ok +SET VARIABLE ctas_multi_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_multi') + query II -SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_multi') ORDER BY sort_key_index +SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = getvariable('ctas_multi_id') ORDER BY sort_key_index ---- 0 a 1 b @@ -239,9 +265,12 @@ SELECT g, v FROM ctas_cte WHERE g = 2 2 5 2 8 +statement ok +SET VARIABLE ctas_cte_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_cte') + # CTE-sourced CTAS must still partition on the initial write (regression: planner passes the spec through) query I -SELECT DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_cte') ORDER BY 1 +SELECT DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('ctas_cte_id') ORDER BY 1 ---- g=0 g=1 diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test index 85343564a51..e9d2fa75d61 100644 --- a/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test @@ -79,8 +79,13 @@ SELECT count(*) FROM ducklake_metadata.ducklake_sort_info statement ok CREATE TABLE ok_after_errors (a INTEGER) PARTITIONED BY (a) +# resolve the table_id in a separate statement - some metadata catalogs (e.g. Quack +# ones) cannot scan two of their tables in a single query +statement ok +SET VARIABLE ok_after_errors_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_after_errors') + query I -SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_after_errors') +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('ok_after_errors_id') ---- 1 @@ -89,8 +94,11 @@ SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = statement ok CREATE TABLE ok_diff_transforms (ts TIMESTAMP) PARTITIONED BY (year(ts), month(ts)) +statement ok +SET VARIABLE ok_diff_transforms_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_diff_transforms') + query II -SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_diff_transforms') ORDER BY partition_key_index +SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = getvariable('ok_diff_transforms_id') ORDER BY partition_key_index ---- 0 year 1 month diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test index 06372b84d36..f805cfcc661 100644 --- a/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test @@ -4,7 +4,7 @@ # rollback discards everything (entry, files, transaction-local ids); concurrent CTAS with # distinct names cannot conflict; concurrent CTAS with the same name produces a commit-time -# conflict; +# conflict; require ducklake @@ -88,16 +88,21 @@ SELECT * FROM rollback_ctas ---- :.*not exist.* +# resolve the (now nonexistent -> NULL) table_id in a separate statement - some metadata catalogs +# (e.g. Quack ones) cannot scan two of their tables in a single query +statement ok +SET VARIABLE rollback_ctas_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rollback_ctas') + # verify the metadata catalog never saw the rolled-back partition_info / sort_info rows query I SELECT count(*) FROM ducklake_meta.ducklake_partition_info -WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rollback_ctas') +WHERE table_id = getvariable('rollback_ctas_id') ---- 0 query I SELECT count(*) FROM ducklake_meta.ducklake_sort_info -WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rollback_ctas') +WHERE table_id = getvariable('rollback_ctas_id') ---- 0 @@ -149,10 +154,13 @@ statement error con2 COMMIT ---- +statement ok +SET VARIABLE conflict_same_name_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'conflict_same_name') + # con1's table is the surviving one - check its inline partition spec landed query I SELECT count(*) FROM ducklake_meta.ducklake_partition_info -WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'conflict_same_name') +WHERE table_id = getvariable('conflict_same_name_id') ---- 1 @@ -185,21 +193,30 @@ COMMIT statement ok con2 COMMIT +statement ok +SET VARIABLE concurrent_a_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_a') + +statement ok +SET VARIABLE concurrent_b_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_b') + # both tables exist and have their own partition_info rows -query II -SELECT - (SELECT count(*) FROM ducklake_meta.ducklake_partition_info - WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_a')), - (SELECT count(*) FROM ducklake_meta.ducklake_partition_info - WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_b')) +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id = getvariable('concurrent_a_id') ---- -1 1 +1 + +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id = getvariable('concurrent_b_id') +---- +1 # the two tables have DISTINCT committed partition_ids (proves the commit-time remap deduplicates the # transaction-local ids that started identical at planning time on each connection). query I SELECT count(DISTINCT partition_id) FROM ducklake_meta.ducklake_partition_info -WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name IN ('concurrent_a', 'concurrent_b')) +WHERE table_id IN (getvariable('concurrent_a_id'), getvariable('concurrent_b_id')) ---- 2 @@ -235,23 +252,26 @@ ALTER TABLE chain_winner SET SORTED BY (b DESC) statement ok COMMIT +statement ok +SET VARIABLE chain_winner_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner') + # exactly one partition_info row (the last ALTER's) and one sort_info row (the last ALTER's) query I SELECT count(*) FROM ducklake_meta.ducklake_partition_info -WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner') +WHERE table_id = getvariable('chain_winner_id') ---- 1 query I SELECT count(*) FROM ducklake_meta.ducklake_sort_info -WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner') +WHERE table_id = getvariable('chain_winner_id') ---- 1 # the surviving sort row is `b DESC`, not the inline `a ASC` query III SELECT expression, sort_direction, null_order FROM ducklake_meta.ducklake_sort_expression -WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner') +WHERE table_id = getvariable('chain_winner_id') ---- b DESC NULLS_LAST @@ -272,9 +292,12 @@ SELECT * FROM error_ctas ---- :.*not exist.* +statement ok +SET VARIABLE error_ctas_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'error_ctas') + query I SELECT count(*) FROM ducklake_meta.ducklake_partition_info -WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'error_ctas') +WHERE table_id = getvariable('error_ctas_id') ---- 0 @@ -376,10 +399,17 @@ SELECT * FROM rb_b ---- :.*not exist.* +# both lookups yield NULL (the tables were rolled back), so the IN below matches no rows +statement ok +SET VARIABLE rb_a_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rb_a') + +statement ok +SET VARIABLE rb_b_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rb_b') + # neither rolled-back table left a partition_info row query I SELECT count(*) FROM ducklake_meta.ducklake_partition_info -WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name IN ('rb_a','rb_b')) +WHERE table_id IN (getvariable('rb_a_id'), getvariable('rb_b_id')) ---- 0 @@ -436,10 +466,13 @@ INSERT INTO same_tx_part VALUES (10, 110), (10, 111), (11, 112) statement ok COMMIT +statement ok +SET VARIABLE same_tx_part_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name='same_tx_part') + # every data file references the partition_id - CTAS-written and INSERT-written alike query I SELECT count(*) FROM ducklake_meta.ducklake_data_file -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name='same_tx_part') +WHERE table_id = getvariable('same_tx_part_id') AND partition_id IS NULL ---- 0 diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test index cd5cb78026d..bcd053dca22 100644 --- a/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test @@ -42,30 +42,35 @@ SELECT count(*) FROM small_part ---- 50 +# resolve the table_id in a separate statement - some metadata catalogs (e.g. Quack +# ones) cannot scan two of their tables in a single query +statement ok +SET VARIABLE small_part_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') + # partition metadata recorded for the inlined CTAS query I SELECT count(*) FROM ducklake_meta.ducklake_partition_info -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +WHERE table_id = getvariable('small_part_id') ---- 1 query II SELECT partition_key_index, transform FROM ducklake_meta.ducklake_partition_column -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +WHERE table_id = getvariable('small_part_id') ---- 0 identity # zero data_file rows: rows are sitting in the inlined-data catalog table, not on Parquet query I SELECT count(*) FROM ducklake_meta.ducklake_data_file -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +WHERE table_id = getvariable('small_part_id') ---- 0 # the per-table inlined-data table exists in the metadata catalog query I SELECT count(*) FROM ducklake_meta.ducklake_inlined_data_tables -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +WHERE table_id = getvariable('small_part_id') ---- 1 @@ -83,29 +88,32 @@ SELECT count(*) FROM small_both ---- 20 +statement ok +SET VARIABLE small_both_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') + query I SELECT count(*) FROM ducklake_meta.ducklake_partition_info -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') +WHERE table_id = getvariable('small_both_id') ---- 1 query I SELECT count(*) FROM ducklake_meta.ducklake_sort_info -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') +WHERE table_id = getvariable('small_both_id') ---- 1 # the sort_expression row carries the planning-time-allocated direction/null_order query III SELECT expression, sort_direction, null_order FROM ducklake_meta.ducklake_sort_expression -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') +WHERE table_id = getvariable('small_both_id') ---- v ASC NULLS_LAST # fully inlined: zero Parquet files query I SELECT count(*) FROM ducklake_meta.ducklake_data_file -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') +WHERE table_id = getvariable('small_both_id') ---- 0 @@ -122,13 +130,13 @@ CALL ducklake_flush_inlined_data('ducklake') # path hands the spec through correctly) query I SELECT count(*) > 0 FROM ducklake_meta.ducklake_data_file -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +WHERE table_id = getvariable('small_part_id') ---- true query I SELECT count(*) FROM ducklake_meta.ducklake_data_file -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +WHERE table_id = getvariable('small_part_id') AND partition_id IS NULL ---- 0 @@ -154,26 +162,32 @@ SELECT count(*) FROM overflow_part ---- 5000 +statement ok +SET VARIABLE overflow_part_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part') + # partition + sort metadata persist -query II -SELECT - (SELECT count(*) FROM ducklake_meta.ducklake_partition_info - WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')), - (SELECT count(*) FROM ducklake_meta.ducklake_sort_info - WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')) +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id = getvariable('overflow_part_id') ---- -1 1 +1 + +query I +SELECT count(*) FROM ducklake_meta.ducklake_sort_info +WHERE table_id = getvariable('overflow_part_id') +---- +1 # overflow path: at least one Parquet file was written, and each has a non-null partition_id query I SELECT count(*) > 0 FROM ducklake_meta.ducklake_data_file -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part') +WHERE table_id = getvariable('overflow_part_id') ---- true query I SELECT count(*) FROM ducklake_meta.ducklake_data_file -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part') +WHERE table_id = getvariable('overflow_part_id') AND partition_id IS NULL ---- 0 @@ -190,7 +204,7 @@ CALL ducklake_flush_inlined_data('ducklake') # overflow_part: zero rows in any inlined-data table after flush query I SELECT count(*) FROM ducklake_meta.ducklake_data_file -WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part') +WHERE table_id = getvariable('overflow_part_id') AND partition_id IS NULL ---- 0 From 705169066397287272266ee1f27ceea57969d2f1 Mon Sep 17 00:00:00 2001 From: Alex-Monahan Date: Fri, 5 Jun 2026 16:20:10 -0700 Subject: [PATCH 11/12] Fix target_file_size precedence after merge: session setting wins The merge resolution in GetCopyOptions consulted the table-scoped option (WITH (...) / metadata) before the ducklake_target_file_size session setting, inverting the precedence pinned by test/sql/insert/insert_session_target_file_size.test. Check the session setting first; fall back to the table-scoped option, then GetTargetFileSize (global option / default). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/storage/ducklake_insert.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/storage/ducklake_insert.cpp b/src/storage/ducklake_insert.cpp index 525ba211949..0a496cecd03 100644 --- a/src/storage/ducklake_insert.cpp +++ b/src/storage/ducklake_insert.cpp @@ -550,9 +550,15 @@ DuckLakeCopyOptions DuckLakeInsert::GetCopyOptions(ClientContext &context, DuckL } string target_file_size_str; idx_t target_file_size; - if (copy_input.GetEffectiveOption("target_file_size", target_file_size_str)) { + Value session_target_file_size; + bool has_session_target_file_size = + context.TryGetCurrentSetting("ducklake_target_file_size", session_target_file_size) && + !session_target_file_size.IsNull() && !session_target_file_size.ToString().empty(); + if (!has_session_target_file_size && copy_input.GetEffectiveOption("target_file_size", target_file_size_str)) { + // WITH (...) / metadata option - used unless the ducklake_target_file_size session setting overrides it target_file_size = Value(target_file_size_str).GetValue(); } else { + // session setting, then global metadata option, then the default target_file_size = catalog.GetTargetFileSize(context, schema_id, table_id); } From 39d70c7afb1d008f74e21f6503b21ddfd236eeba Mon Sep 17 00:00:00 2001 From: Alex-Monahan Date: Fri, 5 Jun 2026 16:21:44 -0700 Subject: [PATCH 12/12] Format fix Co-Authored-By: Claude Opus 4.8 (1M context) --- src/storage/ducklake_table_entry.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/storage/ducklake_table_entry.cpp b/src/storage/ducklake_table_entry.cpp index eff6b7a2f7f..faefb12c9d5 100644 --- a/src/storage/ducklake_table_entry.cpp +++ b/src/storage/ducklake_table_entry.cpp @@ -695,7 +695,8 @@ unique_ptr DuckLakeTableEntry::AlterTable(ClientContext &context, if (transaction.HasTransactionLocalInserts(GetTableId())) { VerifyNoNullValues(context, transaction, *this, col); table_info.constraints.push_back(make_uniq(col.Logical())); - auto new_entry = make_uniq(*this, table_info, LocalChange::SetNull(field_id.GetFieldIndex())); + auto new_entry = + make_uniq(*this, table_info, LocalChange::SetNull(field_id.GetFieldIndex())); return std::move(new_entry); }