diff --git a/src/functions/ducklake_compaction_functions.cpp b/src/functions/ducklake_compaction_functions.cpp index 92ad52f05f4..dcd89b363be 100644 --- a/src/functions/ducklake_compaction_functions.cpp +++ b/src/functions/ducklake_compaction_functions.cpp @@ -47,15 +47,16 @@ vector DuckLakeCompactor::ParseSortOrders(const DuckLakeSort &sort_ } //! Binds ORDER BY expressions directly using ExpressionBinder. -vector DuckLakeCompactor::BindSortOrders(Binder &binder, DuckLakeTableEntry &table, idx_t table_index, +vector DuckLakeCompactor::BindSortOrders(Binder &binder, const ColumnList &columns, + const string &table_name, TableIndex table_index, vector &pre_bound_orders) { - auto &columns = table.GetColumns(); auto column_names = columns.GetColumnNames(); auto column_types = columns.GetColumnTypes(); // Create a child binder with the table columns in scope auto child_binder = Binder::CreateBinder(binder.context, &binder); - child_binder->bind_context.AddGenericBinding(table_index, table.name, column_names, column_types); + // AddGenericBinding takes the raw idx_t binding id, not DuckLake's TableIndex wrapper. 
+ child_binder->bind_context.AddGenericBinding(table_index.index, table_name, column_names, column_types); // Bind each ORDER BY expression directly vector orders; @@ -68,6 +69,12 @@ vector DuckLakeCompactor::BindSortOrders(Binder &binder, DuckL return orders; } +vector DuckLakeCompactor::BindSortOrders(Binder &binder, DuckLakeTableEntry &table, + idx_t table_index, + vector &pre_bound_orders) { + return BindSortOrders(binder, table.GetColumns(), table.name, TableIndex(table_index), pre_bound_orders); +} + //===--------------------------------------------------------------------===// // Compaction Operator //===--------------------------------------------------------------------===// @@ -384,7 +391,7 @@ unique_ptr DuckLakeCompactor::InsertSort(Binder &binder, unique } // Validate all column references in sort expressions exist in the table - DuckLakeTableEntry::ValidateSortExpressionColumns(table, pre_bound_orders); + DuckLakeTableEntry::ValidateSortExpressionColumns(table.GetColumns(), pre_bound_orders); // Resolve types for the input plan (could be LogicalGet or LogicalProjection) plan->ResolveOperatorTypes(); diff --git a/src/functions/ducklake_set_option.cpp b/src/functions/ducklake_set_option.cpp index f4910846e4b..eda3cd0eccd 100644 --- a/src/functions/ducklake_set_option.cpp +++ b/src/functions/ducklake_set_option.cpp @@ -4,8 +4,40 @@ #include "storage/ducklake_table_entry.hpp" #include "storage/ducklake_schema_entry.hpp" +#include "duckdb/execution/expression_executor.hpp" +#include "duckdb/parser/parsed_expression.hpp" +#include "duckdb/planner/binder.hpp" +#include "duckdb/planner/expression_binder/constant_binder.hpp" + namespace duckdb { +vector ValidateOptionsInCreateWith(ClientContext &context, + const case_insensitive_map_t> &options) { + // Bind via ConstantBinder so each value expression must fold to a literal (no column refs, no + // subqueries) — mirrors upstream ATTACH (bind_attach.cpp) and CREATE SECRET (bind_create.cpp). 
+ // `allow_unfoldable=true` on EvaluateScalar lets volatile functions like random() through; that's + // fine here because we only persist the resulting Value, not the expression tree. + // Each expression is copied before binding because CTAS calls this twice (plan time + sink init) + // over the same options map; consuming the map (the upstream pattern) would break the second call. + auto binder = Binder::CreateBinder(context); + ConstantBinder option_binder(*binder, context, "DuckLake WITH option"); + vector result; + result.reserve(options.size()); + for (auto &option : options) { + if (!option.second) { + throw BinderException("WITH option \"%s\" requires a value", option.first); + } + auto expr_copy = option.second->Copy(); + auto bound_expr = option_binder.Bind(expr_copy); + if (bound_expr->HasParameter()) { + throw ParameterNotResolvedException(); + } + auto value = ExpressionExecutor::EvaluateScalar(context, *bound_expr, true); + result.push_back(ValidateDuckLakeConfigOption(context, option.first, value)); + } + return result; +} + struct DuckLakeSetOptionData : public TableFunctionData { DuckLakeSetOptionData(Catalog &catalog, DuckLakeConfigOption option_p) : catalog(catalog), option(std::move(option_p)) { @@ -15,17 +47,12 @@ struct DuckLakeSetOptionData : public TableFunctionData { DuckLakeConfigOption option; }; -static unique_ptr DuckLakeSetOptionBind(ClientContext &context, TableFunctionBindInput &input, - vector &return_types, vector &names) { - auto &catalog = DuckLakeBaseMetadataFunction::GetCatalog(context, input.inputs[0]); - DuckLakeConfigOption config_option; - auto &option = config_option.option.key; - auto &value = config_option.option.value; - - option = StringUtil::Lower(StringValue::Get(input.inputs[1])); - auto &val = input.inputs[2]; +DuckLakeTag ValidateDuckLakeConfigOption(ClientContext &context, const string &option_key, const Value &val) { + DuckLakeTag result; + auto option = StringUtil::Lower(option_key); + result.key = option; + 
auto &value = result.value; - // read the option if (option == "parquet_compression") { auto codec = val.DefaultCastAs(LogicalType::VARCHAR).GetValue(); vector supported_algorithms {"uncompressed", "snappy", "gzip", "zstd", "brotli", "lz4", "lz4_raw"}; @@ -82,9 +109,8 @@ static unique_ptr DuckLakeSetOptionBind(ClientContext &context, Ta } else if (option == "delete_older_than" || option == "expire_older_than") { auto interval_value = val.ToString(); if (!interval_value.empty()) { - // Let's verify this is actually an interval - interval_t result; - if (!Interval::FromString(val.ToString(), result)) { + interval_t interval_result; + if (!Interval::FromString(val.ToString(), interval_result)) { throw BinderException("%s is not a valid interval value.", option); } } @@ -103,6 +129,15 @@ static unique_ptr DuckLakeSetOptionBind(ClientContext &context, Ta } else { throw NotImplementedException("Unsupported option %s", option); } + return result; +} + +static unique_ptr DuckLakeSetOptionBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto &catalog = DuckLakeBaseMetadataFunction::GetCatalog(context, input.inputs[0]); + DuckLakeConfigOption config_option; + config_option.option = ValidateDuckLakeConfigOption(context, StringValue::Get(input.inputs[1]), input.inputs[2]); + auto &option = config_option.option.key; // read the scope string schema; diff --git a/src/include/functions/ducklake_compaction_functions.hpp b/src/include/functions/ducklake_compaction_functions.hpp index ef69944ad52..94db42bc1f6 100644 --- a/src/include/functions/ducklake_compaction_functions.hpp +++ b/src/include/functions/ducklake_compaction_functions.hpp @@ -93,6 +93,10 @@ class DuckLakeCompactor { static vector ParseSortOrders(const DuckLakeSort &sort_data); static vector BindSortOrders(Binder &binder, DuckLakeTableEntry &table, idx_t table_index, vector &pre_bound_orders); + //! 
Overload that takes (columns, table_name) directly so CTAS planning can sort-bind before any + //! DuckLakeTableEntry exists. Behaves identically to the entry-taking overload. + static vector BindSortOrders(Binder &binder, const ColumnList &columns, const string &table_name, + TableIndex table_index, vector &pre_bound_orders); private: ClientContext &context; diff --git a/src/include/functions/ducklake_table_functions.hpp b/src/include/functions/ducklake_table_functions.hpp index 3a6814d6ee7..639136061e9 100644 --- a/src/include/functions/ducklake_table_functions.hpp +++ b/src/include/functions/ducklake_table_functions.hpp @@ -14,7 +14,22 @@ namespace duckdb { class DuckLakeCatalog; +class ParsedExpression; struct DuckLakeSnapshotInfo; +struct DuckLakeTag; + +//! Validate and canonicalize a single (key, value) DuckLake config option. +//! Shared by `CALL ducklake.set_option(...)` and CREATE TABLE / CTAS `WITH (...)`. +DuckLakeTag ValidateDuckLakeConfigOption(ClientContext &context, const string &option_key, const Value &val); + +//! Bind, fold, and validate every entry in a CREATE TABLE / CTAS `WITH (...)` options map. +//! Each value expression must fold to a literal under ConstantBinder (constants, `getvariable(...)`, +//! `upper('zstd')`, etc.); each key/value is then run through ValidateDuckLakeConfigOption. +//! Empty input → empty output. Throws BinderException on any unbindable expression or unknown key. +//! The input map is left intact — copies each expression before binding because CTAS calls this +//! both at plan time (PlanCreateTableAs) and again during sink-state-init (CreateTableExtended). 
+vector ValidateOptionsInCreateWith(ClientContext &context, + const case_insensitive_map_t> &options); class DuckLakeTableFunctionUtil { public: diff --git a/src/include/storage/ducklake_catalog.hpp b/src/include/storage/ducklake_catalog.hpp index f3d5cb014ab..a2b69e56b43 100644 --- a/src/include/storage/ducklake_catalog.hpp +++ b/src/include/storage/ducklake_catalog.hpp @@ -28,6 +28,7 @@ class DuckLakeFieldData; struct DuckLakeFileListEntry; struct DuckLakeConfigOption; struct DeleteFileMap; +struct BoundCreateTableInfo; class LogicalGet; //! Cache entry for DuckLake table statistics @@ -135,6 +136,8 @@ class DuckLakeCatalog : public Catalog { optional_ptr CreateSchema(CatalogTransaction transaction, CreateSchemaInfo &info) override; + ErrorData SupportsCreateTable(BoundCreateTableInfo &info) override; + void ScanSchemas(ClientContext &context, std::function callback) override; optional_ptr LookupSchema(CatalogTransaction transaction, const EntryLookupInfo &schema_lookup, diff --git a/src/include/storage/ducklake_insert.hpp b/src/include/storage/ducklake_insert.hpp index c591763ca50..f1b9a4b76fc 100644 --- a/src/include/storage/ducklake_insert.hpp +++ b/src/include/storage/ducklake_insert.hpp @@ -14,14 +14,17 @@ #include "duckdb/common/index_vector.hpp" #include "storage/ducklake_stats.hpp" #include "common/ducklake_data_file.hpp" +#include "common/ducklake_options.hpp" #include "storage/ducklake_field_data.hpp" +#include "storage/ducklake_partition_data.hpp" +#include "storage/ducklake_sort_data.hpp" namespace duckdb { class DuckLakeCatalog; class DuckLakeSchemaEntry; class DuckLakeTableEntry; +class ParsedExpression; class DuckLakeFieldData; -struct DuckLakePartition; struct DuckLakeCopyOptions; struct DuckLakeCopyInput; @@ -41,15 +44,19 @@ class DuckLakeInsertGlobalState : public GlobalSinkState { class DuckLakeInsert : public PhysicalOperator { public: - //! INSERT INTO + //! INSERT INTO an existing table. 
DuckLakeInsert(PhysicalPlan &physical_plan, const vector &types, DuckLakeTableEntry &table, optional_idx partition_id, string encryption_key); - //! CREATE TABLE AS + //! CREATE TABLE AS - the table is created in GetGlobalSinkState. Any inline + //! PARTITIONED BY / SORTED BY clauses are pre-built into ctas_partition_data / ctas_sort_data at planning + //! time so the physical_copy upstream of this operator can hive-partition the write with a partition_id + //! that matches what gets attached to the table entry at sink-state-init. DuckLakeInsert(PhysicalPlan &physical_plan, const vector &types, SchemaCatalogEntry &schema, unique_ptr info, string table_uuid, string table_data_path, - string encryption_key); + unique_ptr ctas_partition_data, unique_ptr ctas_sort_data, + optional_idx partition_id, string encryption_key); - //! The table to insert into + //! The table to insert into (only set for INSERT INTO; nullptr for CTAS until GetGlobalSinkState resolves) optional_ptr table; //! Table schema, in case of CREATE TABLE AS optional_ptr schema; @@ -59,6 +66,11 @@ class DuckLakeInsert : public PhysicalOperator { string table_uuid; //! The table data path, in case of CREATE TABLE AS string table_data_path; + //! Pre-built partition spec for CTAS (allocated at planning time so the physical write carries the + //! correct partition_id). + unique_ptr ctas_partition_data; + //! Pre-built sort spec for CTAS (same lifecycle as ctas_partition_data). + unique_ptr ctas_sort_data; //! The partition id we are writing into (if any) optional_idx partition_id; //! The encryption key used for writing the Parquet files @@ -140,8 +152,22 @@ struct DuckLakeCopyOptions { struct DuckLakeCopyInput { explicit DuckLakeCopyInput(ClientContext &context, DuckLakeTableEntry &table, const string &hive_partition = ""); + //! CTAS-flavored: take the field-id mapping and (optional) partition spec from the planning-time-built + //! 
spec rather than from a DuckLakeTableEntry that hasn't been created yet. field_data is required + //! because the parquet writer always needs it; partition_data is null when CREATE TABLE AS has no + //! inline PARTITIONED BY clause. `options_in_create_with` is the raw `WITH (...)` clause from the + //! parser; it is validated and surfaced through the `options_in_create_with` map so the parquet + //! write honors it. DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns, - const string &data_path_p); + const string &data_path_p, DuckLakeFieldData &field_data, + optional_ptr partition_data = nullptr, + const case_insensitive_map_t> &options_in_create_with = {}); + + //! Look up an effective DuckLake config option for this write: returns the WITH override if one is + //! present in `options_in_create_with`, otherwise falls through to the catalog's table/schema/global + //! lookup. Used by PlanCopyForInsert so a CTAS write whose new table_id is not yet allocated still + //! picks up the user's WITH (...) values. + bool GetEffectiveOption(const string &key, string &out) const; DuckLakeCatalog &catalog; optional_ptr partition_data; @@ -153,6 +179,9 @@ struct DuckLakeCopyInput { TableIndex table_id; InsertVirtualColumns virtual_columns = InsertVirtualColumns::NONE; optional_idx get_table_index; + //! CREATE TABLE / CTAS WITH (...) options that override same-key catalog options for this write. + //! Empty for plain INSERT. Validated at plan time before reaching here. 
+ option_map_t options_in_create_with; }; } // namespace duckdb diff --git a/src/include/storage/ducklake_metadata_manager.hpp b/src/include/storage/ducklake_metadata_manager.hpp index 90206e1f3ce..ccfed486648 100644 --- a/src/include/storage/ducklake_metadata_manager.hpp +++ b/src/include/storage/ducklake_metadata_manager.hpp @@ -174,6 +174,7 @@ class DuckLakeMetadataManager { virtual string WriteNewPartitionKeys(DuckLakeSnapshot commit_snapshot, const vector &new_partitions); virtual string WriteNewSortKeys(DuckLakeSnapshot commit_snapshot, const vector &new_sorts); + virtual string WriteOptionsInCreateWith(const vector &options_in_create_with); virtual string WriteDroppedColumns(const vector &dropped_columns); virtual string WriteNewColumns(const vector &new_columns); virtual string WriteNewTags(const vector &new_tags); diff --git a/src/include/storage/ducklake_partition_data.hpp b/src/include/storage/ducklake_partition_data.hpp index 7fa7c34f94b..f65300a56ff 100644 --- a/src/include/storage/ducklake_partition_data.hpp +++ b/src/include/storage/ducklake_partition_data.hpp @@ -20,12 +20,21 @@ enum class DuckLakeTransformType { IDENTITY, BUCKET, YEAR, MONTH, DAY, HOUR }; struct DuckLakeTransform { DuckLakeTransformType type; idx_t bucket_count = 0; // only for BUCKET + + bool operator==(const DuckLakeTransform &other) const { + return type == other.type && bucket_count == other.bucket_count; + } }; struct DuckLakePartitionField { idx_t partition_key_index = 0; FieldIndex field_id; DuckLakeTransform transform; + + bool operator==(const DuckLakePartitionField &other) const { + return partition_key_index == other.partition_key_index && field_id == other.field_id && + transform == other.transform; + } }; struct DuckLakePartition { diff --git a/src/include/storage/ducklake_schema_entry.hpp b/src/include/storage/ducklake_schema_entry.hpp index d50f8e637c9..3257444acd8 100644 --- a/src/include/storage/ducklake_schema_entry.hpp +++ 
b/src/include/storage/ducklake_schema_entry.hpp @@ -10,6 +10,8 @@ #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp" #include "storage/ducklake_catalog_set.hpp" +#include "storage/ducklake_partition_data.hpp" +#include "storage/ducklake_sort_data.hpp" namespace duckdb { class DuckLakeTransaction; @@ -32,8 +34,12 @@ class DuckLakeSchemaEntry : public SchemaCatalogEntry { } public: + //! Create a DuckLakeTableEntry and register it with the transaction. + //! prebuilt_partition_data / prebuilt_sort_data are supplied by CTAS when the new table is partitioned or sorted. optional_ptr CreateTableExtended(CatalogTransaction transaction, BoundCreateTableInfo &info, - string table_uuid, string table_data_path); + string table_uuid, string table_data_path, + unique_ptr prebuilt_partition_data = nullptr, + unique_ptr prebuilt_sort_data = nullptr); unique_ptr GetInfo() const override; optional_ptr CreateTable(CatalogTransaction transaction, BoundCreateTableInfo &info) override; optional_ptr CreateFunction(CatalogTransaction transaction, CreateFunctionInfo &info) override; diff --git a/src/include/storage/ducklake_table_entry.hpp b/src/include/storage/ducklake_table_entry.hpp index 0f97e664a2f..5cd952aa669 100644 --- a/src/include/storage/ducklake_table_entry.hpp +++ b/src/include/storage/ducklake_table_entry.hpp @@ -84,6 +84,13 @@ class DuckLakeTableEntry : public TableCatalogEntry { optional_ptr GetFieldId(FieldIndex field_index) const; void SetPartitionData(unique_ptr partition_data); void SetSortData(unique_ptr sort_data); + //! Table-scoped config options collected from a CREATE TABLE / CTAS WITH (...) clause. 
+ void SetOptionsInCreateWith(vector options) { + options_in_create_with = std::move(options); + } + const vector &GetOptionsInCreateWith() const { + return options_in_create_with; + } shared_ptr GetTableStats(ClientContext &context); shared_ptr GetTableStats(DuckLakeTransaction &transaction); idx_t GetNetDataFileRowCount(DuckLakeTransaction &transaction); @@ -122,8 +129,17 @@ class DuckLakeTableEntry : public TableCatalogEntry { virtual_column_map_t GetVirtualColumns() const override; vector GetRowIdColumns() const override; - //! Validates that all column references in sort expressions exist in the table - static void ValidateSortExpressionColumns(DuckLakeTableEntry &table, const vector &orders); + //! Validates that all column references in sort expressions exist in the column list. + //! Takes (columns) directly so it can run at CTAS planning time before a DuckLakeTableEntry exists. + static void ValidateSortExpressionColumns(const ColumnList &columns, const vector &orders); + + //! Build a DuckLakePartition from raw partition expressions (allocates a transaction-local id). + static unique_ptr BuildPartitionData(DuckLakeTransaction &transaction, const ColumnList &columns, + DuckLakeFieldData &field_data, + const vector> &partition_keys); + //! Build a DuckLakeSort from a vector of OrderByNode (allocates a transaction-local id). 
+ static unique_ptr BuildSortData(DuckLakeTransaction &transaction, const ColumnList &columns, + const vector &orders); private: unique_ptr AlterTable(DuckLakeTransaction &transaction, RenameTableInfo &info); @@ -178,6 +194,8 @@ class DuckLakeTableEntry : public TableCatalogEntry { unique_ptr sort_data; // only set for REMOVED_COLUMN unique_ptr changed_fields; + // table-scoped config options collected from CREATE TABLE / CTAS WITH (...), pending until commit + vector options_in_create_with; }; } // namespace duckdb diff --git a/src/storage/ducklake_catalog.cpp b/src/storage/ducklake_catalog.cpp index c2c7e1f8a5a..2ed27012ce3 100644 --- a/src/storage/ducklake_catalog.cpp +++ b/src/storage/ducklake_catalog.cpp @@ -10,6 +10,7 @@ #include "duckdb/parser/parsed_data/create_table_info.hpp" #include "duckdb/parser/parsed_data/create_view_info.hpp" #include "duckdb/parser/parsed_data/drop_info.hpp" +#include "duckdb/planner/parsed_data/bound_create_table_info.hpp" #include "duckdb/storage/database_size.hpp" #include "storage/ducklake_initializer.hpp" #include "storage/ducklake_schema_entry.hpp" @@ -155,6 +156,12 @@ optional_ptr DuckLakeCatalog::CreateSchema(CatalogTransaction tran return result; } +ErrorData DuckLakeCatalog::SupportsCreateTable(BoundCreateTableInfo &info) { + // DuckLake handles PARTITIONED BY, SORTED BY, and WITH (...) itself in DuckLakeSchemaEntry, + // so suppress the base-class rejection of all three. 
+ return ErrorData(); +} + void DuckLakeCatalog::DropSchema(ClientContext &context, DropInfo &info) { auto schema = GetSchema(GetCatalogTransaction(context), info.name, info.if_not_found); if (!schema) { diff --git a/src/storage/ducklake_insert.cpp b/src/storage/ducklake_insert.cpp index ba3cc8bcf84..e260b37e403 100644 --- a/src/storage/ducklake_insert.cpp +++ b/src/storage/ducklake_insert.cpp @@ -10,6 +10,7 @@ #include "storage/ducklake_geo_stats.hpp" #include "common/ducklake_types.hpp" #include "functions/ducklake_compaction_functions.hpp" +#include "functions/ducklake_table_functions.hpp" #include "duckdb/catalog/catalog_entry/copy_function_catalog_entry.hpp" #include "duckdb/execution/operator/order/physical_order.hpp" @@ -42,10 +43,13 @@ DuckLakeInsert::DuckLakeInsert(PhysicalPlan &physical_plan, const vector &types, SchemaCatalogEntry &schema, unique_ptr info, string table_uuid_p, - string table_data_path_p, string encryption_key_p) + string table_data_path_p, unique_ptr ctas_partition_data_p, + unique_ptr ctas_sort_data_p, optional_idx partition_id, + string encryption_key_p) : PhysicalOperator(physical_plan, PhysicalOperatorType::EXTENSION, types, 1), table(nullptr), schema(&schema), info(std::move(info)), table_uuid(std::move(table_uuid_p)), table_data_path(std::move(table_data_path_p)), - encryption_key(std::move(encryption_key_p)) { + ctas_partition_data(std::move(ctas_partition_data_p)), ctas_sort_data(std::move(ctas_sort_data_p)), + partition_id(partition_id), encryption_key(std::move(encryption_key_p)) { } //===--------------------------------------------------------------------===// @@ -58,12 +62,18 @@ DuckLakeInsertGlobalState::DuckLakeInsertGlobalState(DuckLakeTableEntry &table) unique_ptr DuckLakeInsert::GetGlobalSinkState(ClientContext &context) const { optional_ptr table_ptr; if (info) { - // CREATE TABLE AS - create the table + // CREATE TABLE AS - materialize the table entry now, attaching a *clone* of the partition / sort spec + // we built 
at planning time so the on-disk partition_id and the in-memory partition_data agree. + // We clone (rather than std::move) so the operator's prebuilt spec stays intact across executions. + auto partition_clone = ctas_partition_data ? make_uniq(*ctas_partition_data) : nullptr; + auto sort_clone = ctas_sort_data ? make_uniq(*ctas_sort_data) : nullptr; + auto &catalog = schema->catalog; auto &ducklake_schema = schema.get_mutable()->Cast(); auto transaction = catalog.GetCatalogTransaction(context); - table_ptr = &ducklake_schema.CreateTableExtended(transaction, *info, table_uuid, table_data_path) - ->Cast(); + auto created = ducklake_schema.CreateTableExtended(transaction, *info, table_uuid, table_data_path, + std::move(partition_clone), std::move(sort_clone)); + table_ptr = &created->Cast(); } else { // INSERT INTO table_ptr = table; @@ -341,13 +351,37 @@ DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeTableEntry schema_id = table.ParentSchema().Cast().GetSchemaId(); table_id = table.GetTableId(); encryption_key = catalog.GenerateEncryptionKey(context); + // Surface CREATE TABLE / CTAS WITH (...) options for transaction-local tables: the catalog + // is not mutated for transaction-local ids, so the only source of truth is the table entry. + for (auto &tag : table.GetOptionsInCreateWith()) { + options_in_create_with[tag.key] = tag.value; + } } DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns, - const string &data_path_p) + const string &data_path_p, DuckLakeFieldData &field_data_p, + optional_ptr partition_data_p, + const case_insensitive_map_t> &options_in_create_with) : catalog(schema.ParentCatalog().Cast()), columns(columns), data_path(data_path_p) { + field_data = &field_data_p; + partition_data = partition_data_p; schema_id = schema.GetSchemaId(); encryption_key = catalog.GenerateEncryptionKey(context); + // Validate CREATE TABLE AS ... WITH (key=value, ...) 
and surface as per-write overrides so the + // parquet files land with the requested compression / row group size, even though the new table_id + // won't be allocated until DuckLakeInsert::GetGlobalSinkState (long after this plan is built). + for (auto &tag : ValidateOptionsInCreateWith(context, options_in_create_with)) { + this->options_in_create_with[tag.key] = tag.value; + } +} + +bool DuckLakeCopyInput::GetEffectiveOption(const string &key, string &out) const { + auto it = options_in_create_with.find(key); + if (it != options_in_create_with.end()) { + out = it->second; + return true; + } + return catalog.TryGetConfigOption(key, out, schema_id, table_id); } static void StripTrailingSeparator(FileSystem &fs, string &path) { @@ -490,32 +524,37 @@ DuckLakeCopyOptions DuckLakeInsert::GetCopyOptions(ClientContext &context, DuckL auto &schema_id = copy_input.schema_id; auto &table_id = copy_input.table_id; string parquet_compression; - if (catalog.TryGetConfigOption("parquet_compression", parquet_compression, schema_id, table_id)) { + if (copy_input.GetEffectiveOption("parquet_compression", parquet_compression)) { info->options["compression"].emplace_back(parquet_compression); } string parquet_version; - if (catalog.TryGetConfigOption("parquet_version", parquet_version, schema_id, table_id)) { + if (copy_input.GetEffectiveOption("parquet_version", parquet_version)) { info->options["parquet_version"].emplace_back(parquet_version); } string parquet_compression_level; - if (catalog.TryGetConfigOption("parquet_compression_level", parquet_compression_level, schema_id, table_id)) { + if (copy_input.GetEffectiveOption("parquet_compression_level", parquet_compression_level)) { info->options["compression_level"].emplace_back(parquet_compression_level); } string row_group_size; - if (catalog.TryGetConfigOption("parquet_row_group_size", row_group_size, schema_id, table_id)) { + if (copy_input.GetEffectiveOption("parquet_row_group_size", row_group_size)) { 
info->options["row_group_size"].emplace_back(row_group_size); } string row_group_size_bytes; - if (catalog.TryGetConfigOption("parquet_row_group_size_bytes", row_group_size_bytes, schema_id, table_id)) { + if (copy_input.GetEffectiveOption("parquet_row_group_size_bytes", row_group_size_bytes)) { info->options["row_group_size_bytes"].emplace_back(row_group_size_bytes + " bytes"); } string per_thread_output_str; bool per_thread_output = false; - if (catalog.TryGetConfigOption("per_thread_output", per_thread_output_str, schema_id, table_id)) { + if (copy_input.GetEffectiveOption("per_thread_output", per_thread_output_str)) { per_thread_output = per_thread_output_str == "true"; } - idx_t target_file_size = catalog.GetConfigOption("target_file_size", schema_id, table_id, - DuckLakeCatalog::DEFAULT_TARGET_FILE_SIZE); + string target_file_size_str; + idx_t target_file_size = DuckLakeCatalog::DEFAULT_TARGET_FILE_SIZE; + if (copy_input.GetEffectiveOption("target_file_size", target_file_size_str)) { + target_file_size = Value(target_file_size_str).GetValue(); + } else { + target_file_size = catalog.GetConfigOption("target_file_size", schema_id, table_id, target_file_size); + } // Always use native parquet geometry for writing info->options["geoparquet_version"].emplace_back("NONE"); @@ -735,40 +774,43 @@ static void ResolveColumnRefs(unique_ptr &expr) { ExpressionIterator::EnumerateChildren(*expr, [](unique_ptr &child) { ResolveColumnRefs(child); }); } -static optional_ptr PlanInsertSort(ClientContext &context, PhysicalPlanGenerator &planner, - PhysicalOperator &plan, DuckLakeTableEntry &table, - optional_ptr sort_data) { - // Parse the sort expressions from the sort_data +static optional_ptr PlanInsertSortFromColumns(ClientContext &context, PhysicalPlanGenerator &planner, + PhysicalOperator &plan, const ColumnList &columns, + const string &table_name, + optional_ptr sort_data) { + // Shared between the INSERT path (called with an existing table's columns/name) and the CTAS 
path + // (called with the planning-time CreateTableInfo's columns and the to-be-created table's name). auto pre_bound_orders = DuckLakeCompactor::ParseSortOrders(*sort_data); if (pre_bound_orders.empty()) { return nullptr; } + DuckLakeTableEntry::ValidateSortExpressionColumns(columns, pre_bound_orders); - // Validate all column references in sort expressions exist in the table - DuckLakeTableEntry::ValidateSortExpressionColumns(table, pre_bound_orders); - - // Bind the ORDER BY expressions auto binder = Binder::CreateBinder(context); - idx_t table_index = 0; - auto orders = DuckLakeCompactor::BindSortOrders(*binder, table, table_index, pre_bound_orders); + TableIndex table_index(0); + auto orders = DuckLakeCompactor::BindSortOrders(*binder, columns, table_name, table_index, pre_bound_orders); // Convert BoundColumnRefExpression to BoundReferenceExpression for physical plan for (auto &order : orders) { ResolveColumnRefs(order.expression); } - // Create identity projection map vector projection_map; for (idx_t i = 0; i < plan.types.size(); i++) { projection_map.push_back(i); } - auto &order_op = planner.Make(plan.types, std::move(orders), std::move(projection_map), plan.estimated_cardinality); order_op.children.push_back(plan); return &order_op; } +static optional_ptr PlanInsertSort(ClientContext &context, PhysicalPlanGenerator &planner, + PhysicalOperator &plan, DuckLakeTableEntry &table, + optional_ptr sort_data) { + return PlanInsertSortFromColumns(context, planner, plan, table.GetColumns(), table.name, sort_data); +} + PhysicalOperator &DuckLakeCatalog::PlanInsert(ClientContext &context, PhysicalPlanGenerator &planner, LogicalInsert &op, optional_ptr plan) { if (op.return_chunk) { @@ -782,10 +824,10 @@ PhysicalOperator &DuckLakeCatalog::PlanInsert(ClientContext &context, PhysicalPl } auto &ducklake_table = op.table.Cast(); - // Sort data according to the table's SET SORTED BY configuration + // Sort wrap (when SET SORTED BY is configured and sort_on_insert is 
enabled). auto sort_data = ducklake_table.GetSortData(); - auto &ducklake_schema_for_sort = ducklake_table.ParentSchema().Cast(); - bool sort_on_insert = GetConfigOption("sort_on_insert", ducklake_schema_for_sort.GetSchemaId(), + auto &ducklake_schema = ducklake_table.ParentSchema().Cast(); + bool sort_on_insert = GetConfigOption("sort_on_insert", ducklake_schema.GetSchemaId(), ducklake_table.GetTableId(), "true") == "true"; if (sort_data && sort_on_insert) { auto sorted_plan = PlanInsertSort(context, planner, *plan, ducklake_table, sort_data); @@ -794,17 +836,13 @@ PhysicalOperator &DuckLakeCatalog::PlanInsert(ClientContext &context, PhysicalPl } } + // Data-inlining wrap. When sort_on_insert=false and inlining is enabled, sort AFTER the inline operator + // so overflow rows reach Parquet sorted; inlined rows skip the sort (they live in metadata, not Parquet). optional_ptr inline_data; - idx_t data_inlining_row_limit = GetInliningLimit(context, ducklake_table); if (data_inlining_row_limit > 0) { plan = planner.Make(*plan, data_inlining_row_limit); inline_data = plan->Cast(); - - // When sort_on_insert=false but inlining is enabled, add sorting AFTER - // the inline data operator. Data that exceeds the inlining limit passes - // through to parquet files and must be sorted. Data that is inlined - // (absorbed by DuckLakeInlineData) never reaches this sort operator. 
if (sort_data && !sort_on_insert) { auto sorted_plan = PlanInsertSort(context, planner, *plan, ducklake_table, sort_data); if (sorted_plan) { @@ -812,6 +850,7 @@ PhysicalOperator &DuckLakeCatalog::PlanInsert(ClientContext &context, PhysicalPl } } } + DuckLakeCopyInput copy_input(context, ducklake_table); auto &physical_copy = DuckLakeInsert::PlanCopyForInsert(context, planner, copy_input, plan); auto &insert = DuckLakeInsert::PlanInsert(context, planner, ducklake_table, std::move(copy_input.encryption_key)); @@ -828,8 +867,53 @@ PhysicalOperator &DuckLakeCatalog::PlanCreateTableAs(ClientContext &context, Phy auto &columns = create_info.columns; auto &duck_transaction = DuckLakeTransaction::Get(context, *this); auto &duck_schema = op.schema.Cast(); + // FIXME: if table already exists and we are doing CREATE IF NOT EXISTS - skip + + // CTAS planning time: the table entry does NOT exist yet (it will be materialized at sink-state-init by + // DuckLakeInsert::GetGlobalSinkState). However the physical write we're + // about to build needs the partition spec + field-id mapping right now so the parquet files land in the + // right hive directories with the right partition_id stamped on them. + // + // The split: we build field_data + partition_data + sort_data SYNTHETICALLY here from the raw inline + // clauses on `op.info`. We hand the partition_data / sort_data to the DuckLakeInsert operator. + + idx_t column_id = 1; + auto field_data = DuckLakeFieldData::FromColumns(columns, column_id); + unique_ptr partition_data; + if (!create_info.partition_keys.empty()) { + partition_data = + DuckLakeTableEntry::BuildPartitionData(duck_transaction, columns, *field_data, create_info.partition_keys); + } + + unique_ptr sort_data; + if (!create_info.sort_keys.empty()) { + // FIXME: TODO: The syntax needs to be enabled in DuckDB upstream + // CREATE TABLE AS ... 
SORTED BY (e) accepts only raw expressions; wrap each in an OrderByNode using the + // same defaults the DuckDB transformer produces for a bare expression list (ASCENDING + ORDER_DEFAULT). + // Matches the ALTER TABLE ... SET SORTED BY (e) path. + vector orders; + orders.reserve(create_info.sort_keys.size()); + for (auto &expr : create_info.sort_keys) { + orders.emplace_back(OrderType::ASCENDING, OrderByNullType::ORDER_DEFAULT, expr->Copy()); + } + sort_data = DuckLakeTableEntry::BuildSortData(duck_transaction, columns, orders); + } + reference root = plan; + + // Sort wrap (mirrors PlanInsert's sort_on_insert wrap). For CTAS we don't have a table_id yet so the + // table-level sort_on_insert override doesn't apply; we honor the schema-level setting (defaults to true). + bool sort_on_insert = + GetConfigOption("sort_on_insert", duck_schema.GetSchemaId(), TableIndex(), "true") == "true"; + if (sort_data && sort_on_insert) { + auto sorted_plan = + PlanInsertSortFromColumns(context, planner, root.get(), columns, create_info.table, sort_data.get()); + if (sorted_plan) { + root = *sorted_plan; + } + } + optional_ptr inline_data; idx_t data_inlining_row_limit = DataInliningRowLimit(context, duck_schema.GetSchemaId(), TableIndex()); auto &metadata_manager = duck_transaction.GetMetadataManager(); @@ -837,17 +921,25 @@ PhysicalOperator &DuckLakeCatalog::PlanCreateTableAs(ClientContext &context, Phy root = planner.Make(root.get(), data_inlining_row_limit); inline_data = root.get().Cast(); } - for (auto &col : op.info->Base().columns.Logical()) { + for (auto &col : columns.Logical()) { DuckLakeTypes::CheckSupportedType(col.Type()); } auto table_uuid = duck_transaction.GenerateUUID(); auto table_data_path = duck_schema.DataPath() + DuckLakeCatalog::GeneratePathFromName(table_uuid, create_info.table); - DuckLakeCopyInput copy_input(context, duck_schema, columns, table_data_path); + DuckLakeCopyInput copy_input(context, duck_schema, columns, table_data_path, *field_data, 
partition_data.get(), + create_info.options); auto &physical_copy = DuckLakeInsert::PlanCopyForInsert(context, planner, copy_input, root.get()); - auto &insert = planner.Make(op.types, op.schema, std::move(op.info), std::move(table_uuid), - std::move(table_data_path), std::move(copy_input.encryption_key)); + + optional_idx insert_partition_id; + if (partition_data) { + insert_partition_id = partition_data->partition_id; + } + + auto &insert = planner.Make( + op.types, op.schema, std::move(op.info), std::move(table_uuid), std::move(table_data_path), + std::move(partition_data), std::move(sort_data), insert_partition_id, std::move(copy_input.encryption_key)); if (inline_data) { inline_data->insert = insert.Cast(); } diff --git a/src/storage/ducklake_metadata_manager.cpp b/src/storage/ducklake_metadata_manager.cpp index 97fa85e05d3..2be2a6da298 100644 --- a/src/storage/ducklake_metadata_manager.cpp +++ b/src/storage/ducklake_metadata_manager.cpp @@ -4573,6 +4573,25 @@ WHERE {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < end_snapshot OR end_s return table_sizes; } +string DuckLakeMetadataManager::WriteOptionsInCreateWith(const vector &options_in_create_with) { + if (options_in_create_with.empty()) { + return string(); + } + // fresh committed table-id from LocalChangeType::CREATED only — no UPSERT needed + string query = "INSERT INTO {METADATA_CATALOG}.ducklake_metadata VALUES "; + for (idx_t i = 0; i < options_in_create_with.size(); i++) { + auto &opt = options_in_create_with[i]; + D_ASSERT(opt.table_id.IsValid() && !opt.table_id.IsTransactionLocal()); + if (i > 0) { + query += ", "; + } + query += StringUtil::Format("(%s, %s, 'table', %d)", SQLString(opt.option.key), SQLString(opt.option.value), + opt.table_id.index); + } + query += ";\n"; + return query; +} + void DuckLakeMetadataManager::SetConfigOption(const DuckLakeConfigOption &option) { // check if the option already exists auto &option_key = option.option.key; diff --git 
a/src/storage/ducklake_schema_entry.cpp b/src/storage/ducklake_schema_entry.cpp index 801f4fa6c73..96185139af0 100644 --- a/src/storage/ducklake_schema_entry.cpp +++ b/src/storage/ducklake_schema_entry.cpp @@ -4,7 +4,9 @@ #include "duckdb/parser/parsed_data/comment_on_column_info.hpp" #include "duckdb/parser/parsed_data/create_view_info.hpp" #include "duckdb/parser/parsed_data/drop_info.hpp" +#include "duckdb/parser/result_modifier.hpp" #include "duckdb/planner/parsed_data/bound_create_table_info.hpp" +#include "functions/ducklake_table_functions.hpp" #include "storage/ducklake_catalog.hpp" #include "storage/ducklake_table_entry.hpp" #include "storage/ducklake_transaction.hpp" @@ -61,9 +63,10 @@ bool DuckLakeSchemaEntry::HandleCreateConflict(CatalogTransaction transaction, C return true; } -optional_ptr DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, - BoundCreateTableInfo &info, string table_uuid, - string table_data_path) { +optional_ptr +DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCreateTableInfo &info, string table_uuid, + string table_data_path, unique_ptr prebuilt_partition_data, + unique_ptr prebuilt_sort_data) { auto &duck_transaction = transaction.transaction->Cast(); auto &base_info = info.Base(); // check if we have an existing entry with this name @@ -76,6 +79,8 @@ optional_ptr DuckLakeSchemaEntry::CreateTableExtended(CatalogTrans throw BinderException("Column name \"%s\" is reserved by DuckLake for internal use", col.Name()); } } + // validate WITH (...) options before any catalog mutation + auto options_in_create_with = ValidateOptionsInCreateWith(transaction.GetContext(), base_info.options); //! 
get a local table-id auto table_id = TableIndex(duck_transaction.GetLocalCatalogId()); // generate field ids based on the column ids @@ -85,6 +90,41 @@ optional_ptr DuckLakeSchemaEntry::CreateTableExtended(CatalogTrans auto table_entry = make_uniq(ParentCatalog(), *this, base_info, table_id, std::move(table_uuid), std::move(table_data_path), std::move(field_data), column_id, std::move(inlined_tables), LocalChangeType::CREATED); + // Two routes for partition / sort data: + // (A) CTAS: partition_data and sort_data were pre-built at planning time so the planner-built physical + // write could embed the right partition_id into the data files. Attach them as-is - rebuilding here + // would allocate new ids that wouldn't match the ids baked into the on-disk files. + // (B) Plain CREATE TABLE (non-CTAS): rebuild from the inline clauses on info.Base().partition_keys / + // sort_keys. There's no planning-time write to coordinate with, so the id allocated here is the only id. + if (prebuilt_partition_data) { + table_entry->SetPartitionData(std::move(prebuilt_partition_data)); + } else if (!base_info.partition_keys.empty()) { + table_entry->SetPartitionData(DuckLakeTableEntry::BuildPartitionData( + duck_transaction, table_entry->GetColumns(), table_entry->GetFieldData(), base_info.partition_keys)); + } + if (prebuilt_sort_data) { + table_entry->SetSortData(std::move(prebuilt_sort_data)); + } else if (!base_info.sort_keys.empty()) { + // TODO: FIXME: Needs a fix in upstream DuckDB then a fix here + // CREATE TABLE ... SORTED BY (e) accepts only raw expressions (no ASC/DESC/NULLS modifiers). + // Wrap each in an OrderByNode using the same defaults the DuckDB transformer produces for a bare + // expression list (ASCENDING + ORDER_DEFAULT) - matches the ALTER TABLE ... SET SORTED BY (e) path. 
+ vector orders; + orders.reserve(base_info.sort_keys.size()); + for (auto &expr : base_info.sort_keys) { + orders.emplace_back(OrderType::ASCENDING, OrderByNullType::ORDER_DEFAULT, expr->Copy()); + } + table_entry->SetSortData( + DuckLakeTableEntry::BuildSortData(duck_transaction, table_entry->GetColumns(), orders)); + } + // stash on the entry; same-transaction writers pick these up via DuckLakeCopyInput, and at commit + // time WriteOptionsInCreateWith persists them under the freshly-allocated committed table_id. + // Routing through the entry (instead of mutating catalog options under the transaction-local id) + // keeps the catalog free of dead entries on rollback, matching how new_tables / local_changes + // already scope transaction state. + if (!options_in_create_with.empty()) { + table_entry->SetOptionsInCreateWith(options_in_create_with); + } auto result = table_entry.get(); duck_transaction.CreateEntry(std::move(table_entry)); return result; diff --git a/src/storage/ducklake_table_entry.cpp b/src/storage/ducklake_table_entry.cpp index 4b6dd9a9497..481b4388523 100644 --- a/src/storage/ducklake_table_entry.cpp +++ b/src/storage/ducklake_table_entry.cpp @@ -76,6 +76,9 @@ DuckLakeTableEntry::DuckLakeTableEntry(DuckLakeTableEntry &parent, CreateTableIn if (parent.sort_data) { sort_data = make_uniq(*parent.sort_data); } + if (!parent.options_in_create_with.empty()) { + options_in_create_with = parent.options_in_create_with; + } CheckSupportedTypes(); if (local_change.type == LocalChangeType::ADD_COLUMN) { LogicalIndex new_col_idx(columns.LogicalColumnCount() - 1); @@ -100,6 +103,9 @@ DuckLakeTableEntry::DuckLakeTableEntry(DuckLakeTableEntry &parent, CreateTableIn if (parent.sort_data) { sort_data = make_uniq(*parent.sort_data); } + if (!parent.options_in_create_with.empty()) { + options_in_create_with = parent.options_in_create_with; + } CheckSupportedTypes(); auto changed_id = local_change.field_index; @@ -423,7 +429,7 @@ string 
GetPartitionColumnName(ColumnRefExpression &colref) { return colref.GetColumnName(); } -void DuckLakeTableEntry::ValidateSortExpressionColumns(DuckLakeTableEntry &table, const vector &orders) { +void DuckLakeTableEntry::ValidateSortExpressionColumns(const ColumnList &columns, const vector &orders) { vector missing_columns; for (auto &order : orders) { ParsedExpressionIterator::VisitExpression( @@ -433,7 +439,7 @@ void DuckLakeTableEntry::ValidateSortExpressionColumns(DuckLakeTableEntry &table "Unexpected qualified column reference - only unqualified columns are supported"); } string column_name = colref.GetColumnName(); - if (!table.ColumnExists(column_name)) { + if (!columns.ColumnExists(column_name)) { if (std::find(missing_columns.begin(), missing_columns.end(), column_name) == missing_columns.end()) { missing_columns.push_back(column_name); @@ -454,7 +460,8 @@ void DuckLakeTableEntry::ValidateSortExpressionColumns(DuckLakeTableEntry &table } } -DuckLakePartitionField GetPartitionField(DuckLakeTableEntry &table, ParsedExpression &expr) { +DuckLakePartitionField GetPartitionField(const ColumnList &columns, DuckLakeFieldData &field_data, + ParsedExpression &expr) { string column_name; DuckLakePartitionField field; @@ -525,28 +532,68 @@ DuckLakePartitionField GetPartitionField(DuckLakeTableEntry &table, ParsedExpres "Unsupported partition key %s - only identity columns and year/month/day/hour/bucket are supported", expr.ToString()); } - if (!table.ColumnExists(column_name)) { + if (!columns.ColumnExists(column_name)) { throw CatalogException("Unexpected partition key - column \"%s\" does not exist", column_name); } - auto &col = table.GetColumn(column_name); + auto &col = columns.GetColumn(column_name); PhysicalIndex column_index(col.StorageOid()); - auto &field_id = table.GetFieldData().GetByRootIndex(column_index); + auto &field_id = field_data.GetByRootIndex(column_index); field.field_id = field_id.GetFieldIndex(); return field; } -unique_ptr 
DuckLakeTableEntry::AlterTable(DuckLakeTransaction &transaction, SetPartitionedByInfo &info) { - auto create_info = GetInfo(); - auto &table_info = create_info->Cast(); - // create a complete copy of this table with the partition info added +static bool PartitionFieldsMatch(optional_ptr current_partition, + const DuckLakePartition &requested_partition) { + if (!current_partition) { + return requested_partition.fields.empty(); + } + if (current_partition->fields.size() != requested_partition.fields.size()) { + return false; + } + for (idx_t i = 0; i < current_partition->fields.size(); i++) { + if (!(current_partition->fields[i] == requested_partition.fields[i])) { + return false; + } + } + return true; +} + +unique_ptr +DuckLakeTableEntry::BuildPartitionData(DuckLakeTransaction &transaction, const ColumnList &columns, + DuckLakeFieldData &field_data, + const vector> &partition_keys) { + // Always returns a non-null DuckLakePartition. Empty partition_keys yields a partition with no fields, + // which is the shape RESET PARTITIONED BY (and SET PARTITIONED BY () with empty list) needs to drop the + // existing partition spec at commit time. CTAS callers that mean "no PARTITIONED BY clause at all" should + // short-circuit and not call this helper. auto partition_data = make_uniq(); partition_data->partition_id = transaction.GetLocalCatalogId(); - for (idx_t expr_idx = 0; expr_idx < info.partition_keys.size(); expr_idx++) { - auto &expr = *info.partition_keys[expr_idx]; - auto partition_field = GetPartitionField(*this, expr); + for (idx_t expr_idx = 0; expr_idx < partition_keys.size(); expr_idx++) { + auto &expr = *partition_keys[expr_idx]; + auto partition_field = GetPartitionField(columns, field_data, expr); + // Reject duplicate partition keys: two expressions that resolve to the same (field_id, transform). + // `(year(ts), month(ts))` is fine - same column, different transforms - but `(a, a)` is not. 
+ for (auto &existing : partition_data->fields) { + // partition_key_index is intentionally excluded here: it gets assigned below, after the + // duplicate check. Two fields are duplicates if they resolve to the same (field_id, transform). + if (existing.field_id == partition_field.field_id && existing.transform == partition_field.transform) { + throw BinderException("Duplicate partition key: expression \"%s\" matches an earlier partition key", + expr.ToString()); + } + } partition_field.partition_key_index = expr_idx; partition_data->fields.push_back(partition_field); } + return partition_data; +} + +unique_ptr DuckLakeTableEntry::AlterTable(DuckLakeTransaction &transaction, SetPartitionedByInfo &info) { + auto create_info = GetInfo(); + auto &table_info = create_info->Cast(); + auto partition_data = BuildPartitionData(transaction, GetColumns(), GetFieldData(), info.partition_keys); + if (PartitionFieldsMatch(GetPartitionData(), *partition_data)) { + return nullptr; + } auto new_entry = make_uniq(*this, table_info, std::move(partition_data)); return std::move(new_entry); @@ -1195,24 +1242,16 @@ unique_ptr DuckLakeTableEntry::AlterTable(DuckLakeTransaction &tra return std::move(new_entry); } -unique_ptr DuckLakeTableEntry::AlterTable(DuckLakeTransaction &transaction, SetSortedByInfo &info) { - auto create_info = GetInfo(); - auto &table_info = create_info->Cast(); - - if (info.orders.empty()) { - // RESET SORTED BY - clear sort data - auto new_entry = make_uniq(*this, table_info, unique_ptr()); - return std::move(new_entry); +unique_ptr DuckLakeTableEntry::BuildSortData(DuckLakeTransaction &transaction, const ColumnList &columns, + const vector &orders) { + if (orders.empty()) { + return nullptr; } - - // Validate all column references in all sort expressions - ValidateSortExpressionColumns(*this, info.orders); - + ValidateSortExpressionColumns(columns, orders); auto sort_data = make_uniq(); sort_data->sort_id = transaction.GetLocalCatalogId(); - for (idx_t 
order_node_idx = 0; order_node_idx < info.orders.size(); order_node_idx++) { - auto &order_node = info.orders[order_node_idx]; - + for (idx_t order_node_idx = 0; order_node_idx < orders.size(); order_node_idx++) { + auto &order_node = orders[order_node_idx]; DuckLakeSortField sort_field; sort_field.sort_key_index = order_node_idx; sort_field.expression = order_node.expression->ToString(); @@ -1221,7 +1260,14 @@ unique_ptr DuckLakeTableEntry::AlterTable(DuckLakeTransaction &tra sort_field.null_order = order_node.null_order; sort_data->fields.push_back(sort_field); } + return sort_data; +} + +unique_ptr DuckLakeTableEntry::AlterTable(DuckLakeTransaction &transaction, SetSortedByInfo &info) { + auto create_info = GetInfo(); + auto &table_info = create_info->Cast(); + auto sort_data = BuildSortData(transaction, GetColumns(), info.orders); auto new_entry = make_uniq(*this, table_info, std::move(sort_data)); return std::move(new_entry); } diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp index 7bef4034692..58c383b9537 100644 --- a/src/storage/ducklake_transaction.cpp +++ b/src/storage/ducklake_transaction.cpp @@ -1400,7 +1400,13 @@ DuckLakePartitionInfo DuckLakeTransaction::GetNewPartitionKey(DuckLakeCommitStat auto local_partition_id = partition_data->partition_id; auto partition_id = commit_state.commit_snapshot.next_catalog_id++; partition_key.id = partition_id; - partition_data->partition_id = partition_id; + // Do NOT mutate partition_data->partition_id here. On commit retry (concurrent metadata-write + // conflicts), GetNewPartitionKey runs again with a fresh commit_state. 
If the in-memory partition + // data carried the previous attempt's committed id, local_partition_id above would be wrong and + // committed_partition_ids would never contain the actual local sentinel that data files were + // stamped with — RemapPartitionId would then leak TRANSACTION_LOCAL_ID_START (= 2^63) into the + // SQL, which Postgres bigint rejects (SQLite silently truncates). Keeping the in-memory value as + // the local id makes GetNewPartitionKey idempotent across retries. for (auto &field : partition_data->fields) { DuckLakePartitionFieldInfo partition_field; partition_field.partition_key_index = field.partition_key_index; @@ -1449,7 +1455,10 @@ DuckLakeSortInfo DuckLakeTransaction::GetNewSortKey(DuckLakeCommitState &commit_ auto sort_id = commit_state.commit_snapshot.next_catalog_id++; sort_key.id = sort_id; - sort_data->sort_id = sort_id; + // Do NOT mutate sort_data->sort_id here — same retry-idempotency reasoning as GetNewPartitionKey. + // No downstream reader observes the mutated value (data files don't reference sort_id), so this + // is harmless today, but removing it keeps both code paths symmetric and prevents the same bug + // from being reintroduced if anyone adds a RemapSortId step later. for (auto &field : sort_data->fields) { DuckLakeSortFieldInfo sort_field; sort_field.sort_key_index = field.sort_key_index; @@ -1520,6 +1529,7 @@ struct NewTableInfo { vector new_columns; vector new_inlined_data_tables; vector new_sort_keys; + vector options_in_create_with; }; struct NewMacroInfo { @@ -1758,6 +1768,30 @@ void DuckLakeTransaction::GetNewTableInfo(DuckLakeCommitState &commit_state, Duc inlined_entry.uuid = latest_table.GetTableUUID(); inlined_entry.columns = latest_table.GetTableColumns(); result.new_inlined_data_tables.push_back(std::move(inlined_entry)); + + // CREATE TABLE ... 
PARTITIONED BY / SORTED BY + if (local_change.type == LocalChangeType::CREATED) { + if (table.GetPartitionData()) { + auto partition_key = GetNewPartitionKey(commit_state, table); + result.new_partition_keys.push_back(std::move(partition_key)); + } + if (table.GetSortData()) { + auto sort_key = GetNewSortKey(commit_state, table); + result.new_sort_keys.push_back(std::move(sort_key)); + } + } + // CREATE TABLE / CTAS WITH (...) options. AlterEntryInternal drops the CREATED entry from + // the transaction-local set on rename, so a CREATE+RENAME chain arrives here as a single + // RENAMED entry with a still-transaction-local id — emit whenever old_table_id is local. + if (old_table_id.IsTransactionLocal()) { + auto &options_in_create_with = table.GetOptionsInCreateWith(); + for (auto &tag : options_in_create_with) { + DuckLakeConfigOption opt; + opt.option = tag; + opt.table_id = new_table_id; + result.options_in_create_with.push_back(std::move(opt)); + } + } break; } default: @@ -2368,6 +2402,11 @@ string DuckLakeTransaction::CommitChanges(DuckLakeCommitState &commit_state, batch_queries += metadata_manager->WriteNewColumns(result.new_columns); batch_queries += metadata_manager->WriteNewInlinedTables(commit_snapshot, result.new_inlined_data_tables); batch_queries += metadata_manager->WriteNewSortKeys(commit_snapshot, result.new_sort_keys); + batch_queries += metadata_manager->WriteOptionsInCreateWith(result.options_in_create_with); + // re-key in-memory options from the local id to the committed id + for (auto &opt : result.options_in_create_with) { + ducklake_catalog.SetConfigOption(opt); + } new_tables_result = result.new_tables; new_inlined_data_tables_result = result.new_inlined_data_tables; } diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test b/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test new file mode 100644 index 00000000000..38b97dd29f4 --- /dev/null +++ 
b/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test @@ -0,0 +1,311 @@ +# name: test/sql/create_table_inline_partition_sort/create_table_inline_basic.test +# description: CREATE TABLE with inline PARTITIONED BY / SORTED BY clauses +# group: [create_table_inline_partition_sort] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_basic', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 0) + +statement ok +USE ducklake + +# Test 1: CREATE TABLE with inline PARTITIONED BY only (single column) +statement ok +CREATE TABLE t_part (i INTEGER, v VARCHAR) PARTITIONED BY (i) + +statement ok +INSERT INTO t_part VALUES (1, 'a'), (2, 'b'), (1, 'c') + +query II +SELECT i, v FROM t_part ORDER BY i, v +---- +1 a +1 c +2 b + +# verify partition metadata was persisted at CREATE time (no ALTER ran) +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') +---- +1 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') +---- +1 + +# verify the on-disk Parquet files landed in hive-style partition-specific directories +# (DATA_INLINING_ROW_LIMIT 0 forces INSERT to write Parquet directly; one file per partition value) +query I +SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') +---- +2 + +query I +SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') ORDER BY 1 +---- +i=1 +i=2 + +# Test 2: 
CREATE TABLE with inline SORTED BY only (single column) +statement ok +CREATE TABLE t_sort (i INTEGER, v VARCHAR) SORTED BY (i) + +statement ok +INSERT INTO t_sort VALUES (3, 'x'), (1, 'y'), (2, 'z') + +# t_sort has a single file (DATA_INLINING_ROW_LIMIT 0, no partitioning, one INSERT batch). +# SORTED BY (i) means rows are i-ascending in the file - SELECT without ORDER BY returns the +# file's row order directly, which IS the sort verification. +query II +SELECT i, v FROM t_sort +---- +1 y +2 z +3 x + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort') +---- +1 + +query II +SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort') ORDER BY sort_key_index +---- +0 i + +# verify the inline path persisted the same defaults a bare ALTER TABLE ... SET SORTED BY (col) would +# (the metadata writer collapses OrderType::ASCENDING -> "ASC" and OrderByNullType::ORDER_DEFAULT -> "NULLS_LAST") +query III +SELECT sort_direction, null_order, dialect FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort') +---- +ASC NULLS_LAST duckdb + +# Test 3: CREATE TABLE with both clauses, PARTITIONED first +statement ok +CREATE TABLE t_both1 (a INTEGER, b INTEGER, v VARCHAR) PARTITIONED BY (a) SORTED BY (b) + +statement ok +INSERT INTO t_both1 VALUES (1, 30, 'p'), (2, 10, 'q'), (1, 20, 'r') + +# t_both1 is PARTITIONED BY (a), so each value of a lives in its own file. SORTED BY (b) means +# within each partition's file rows are b-ascending. Filter to one partition at a time and verify +# row order without ORDER BY - that order IS the on-disk row order of the partition's file. 
+query III +SELECT a, b, v FROM t_both1 WHERE a = 1 +---- +1 20 r +1 30 p + +# on-disk: one Parquet file per distinct partition value (a=1 and a=2), each in its own a=N directory +query I +SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both1') ORDER BY 1 +---- +a=1 +a=2 + +# Test 4: CREATE TABLE with both clauses, SORTED first (verify either-order grammar) +statement ok +CREATE TABLE t_both2 (a INTEGER, b INTEGER) SORTED BY (b) PARTITIONED BY (a) + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both2') +---- +1 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both2') +---- +1 + +# Test 5: multi-column partition + multi-column sort +statement ok +CREATE TABLE t_multi (a INTEGER, b INTEGER, c INTEGER) PARTITIONED BY (a, b) SORTED BY (c, a) + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') +---- +2 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') +---- +2 + +statement ok +INSERT INTO t_multi VALUES (1, 1, 30), (1, 1, 10), (1, 2, 20), (2, 1, 5) + +# t_multi is PARTITIONED BY (a, b), so each (a, b) combination lives in its own file. SORTED BY +# (c, a) means rows within a partition's file are (c, a)-ascending. 
+query III +SELECT a, b, c FROM t_multi WHERE a = 1 AND b = 1 +---- +1 1 10 +1 1 30 + +# multi-column hive-style nesting: a=N/b=M/ +query II +SELECT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) AS a_dir, regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1) AS b_dir +FROM ducklake_metadata.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') +ORDER BY a_dir, b_dir +---- +a=1 b=1 +a=1 b=2 +a=2 b=1 + +# the a directory must come *before* the b directory in the path (PARTITIONED BY (a, b) ordering) +query I +SELECT count(*) FROM ducklake_metadata.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi') + AND regexp_matches(path, 'a=[0-9]+/b=[0-9]+') +---- +3 + +# Test 6: PARTITIONED BY with a transform function +statement ok +CREATE TABLE t_transform (ts TIMESTAMP, v INTEGER) PARTITIONED BY (year(ts)) + +query II +SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_transform') +---- +0 year + +statement ok +INSERT INTO t_transform VALUES (TIMESTAMP '2023-06-01', 1), (TIMESTAMP '2023-09-15', 2), (TIMESTAMP '2024-03-10', 3) + +# year() transform produces hive directories named with the partition key NAME (year, not ts) +query I +SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_transform') ORDER BY 1 +---- +year=2023 +year=2024 + +# Test 7: PARTITIONED BY only writes partition metadata, NOT sort metadata (asymmetric) +statement ok +CREATE TABLE t_only_part (a INTEGER) PARTITIONED BY (a) + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part') 
+---- +1 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part') +---- +0 + +statement ok +INSERT INTO t_only_part VALUES (10), (20), (10) + +query I +SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part') ORDER BY 1 +---- +a=10 +a=20 + +# Test 8: SORTED BY only writes sort metadata, NOT partition metadata (asymmetric) +statement ok +CREATE TABLE t_only_sort (a INTEGER) SORTED BY (a) + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_sort') +---- +0 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_sort') +---- +1 + +# Test 9: PARTITIONED BY with bucket(N, col) transform happy path +statement ok +CREATE TABLE t_bucket (id BIGINT, v VARCHAR) PARTITIONED BY (bucket(8, id)) + +query II +SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket') +---- +0 bucket(8) + +statement ok +INSERT INTO t_bucket VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h') + +# bucket() transform writes into hive directories named "bucket=N" (modulo bucket count) +# we don't fix specific bucket assignments (hash-dependent) but we assert at least 1 and at most 8 distinct buckets +query I +SELECT count(DISTINCT regexp_extract(path, '.*(bucket=[0-9]+)[/\\].*', 1)) BETWEEN 1 AND 8 FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket') 
+---- +true + +# every file must live in a bucket=N directory +query I +SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket') AND NOT regexp_matches(path, 'bucket=[0-9]+') +---- +0 + +# Test 10: same-transaction CREATE-inline + ALTER-set must NOT emit duplicate partition rows. +# The reverse-chain commit walk has to dedupe so only the latest (ALTER) wins. +statement ok +BEGIN + +statement ok +CREATE TABLE t_create_then_alter (a INTEGER, b INTEGER) PARTITIONED BY (a) SORTED BY (a) + +statement ok +ALTER TABLE t_create_then_alter SET PARTITIONED BY (b) + +statement ok +ALTER TABLE t_create_then_alter SET SORTED BY (b DESC) + +statement ok +COMMIT + +# exactly one partition_info row (the ALTER's), not two +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') +---- +1 + +# the surviving partition row references column b (the ALTER's choice), not a (the inline create table's). 
+query III +SELECT pc.partition_key_index, c.column_name, pc.transform +FROM ducklake_metadata.ducklake_partition_column pc +JOIN ducklake_metadata.ducklake_column c + ON pc.table_id = c.table_id AND pc.column_id = c.column_id AND c.end_snapshot IS NULL +WHERE pc.table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') +---- +0 b identity + +# exactly one sort_info row, and its expression is b DESC (the ALTER's), not a +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') +---- +1 + +query III +SELECT expression, sort_direction, null_order FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') +---- +b DESC NULLS_LAST + +# INSERT after the chain: data must land in b=N directories (ALTER won), NOT a=N directories +statement ok +INSERT INTO t_create_then_alter VALUES (1, 100), (2, 100), (3, 200) + +query I +SELECT DISTINCT regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') ORDER BY 1 +---- +b=100 +b=200 + +# none of the files should be under an a=N directory (regression guard for the dedup logic) +query I +SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') AND regexp_matches(path, 'a=[0-9]+/') +---- +0 diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test new file mode 100644 index 00000000000..d93155f7202 --- /dev/null +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test @@ -0,0 
+1,248 @@ +# name: test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test +# description: CREATE TABLE AS SELECT with inline PARTITIONED BY / SORTED BY clauses +# group: [create_table_inline_partition_sort] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_ctas', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 0) + +statement ok +USE ducklake + +# Test 1: CTAS with PARTITIONED BY only +statement ok +CREATE TABLE ctas_part PARTITIONED BY (i) AS SELECT range AS i, range::VARCHAR AS v FROM range(5) + +query II +SELECT i, v FROM ctas_part ORDER BY i +---- +0 0 +1 1 +2 2 +3 3 +4 4 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_part') +---- +1 + +# CTAS must hive-partition on the initial write: 5 distinct values from range(5) -> 5 hive directories +query I +SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_part') ORDER BY 1 +---- +i=0 +i=1 +i=2 +i=3 +i=4 + +# Test 2: CTAS with SORTED BY only. +statement ok +CREATE TABLE ctas_sort SORTED BY (j) AS SELECT range AS i, 9 - range AS j FROM range(10) + +# ctas_sort is non-partitioned; CTAS produces a single file. SELECT without ORDER BY returns the +# file's row order directly. SORTED BY (j) means j-ascending; i is forced descending by construction. 
+query II +SELECT i, j FROM ctas_sort +---- +9 0 +8 1 +7 2 +6 3 +5 4 +4 5 +3 6 +2 7 +1 8 +0 9 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_sort') +---- +1 + +# Test 3: CTAS with both clauses, PARTITIONED first. 30 rows partitioned into 3 groups of 10; +# sort key v = (29 - i) is the inverse of the SELECT's natural order so the sort has real work. +statement ok +CREATE TABLE ctas_both PARTITIONED BY (g) SORTED BY (v) AS +SELECT range % 3 AS g, 29 - range AS v FROM range(30) + +query II +SELECT + (SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both')), + (SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both')) +---- +1 1 + +# 3 distinct partition values -> 3 hive directories +query I +SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both') +---- +3 + +# verify sort actually happened: each partition's file holds rows in v-ascending order +query II +SELECT g, v FROM ctas_both WHERE g = 0 +---- +0 2 +0 5 +0 8 +0 11 +0 14 +0 17 +0 20 +0 23 +0 26 +0 29 + +query II +SELECT g, v FROM ctas_both WHERE g = 1 +---- +1 1 +1 4 +1 7 +1 10 +1 13 +1 16 +1 19 +1 22 +1 25 +1 28 + +query II +SELECT g, v FROM ctas_both WHERE g = 2 +---- +2 0 +2 3 +2 6 +2 9 +2 12 +2 15 +2 18 +2 21 +2 24 +2 27 + +# Test 4: CTAS with SORTED first then PARTITIONED (either-order grammar). 
Same shape as Test 3 but with the clause order swapped -
# the grammar must accept SORTED BY before PARTITIONED BY and produce identical metadata and layout.
+statement ok
+CREATE TABLE ctas_swap SORTED BY (v) PARTITIONED BY (g) AS
+SELECT range % 3 AS g, 29 - range AS v FROM range(30)
+
+query II
+SELECT
+    (SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap')),
+    (SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap'))
+----
+1	1
+
+query I
+SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap')
+----
+3
+
+# partition g=1's file
+query II
+SELECT g, v FROM ctas_swap WHERE g = 1
+----
+1	1
+1	4
+1	7
+1	10
+1	13
+1	16
+1	19
+1	22
+1	25
+1	28
+
+# Test 5: CTAS partitioning by a transform expression
+statement ok
+CREATE TABLE ctas_transform PARTITIONED BY (year(ts)) AS
+SELECT TIMESTAMP '2024-01-01' + INTERVAL (i) YEAR AS ts, i AS v FROM range(3) t(i)
+
+query II
+SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_transform')
+----
+0	year
+
+# CTAS with year() transform must hive-partition on the initial write
+query I
+SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_transform') ORDER BY 1
+----
+year=2024
+year=2025
+year=2026
+
+# Test 6: CTAS with multi-column SORTED BY using string and integer columns
+statement ok
+CREATE TABLE ctas_multi SORTED BY (a, b) AS
+SELECT i AS a, ('tag_' || i) AS b FROM range(4) t(i)
+
+# ctas_multi is non-partitioned; CTAS produces a single file. 
SORTED BY (a, b) means rows are +# (a, b)-ascending in that file - SELECT without ORDER BY returns the file's actual row order. +query II +SELECT a, b FROM ctas_multi +---- +0 tag_0 +1 tag_1 +2 tag_2 +3 tag_3 + +query II +SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_multi') ORDER BY sort_key_index +---- +0 a +1 b + +# Test 7: CTAS with a CTE source (regression: planning shape differs from a flat SELECT) +statement ok +CREATE TABLE ctas_cte PARTITIONED BY (g) SORTED BY (v) AS +WITH src AS (SELECT i % 3 AS g, i AS v FROM range(9) t(i)) +SELECT g, v FROM src + +query II +SELECT g, count(*) FROM ctas_cte GROUP BY g ORDER BY g +---- +0 3 +1 3 +2 3 + +# ctas_cte is PARTITIONED BY (g), so each value of g lives in its own file. SORTED BY (v) means +# rows within each partition's file are v-ascending. Filter to one partition; the row order +# returned IS the on-disk file order. 
+query II +SELECT g, v FROM ctas_cte WHERE g = 0 +---- +0 0 +0 3 +0 6 + +query II +SELECT g, v FROM ctas_cte WHERE g = 1 +---- +1 1 +1 4 +1 7 + +query II +SELECT g, v FROM ctas_cte WHERE g = 2 +---- +2 2 +2 5 +2 8 + +# CTE-sourced CTAS must still partition on the initial write (regression: planner passes the spec through) +query I +SELECT DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_cte') ORDER BY 1 +---- +g=0 +g=1 +g=2 diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test new file mode 100644 index 00000000000..8aaad79b5b0 --- /dev/null +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test @@ -0,0 +1,88 @@ +# name: test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test +# description: Verify SORTED BY actually sorts data during CTAS by inspecting parquet rowgroup statistics. +# group: [create_table_inline_partition_sort] + +# Uses 3000+ rows across multiple 2048-row rowgroups; sort column is the INVERSE of the SELECT's +# natural order so any "sort skipped" regression would leave rowgroups in descending order. + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_ctas_sort_verify', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 0) + +# 2048 is the minimum parquet rowgroup size in DuckDB; pick it so 3000 rows produce >=2 rowgroups. 
+statement ok +CALL ducklake.set_option('parquet_row_group_size', 2048) + +# Single-threaded + preserved insertion order keeps rowgroup boundaries deterministic, so we can assert +# specific stats_min values per rowgroup rather than an order-tolerant "monotonic" check. +statement ok +SET threads=1 + +statement ok +SET preserve_insertion_order=true + +statement ok +USE ducklake + +# CTAS feeding from range(3000): natural SELECT order is `a` ascending. Sort key `b` is `2999 - a` so its +# natural in-stream order is strictly descending. If SORTED BY (b) is honored at CTAS time, the data is +# resorted to b ascending and the per-rowgroup stats_min/max for `b` will be disjoint and increasing. +statement ok +CREATE TABLE sort_verify SORTED BY (b) AS +SELECT range AS a, (2999 - range) AS b FROM range(3000) + +# logical correctness: data round-trips +query I +SELECT count(*) FROM sort_verify +---- +3000 + +# Confirm we got at least 2 rowgroups (otherwise the per-rowgroup-stats check is meaningless). +query I +SELECT count(DISTINCT row_group_id) >= 2 +FROM parquet_metadata('{DATA_PATH}/inline_ctas_sort_verify/main/sort_verify/*.parquet') +WHERE path_in_schema = 'b' +---- +true + +# ---------------------------------------------------------------------------------------------------------------------- +# Hardcoded per-rowgroup min/max sanity check on `sort_verify`. +# Layout: 3000 rows, parquet_row_group_size=2048, single thread, preserve_insertion_order=true. 
After SORT BY b ASC: +# ---------------------------------------------------------------------------------------------------------------------- + +# column `b` +query III +SELECT row_group_id, stats_min::INTEGER AS rg_min, stats_max::INTEGER AS rg_max +FROM parquet_metadata('{DATA_PATH}/inline_ctas_sort_verify/main/sort_verify/*.parquet') +WHERE path_in_schema = 'b' +ORDER BY row_group_id +---- +0 0 2047 +1 2048 2999 + +# column `a` +query III +SELECT row_group_id, stats_min::INTEGER AS rg_min, stats_max::INTEGER AS rg_max +FROM parquet_metadata('{DATA_PATH}/inline_ctas_sort_verify/main/sort_verify/*.parquet') +WHERE path_in_schema = 'a' +ORDER BY row_group_id +---- +0 952 2999 +1 0 951 + +# row counts per rowgroup confirm the 2048 + 952 split +query II +SELECT row_group_id, row_group_num_rows +FROM parquet_metadata('{DATA_PATH}/inline_ctas_sort_verify/main/sort_verify/*.parquet') +WHERE path_in_schema = 'b' +ORDER BY row_group_id +---- +0 2048 +1 952 diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test new file mode 100644 index 00000000000..e5bda9a2439 --- /dev/null +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test @@ -0,0 +1,96 @@ +# name: test/sql/create_table_inline_partition_sort/create_table_inline_errors.test +# description: validation errors for inline PARTITIONED BY / SORTED BY on CREATE TABLE / CTAS +# group: [create_table_inline_partition_sort] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_errors', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 0) + +statement ok +USE ducklake + +# PARTITIONED BY references nonexistent column +statement error +CREATE TABLE bad_part (a INTEGER) PARTITIONED BY (nonexistent) +---- +column 
"nonexistent" does not exist + +# SORTED BY references nonexistent column +statement error +CREATE TABLE bad_sort (a INTEGER) SORTED BY (nonexistent) +---- +:.*nonexistent.* + +# WITH (...) accepts known DuckLake config option keys; unknown keys are rejected during validation +statement error +CREATE TABLE bad_with (a INTEGER) WITH (some_option='x') +---- +:.*Unsupported option.* + +# unsupported transform function +statement error +CREATE TABLE bad_transform (a INTEGER) PARTITIONED BY (foo(a)) +---- +:.*Unsupported partition function.* + +# malformed bucket() expression - missing args +statement error +CREATE TABLE bad_bucket (a INTEGER) PARTITIONED BY (bucket(a)) +---- +Expected bucket(bucket_count, column) + +# duplicate partition key (same column with same transform) is rejected +statement error +CREATE TABLE bad_dup (a INTEGER) PARTITIONED BY (a, a) +---- +Duplicate partition key + +# CTAS variants: SORTED BY references nonexistent column +statement error +CREATE TABLE bad_ctas SORTED BY (nonexistent) AS SELECT 1 AS a +---- +:.*nonexistent.* + +# CTAS variants: PARTITIONED BY references nonexistent column +statement error +CREATE TABLE bad_ctas_part PARTITIONED BY (nonexistent) AS SELECT 1 AS a +---- +:.*nonexistent.* + +# verify the failing CREATEs left no metadata behind +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info +---- +0 + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_sort_info +---- +0 + +# A successful CREATE after failures still works (no transaction state corruption) +statement ok +CREATE TABLE ok_after_errors (a INTEGER) PARTITIONED BY (a) + +query I +SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_after_errors') +---- +1 + +# Same column with DIFFERENT transforms is allowed (e.g. year + month of the same timestamp). +# This must NOT be flagged as a duplicate partition key. 
+statement ok +CREATE TABLE ok_diff_transforms (ts TIMESTAMP) PARTITIONED BY (year(ts), month(ts)) + +query II +SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_diff_transforms') ORDER BY partition_key_index +---- +0 year +1 month diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test new file mode 100644 index 00000000000..d04656cfc00 --- /dev/null +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test @@ -0,0 +1,453 @@ +# name: test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test +# description: Transaction semantics for inline PARTITIONED BY / SORTED BY on CREATE TABLE / CTAS: +# rollback discards everything (entry, files, transaction-local ids); concurrent CTAS with +# distinct names cannot conflict; concurrent CTAS with the same name produces a commit-time +# conflict; +# group: [create_table_inline_partition_sort] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_tx', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 0) + +statement ok +SET immediate_transaction_mode=true + +statement ok +USE ducklake + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 1: ROLLBACK after a successful inline-partitioned CREATE+INSERT. +# Rollback must discard the table entry, the planning-time-allocated partition_id/sort_id, and the data files. 
+# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement ok +CREATE TABLE rollback_part (i INTEGER, v VARCHAR) PARTITIONED BY (i) + +statement ok +INSERT INTO rollback_part VALUES (1, 'a'), (2, 'b') + +# table is visible inside the transaction +query I +SELECT count(*) FROM rollback_part +---- +2 + +statement ok +ROLLBACK + +# table is gone after rollback +statement error +SELECT * FROM rollback_part +---- +:.*not exist.* + +# the next inline CTAS proceeds cleanly - no leftover transaction-local catalog state poisoning subsequent +# planning (regression guard for the planning-time partition_id allocator). +statement ok +CREATE TABLE post_rollback PARTITIONED BY (a) AS SELECT 1 AS a + +query I +SELECT * FROM post_rollback +---- +1 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 2: ROLLBACK after CTAS with both inline clauses. The most invasive shape - exercises the full +# planning-time field_data + partition_data + sort_data construction, the sink-time CreateTableExtended +# branch, the data-file write, and the rollback cleanup. 
+# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement ok +CREATE TABLE rollback_ctas PARTITIONED BY (g) SORTED BY (v) AS +SELECT i % 3 AS g, i AS v FROM range(9) t(i) + +query I +SELECT count(*) FROM rollback_ctas +---- +9 + +statement ok +ROLLBACK + +statement error +SELECT * FROM rollback_ctas +---- +:.*not exist.* + +# verify the metadata catalog never saw the rolled-back partition_info / sort_info rows +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rollback_ctas') +---- +0 + +query I +SELECT count(*) FROM ducklake_meta.ducklake_sort_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rollback_ctas') +---- +0 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 3: ROLLBACK + same-name CREATE in a fresh transaction succeeds. The rolled-back table_id / +# partition_id were transaction-local and should not leak into the next transaction's namespace. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement ok +CREATE TABLE name_reuse PARTITIONED BY (a) AS SELECT 1 AS a, 'x' AS v + +statement ok +ROLLBACK + +# Same name, same partition column - works because the rollback wiped the prior attempt entirely. +statement ok +CREATE TABLE name_reuse PARTITIONED BY (a) AS SELECT 99 AS a, 'y' AS v + +query II +SELECT a, v FROM name_reuse +---- +99 y + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 4: Two concurrent transactions both CREATE TABLE foo PARTITIONED BY ... AS SELECT with the SAME +# name. First to commit wins; second errors at COMMIT time. 
This is a name-collision conflict - the new +# inline clauses don't introduce a different conflict mode. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.conflict_same_name PARTITIONED BY (i) AS SELECT 1 AS i + +statement ok con2 +CREATE TABLE ducklake.conflict_same_name SORTED BY (i) AS SELECT 2 AS i + +statement ok con1 +COMMIT + +statement error con2 +COMMIT +---- + +# con1's table is the surviving one - check its inline partition spec landed +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'conflict_same_name') +---- +1 + +query I +SELECT * FROM ducklake.conflict_same_name +---- +1 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 5: Two concurrent CTAS-with-inline-spec with DIFFERENT names. No conflict - each transaction +# allocates partition_id from its own counter; commit-time remap assigns globally-unique committed ids. +# This proves the planning-time partition_id allocation isn't a serialization point. 
+# ---------------------------------------------------------------------------------------------------------------------- + +statement ok con1 +BEGIN + +statement ok con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.concurrent_a PARTITIONED BY (i) AS SELECT range AS i, 'a' AS src FROM range(5) + +statement ok con2 +CREATE TABLE ducklake.concurrent_b PARTITIONED BY (i) SORTED BY (i) AS SELECT range AS i, 'b' AS src FROM range(7) + +statement ok con1 +COMMIT + +statement ok con2 +COMMIT + +# both tables exist and have their own partition_info rows +query II +SELECT + (SELECT count(*) FROM ducklake_meta.ducklake_partition_info + WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_a')), + (SELECT count(*) FROM ducklake_meta.ducklake_partition_info + WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_b')) +---- +1 1 + +# the two tables have DISTINCT committed partition_ids (proves the commit-time remap deduplicates the +# transaction-local ids that started identical at planning time on each connection). +query I +SELECT count(DISTINCT partition_id) FROM ducklake_meta.ducklake_partition_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name IN ('concurrent_a', 'concurrent_b')) +---- +2 + +query II +SELECT count(*), max(i) FROM ducklake.concurrent_a +---- +5 4 + +query II +SELECT count(*), max(i) FROM ducklake.concurrent_b +---- +7 6 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 6: Same-transaction inline CTAS + ALTER SET PARTITIONED BY + COMMIT. Last writer wins (the +# metadata-layer dedup collapses by table_id; final on-disk state references the ALTER's partition column). +# This is regression coverage for the chain-walk behavior change in this branch. 
+# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement ok +CREATE TABLE chain_winner PARTITIONED BY (a) SORTED BY (a) AS +SELECT range AS a, range + 100 AS b FROM range(3) + +statement ok +ALTER TABLE chain_winner SET PARTITIONED BY (b) + +statement ok +ALTER TABLE chain_winner SET SORTED BY (b DESC) + +statement ok +COMMIT + +# exactly one partition_info row (the last ALTER's) and one sort_info row (the last ALTER's) +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner') +---- +1 + +query I +SELECT count(*) FROM ducklake_meta.ducklake_sort_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner') +---- +1 + +# the surviving sort row is `b DESC`, not the inline `a ASC` +query III +SELECT expression, sort_direction, null_order FROM ducklake_meta.ducklake_sort_expression +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner') +---- +b DESC NULLS_LAST + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 7: CTAS that errors mid-execution because of a runtime constraint violation. 
The transaction must +# unwind cleanly - no zombie entry, no partition_info row +# ---------------------------------------------------------------------------------------------------------------------- + +statement error +CREATE TABLE error_ctas PARTITIONED BY (i) AS +SELECT CAST(range AS UTINYINT) AS i FROM range(300) +---- +:.*[Cc]onversion.*out of range.* + +# the failed CTAS must not have partially registered the table +statement error +SELECT * FROM error_ctas +---- +:.*not exist.* + +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'error_ctas') +---- +0 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 8: CTAS that errors at the BINDER stage (unknown column in PARTITIONED BY). This fails in +# PlanCreateTableAs *before* any planning-time partition_id allocation completes, so transaction state +# must be uncorrupted. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement error +CREATE TABLE binder_error PARTITIONED BY (nonexistent) AS SELECT 1 AS i +---- +:.*nonexistent.* + +# subsequent statements in the same transaction still work +statement ok +CREATE TABLE recovers_after_binder_error (i INTEGER) PARTITIONED BY (i) + +statement ok +INSERT INTO recovers_after_binder_error VALUES (42) + +statement ok +COMMIT + +query I +SELECT i FROM recovers_after_binder_error +---- +42 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 9: Long transaction that mixes inline CTAS + plain INSERT into other tables. Verifies the inline +# clauses don't disturb the transaction's broader commit logic. 
+# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +CREATE TABLE existing_target (k INTEGER, v VARCHAR) + +statement ok +BEGIN + +statement ok +INSERT INTO existing_target VALUES (1, 'first') + +statement ok +CREATE TABLE long_tx_ctas PARTITIONED BY (g) AS SELECT i AS g, i + 100 AS v FROM range(4) t(i) + +statement ok +INSERT INTO existing_target VALUES (2, 'second') + +statement ok +INSERT INTO long_tx_ctas VALUES (10, 110), (11, 111) + +statement ok +COMMIT + +query II +SELECT k, v FROM existing_target ORDER BY k +---- +1 first +2 second + +query II +SELECT g, count(*) FROM long_tx_ctas GROUP BY g ORDER BY g +---- +0 1 +1 1 +2 1 +3 1 +10 1 +11 1 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 10: Two inline CTAS in one transaction, ROLLBACK. Both tables AND both partition specs unwind. +# Guards against any per-CTAS state that might accumulate beyond the transaction-local catalog set. 
+# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement ok +CREATE TABLE rb_a PARTITIONED BY (i) AS SELECT range AS i FROM range(3) + +statement ok +CREATE TABLE rb_b PARTITIONED BY (j) SORTED BY (j) AS SELECT range AS j FROM range(4) + +statement ok +ROLLBACK + +statement error +SELECT * FROM rb_a +---- +:.*not exist.* + +statement error +SELECT * FROM rb_b +---- +:.*not exist.* + +# neither rolled-back table left a partition_info row +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name IN ('rb_a','rb_b')) +---- +0 + +# subsequent CTAS in fresh transaction succeeds and gets fresh ids - no collision with rolled-back local ids +statement ok +CREATE TABLE rb_after PARTITIONED BY (i) AS SELECT 1 AS i + +query I +SELECT * FROM rb_after +---- +1 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 11: Snapshot isolation. con2 begins BEFORE con1's inline-partitioned CTAS commits. con2 must NOT +# observe the new table even though it's already committed in the metadata DB. The partition_info row's +# begin_snapshot is the visibility gate. 
+# ---------------------------------------------------------------------------------------------------------------------- + +statement ok con2 +BEGIN + +statement ok con1 +CREATE TABLE ducklake.snap_iso PARTITIONED BY (i) AS SELECT 1 AS i + +# con2's snapshot predates the CTAS commit +statement error con2 +SELECT * FROM ducklake.snap_iso +---- +:.*not exist.* + +statement ok con2 +COMMIT + +# new transaction sees it +query I +SELECT * FROM ducklake.snap_iso +---- +1 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 12: Same-transaction INSERT after inline CTAS uses the partition spec. After commit, every data +# file (CTAS-written + INSERT-written) carries a partition_id, and on-disk paths are hive-partitioned. +# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +BEGIN + +statement ok +CREATE TABLE same_tx_part PARTITIONED BY (g) AS SELECT range AS g, range + 100 AS v FROM range(3) + +statement ok +INSERT INTO same_tx_part VALUES (10, 110), (10, 111), (11, 112) + +statement ok +COMMIT + +# every data file references the partition_id - CTAS-written and INSERT-written alike +query I +SELECT count(*) FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name='same_tx_part') + AND partition_id IS NULL +---- +0 + +query II +SELECT g, count(*) FROM same_tx_part GROUP BY g ORDER BY g +---- +0 1 +1 1 +2 1 +10 2 +11 1 diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test new file mode 100644 index 00000000000..7538a72c6da --- /dev/null +++ b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test @@ -0,0 +1,215 @@ +# name: 
test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test +# description: CTAS with inline PARTITIONED BY / SORTED BY interacting with the data-inlining row limit. +# When a CTAS's row count is below DATA_INLINING_ROW_LIMIT, rows go to the per-table +# ducklake_inlined_data__ catalog table instead of Parquet. Partition and sort +# metadata must still be recorded; the inline operator coordinates with the about-to-be-created +# table_id at sink-state-init time. Once a flush is called, inlined rows materialize as +# partition-aware Parquet files. (On-disk hive directory verification is intentionally omitted - +# that's covered by create_table_inline_basic.test / create_table_inline_ctas.test.) +# group: [create_table_inline_partition_sort] + +require ducklake + +require parquet + +test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db + +test-env DATA_PATH {TEST_DIR} + +# DATA_INLINING_ROW_LIMIT is non-zero (the historic default-shaped CTAS path). Pick 1000 so we can write +# both fully-inlined and overflow-to-Parquet cases below. +statement ok +ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_with_inlining', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 1000) + +statement ok +USE ducklake + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 1: CTAS that fits entirely under DATA_INLINING_ROW_LIMIT, with PARTITIONED BY (i). +# Rows go to ducklake_inlined_data__*; ducklake_data_file stays empty for this table. +# Partition metadata is still recorded - the partition_id is allocated at planning time and bound to the +# table entry at sink-state-init regardless of whether the rows landed in Parquet or in the inlined table. 
+# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +CREATE TABLE small_part PARTITIONED BY (i) AS +SELECT range AS i, range::VARCHAR AS v FROM range(50) + +# data round-trips (rows readable whether inlined or in Parquet) +query I +SELECT count(*) FROM small_part +---- +50 + +# partition metadata recorded for the inlined CTAS +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +---- +1 + +query II +SELECT partition_key_index, transform FROM ducklake_meta.ducklake_partition_column +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +---- +0 identity + +# zero data_file rows: rows are sitting in the inlined-data catalog table, not on Parquet +query I +SELECT count(*) FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +---- +0 + +# the per-table inlined-data table exists in the metadata catalog +query I +SELECT count(*) FROM ducklake_meta.ducklake_inlined_data_tables +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +---- +1 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 2: CTAS that fits under DATA_INLINING_ROW_LIMIT with both PARTITIONED BY and SORTED BY clauses. +# Both partition and sort metadata land in their respective catalog tables even though the data is inlined. 
+# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +CREATE TABLE small_both PARTITIONED BY (g) SORTED BY (v) AS +SELECT i % 4 AS g, i AS v FROM range(20) t(i) + +query I +SELECT count(*) FROM small_both +---- +20 + +query I +SELECT count(*) FROM ducklake_meta.ducklake_partition_info +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') +---- +1 + +query I +SELECT count(*) FROM ducklake_meta.ducklake_sort_info +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') +---- +1 + +# the sort_expression row carries the planning-time-allocated direction/null_order +query III +SELECT expression, sort_direction, null_order FROM ducklake_meta.ducklake_sort_expression +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') +---- +v ASC NULLS_LAST + +# fully inlined: zero Parquet files +query I +SELECT count(*) FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both') +---- +0 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 3: Flushing inlined data after a fully-inlined partitioned CTAS. +# `ducklake_flush_inlined_data` materializes the inlined rows as Parquet files using the table's +# partition spec. Post-flush, ducklake_data_file gains rows that reference the partition_id. 
+# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +CALL ducklake_flush_inlined_data('ducklake') + +# at least one data file now exists for small_part, and each carries a partition_id (the inlining-then-flush +# path hands the spec through correctly) +query I +SELECT count(*) > 0 FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') +---- +true + +query I +SELECT count(*) FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part') + AND partition_id IS NULL +---- +0 + +# data still visible after flush +query I +SELECT count(*) FROM small_part +---- +50 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 4: CTAS that OVERFLOWS DATA_INLINING_ROW_LIMIT (1000). 5000 rows -> the operator routes everything +# above the limit to Parquet. The partition spec applies to the Parquet writes; the sort spec applies to +# the row stream BEFORE the inline operator (sort_on_insert=true default). 
+# ---------------------------------------------------------------------------------------------------------------------- + +statement ok +CREATE TABLE overflow_part PARTITIONED BY (g) SORTED BY (v) AS +SELECT i % 8 AS g, i AS v FROM range(5000) t(i) + +query I +SELECT count(*) FROM overflow_part +---- +5000 + +# partition + sort metadata persist +query II +SELECT + (SELECT count(*) FROM ducklake_meta.ducklake_partition_info + WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')), + (SELECT count(*) FROM ducklake_meta.ducklake_sort_info + WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')) +---- +1 1 + +# overflow path: at least one Parquet file was written, and each has a non-null partition_id +query I +SELECT count(*) > 0 FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part') +---- +true + +query I +SELECT count(*) FROM ducklake_meta.ducklake_data_file +WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part') + AND partition_id IS NULL +---- +0 + +# ---------------------------------------------------------------------------------------------------------------------- +# Test 5: Flushing the overflow_part table (which had data both inlined and on Parquet). After flush, all +# rows should be on Parquet with non-null partition_id, and the inlined-data table count for it goes to 0 +# (or the inlined-data tables are cleared per-table by the flush). 
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+CALL ducklake_flush_inlined_data('ducklake')
+
+# overflow_part: after the flush, every data file carries a non-null partition_id (no partition-less files remain)
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_data_file
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')
+  AND partition_id IS NULL
+----
+0
+
+# the row count is preserved end-to-end (inlined → flushed → Parquet)
+query I
+SELECT count(*) FROM overflow_part
+----
+5000
+
+# small_both: same checks - the partition+sort spec persists through inlining + flush
+query I
+SELECT count(*) FROM small_both
+----
+20
+
+query II
+SELECT g, count(*) FROM small_both GROUP BY g ORDER BY g
+----
+0 5
+1 5
+2 5
+3 5
diff --git a/test/sql/settings/create_table_with_options.test b/test/sql/settings/create_table_with_options.test
new file mode 100644
index 00000000000..b71b67e3cf4
--- /dev/null
+++ b/test/sql/settings/create_table_with_options.test
@@ -0,0 +1,274 @@
+# name: test/sql/settings/create_table_with_options.test
+# description: Test CREATE TABLE / CTAS with WITH (...)
table-scoped config options +# group: [settings] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_create_with_options.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_create_with_options_files'); + +# CREATE TABLE with a single WITH option persists to ducklake_metadata +statement ok +CREATE TABLE ducklake.t_compress(i INTEGER) WITH (parquet_compression='zstd'); + +query III +SELECT option_name, value, scope FROM ducklake.options() WHERE option_name='parquet_compression' AND scope='TABLE' AND scope_entry='main.t_compress' +---- +parquet_compression zstd TABLE + +# inserting into the table uses the table-scoped option +statement ok +INSERT INTO ducklake.t_compress SELECT range FROM range(1000) + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_compress/**') +---- +ZSTD + +# CREATE TABLE with multiple WITH options +statement ok +CREATE TABLE ducklake.t_multi(i INTEGER) WITH (parquet_compression='gzip', parquet_row_group_size=512); + +query III +SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_multi' ORDER BY option_name +---- +parquet_compression gzip TABLE +parquet_row_group_size 512 TABLE + +# CTAS with WITH options - validates the option both takes effect and is persisted +statement ok +SET threads=1 + +statement ok +SET preserve_insertion_order=false + +statement ok +CREATE TABLE ducklake.t_ctas WITH (parquet_compression='gzip', parquet_row_group_size=2048) AS SELECT i, 'hello' || i s FROM range(4000) t(i); + +query III +SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_ctas' ORDER BY option_name +---- +parquet_compression gzip TABLE +parquet_row_group_size 2048 TABLE + +# parquet files written by the CTAS use the configured compression +query I +SELECT DISTINCT compression FROM 
parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_ctas/**') +---- +GZIP + +# 4000 rows at row_group_size=2048 produces at least 2 row groups +query I +SELECT COUNT(DISTINCT row_group_id) >= 2 FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_ctas/**') +---- +true + +# unsupported option key raises an error +statement error +CREATE TABLE ducklake.t_bad(i INTEGER) WITH (not_a_real_option='x'); +---- +Unsupported option + +# bad value also fails (parquet_compression rejects unknown codec) +statement error +CREATE TABLE ducklake.t_bad(i INTEGER) WITH (parquet_compression='bogus'); +---- +Unsupported codec + +statement ok +DETACH ducklake + +# options survive a restart +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_create_with_options.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_create_with_options_files'); + +query I +SELECT value FROM ducklake.options() WHERE option_name='parquet_compression' AND scope='TABLE' AND scope_entry='main.t_compress' +---- +zstd + +# CREATE OR REPLACE swaps in new options for the new table +statement ok +CREATE TABLE ducklake.t_repl(i INTEGER) WITH (parquet_compression='zstd'); + +statement ok +CREATE OR REPLACE TABLE ducklake.t_repl(i INTEGER) WITH (parquet_compression='gzip'); + +query I +SELECT value FROM ducklake.options() WHERE option_name='parquet_compression' AND scope='TABLE' AND scope_entry='main.t_repl' +---- +gzip + +# CREATE TABLE IF NOT EXISTS does not mutate options on an existing table +statement ok +CREATE TABLE ducklake.t_ine(i INTEGER) WITH (parquet_compression='zstd'); + +statement ok +CREATE TABLE IF NOT EXISTS ducklake.t_ine(i INTEGER) WITH (parquet_compression='gzip'); + +query I +SELECT value FROM ducklake.options() WHERE option_name='parquet_compression' AND scope='TABLE' AND scope_entry='main.t_ine' +---- +zstd + +# WITH (...) 
on a CTAS beats a schema-scope option for the same key +statement ok +CREATE SCHEMA ducklake.s_prec + +statement ok +CALL ducklake.set_option('parquet_compression', 'gzip', schema => 's_prec') + +statement ok +CREATE TABLE ducklake.s_prec.t_prec WITH (parquet_compression='zstd') AS SELECT i FROM range(1000) t(i) + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/s_prec/t_prec/**') +---- +ZSTD + +# option keys are case-insensitive and the value is canonicalized to lower +statement ok +CREATE TABLE ducklake.t_case(i INTEGER) WITH (PARQUET_COMPRESSION='GZIP') + +query II +SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_case' +---- +parquet_compression gzip + +# foldable expressions (function calls) are accepted and folded to a literal +statement ok +CREATE TABLE ducklake.t_fold(i INTEGER) WITH (parquet_compression=upper('zstd')) + +query II +SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_fold' +---- +parquet_compression zstd + +# session variables are read at bind time via getvariable(...) and feed the WITH clause +statement ok +SET VARIABLE an_option = (SELECT 'zstd') + +statement ok +CREATE OR REPLACE TABLE ducklake.t_var WITH (parquet_compression=getvariable('an_option')) AS FROM range(42) + +query II +SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_var' +---- +parquet_compression zstd + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_var/**') +---- +ZSTD + +# expressions that cannot fold to a literal (e.g. 
a column reference with no FROM) are rejected +statement error +CREATE TABLE ducklake.t_bad_col(i INTEGER) WITH (parquet_compression=some_unknown_column) +---- +some_unknown_column + +# preserve_insertion_order=true keeps rowgroup boundaries deterministic so we can hardcode +# per-rowgroup stats_min/max below to verify the sort actually reordered rows on disk. +statement ok +SET preserve_insertion_order=true + +# All three clauses together: PARTITIONED BY + SORTED BY + WITH on plain CREATE TABLE. +# Verifies the parser accepts the combination, the WITH options are persisted, the partition layout +# matches PARTITIONED BY, the WITH compression takes effect, and SORTED BY actually reorders rows +# (per-rowgroup b stats are disjoint and ascending after sort). +statement ok +CREATE TABLE ducklake.t_combo(p INTEGER, b INTEGER) +PARTITIONED BY (p) +SORTED BY (b) +WITH (parquet_compression='gzip', parquet_row_group_size=2048) + +query III rowsort +SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_combo' ORDER BY option_name +---- +parquet_compression gzip TABLE +parquet_row_group_size 2048 TABLE + +statement ok +INSERT INTO ducklake.t_combo SELECT i // 3072 AS p, 6143 - i AS b FROM range(6144) t(i) + +# follow-up INSERT honors WITH compression +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**') WHERE path_in_schema = 'b' +---- +GZIP + +# PARTITIONED BY (p) produces 2 hive directories +query I +SELECT COUNT(DISTINCT regexp_extract(file_name, 'p=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**') +---- +2 + +# parquet_row_group_size=2048 from WITH produces 2 rowgroups per partition file (4 total) +query I +SELECT COUNT(*) FROM (SELECT DISTINCT file_name, row_group_id FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**')) +---- +4 + +# SORTED BY (b) reordered the rows: 
per-rowgroup b stats are disjoint and ascending within each +# partition file. (Input was b DESCENDING; without sort, rg 0's b would span the full input range.) +query IIII +SELECT regexp_extract(file_name, 'p=(\d+)', 1) AS partition, + row_group_id, stats_min::INTEGER AS rg_min, stats_max::INTEGER AS rg_max +FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**') +WHERE path_in_schema = 'b' +ORDER BY partition, row_group_id +---- +0 0 3072 5119 +0 1 5120 6143 +1 0 0 2047 +1 1 2048 3071 + +# All three clauses together on a CTAS — verifies plan-time PARTITIONED BY layout, SORTED BY data +# ordering, and WITH compression all coexist on a CREATE TABLE AS SELECT. +# Same data layout as t_combo (6144 rows, p=i//3072, b=6143-i) but compression=zstd this time. +statement ok +CREATE TABLE ducklake.t_combo_ctas +PARTITIONED BY (p) +SORTED BY (b) +WITH (parquet_compression='zstd', parquet_row_group_size=2048) +AS SELECT i // 3072 AS p, 6143 - i AS b FROM range(6144) t(i) + +query III rowsort +SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_combo_ctas' ORDER BY option_name +---- +parquet_compression zstd TABLE +parquet_row_group_size 2048 TABLE + +# CTAS-written parquet files use the WITH compression +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**') WHERE path_in_schema = 'b' +---- +ZSTD + +# 2 hive partition directories +query I +SELECT COUNT(DISTINCT regexp_extract(file_name, 'p=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**') +---- +2 + +# 2 rowgroups per partition file (4 total) +query I +SELECT COUNT(*) FROM (SELECT DISTINCT file_name, row_group_id FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**')) +---- +4 + +# SORTED BY (b) on the CTAS path: same per-rowgroup hardcoded stats as t_combo. 
+query IIII +SELECT regexp_extract(file_name, 'p=(\d+)', 1) AS partition, + row_group_id, stats_min::INTEGER AS rg_min, stats_max::INTEGER AS rg_max +FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**') +WHERE path_in_schema = 'b' +ORDER BY partition, row_group_id +---- +0 0 3072 5119 +0 1 5120 6143 +1 0 0 2047 +1 1 2048 3071 + diff --git a/test/sql/settings/create_table_with_options_rollback.test b/test/sql/settings/create_table_with_options_rollback.test new file mode 100644 index 00000000000..0df7ee6dbf6 --- /dev/null +++ b/test/sql/settings/create_table_with_options_rollback.test @@ -0,0 +1,59 @@ +# name: test/sql/settings/create_table_with_options_rollback.test +# description: A rolled-back CREATE TABLE WITH (...) must not leak across transactions. +# group: [settings] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_with_rollback.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_with_rollback_files'); + +statement ok +SET threads=1 + +statement ok +SET preserve_insertion_order=false + +# create a table with a WITH override, then ROLLBACK. The transaction-local table_id used in this +# transaction is reset for the next DuckLakeTransaction (each starts at TRANSACTION_LOCAL_ID_START), +# so a leaked override under that local id would silently contaminate any subsequent transaction +# whose first new table happens to land on the same local id. +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.t_rolled (i INTEGER) WITH (parquet_compression='gzip') + +statement ok +INSERT INTO ducklake.t_rolled SELECT i FROM range(100) t(i) + +statement ok +ROLLBACK + +# nothing about the rolled-back table should remain in metadata +query I +SELECT COUNT(*) FROM ducklake.options() WHERE scope='TABLE' +---- +0 + +# next transaction creates a plain table (no WITH) and writes to it BEFORE commit, so the +# INSERT planner looks up options under the transaction-local id. 
With the leak, that id +# resolves to the rolled-back gzip override; the parquet write would silently be gzip. +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.t_clean(i INTEGER) + +statement ok +INSERT INTO ducklake.t_clean SELECT i FROM range(2000) t(i) + +statement ok +COMMIT + +# default parquet compression in DuckDB is snappy +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_with_rollback_files/main/t_clean/**') +---- +SNAPPY diff --git a/test/sql/settings/create_table_with_options_transaction.test b/test/sql/settings/create_table_with_options_transaction.test new file mode 100644 index 00000000000..9bd9fc24e98 --- /dev/null +++ b/test/sql/settings/create_table_with_options_transaction.test @@ -0,0 +1,94 @@ +# name: test/sql/settings/create_table_with_options_transaction.test +# description: WITH (...) options on CREATE / CTAS persist correctly across same-transaction ALTER and rename +# group: [settings] + +require ducklake + +require parquet + +statement ok +ATTACH 'ducklake:__TEST_DIR__/ducklake_create_with_options_txn.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_create_with_options_txn_files'); + +statement ok +SET threads=1 + +statement ok +SET preserve_insertion_order=false + +# CTAS + RENAME within the same transaction: options must follow the new name and the parquet +# files written during the CTAS must already honor the configured row group size. 
+statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.t_rename WITH (parquet_compression='gzip', parquet_row_group_size=500) AS SELECT i FROM range(2500) t(i); + +statement ok +ALTER TABLE ducklake.t_rename RENAME TO t_renamed + +statement ok +COMMIT + +query III rowsort +SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_renamed' ORDER BY option_name +---- +parquet_compression gzip TABLE +parquet_row_group_size 500 TABLE + +# no leftover options under the original name +query I +SELECT COUNT(*) FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_rename' +---- +0 + +# data files were written with the configured compression (the CTAS happened before the rename) +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_txn_files/main/**') +---- +GZIP + +# CREATE TABLE + insert + rename within the same transaction +statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.t2(i INTEGER) WITH (parquet_compression='zstd'); + +statement ok +INSERT INTO ducklake.t2 SELECT range FROM range(1000); + +statement ok +ALTER TABLE ducklake.t2 RENAME TO t2_after + +statement ok +COMMIT + +query II +SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t2_after' +---- +parquet_compression zstd + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_txn_files/main/t2*/**') +---- +ZSTD + +# CREATE TABLE WITH (...) + same-transaction INSERT must apply the override even though the table +# is still transaction-local (committed table_id has not been allocated yet). DuckLakeCopyInput pulls +# the override from DuckLakeTableEntry::options_in_create_with. 
+statement ok +BEGIN + +statement ok +CREATE TABLE ducklake.t_local(i INTEGER) WITH (parquet_compression='gzip') + +statement ok +INSERT INTO ducklake.t_local SELECT i FROM range(1000) t(i) + +statement ok +COMMIT + +query I +SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_txn_files/main/t_local/**') +---- +GZIP