From 3fa5ba250037245fa41a01f9f3e9ecda0999649f Mon Sep 17 00:00:00 2001
From: Alex-Monahan <alexm91@vt.edu>
Date: Thu, 30 Apr 2026 20:31:58 -0700
Subject: [PATCH 01/12] CREATE and CTAS SORTED BY and PARTITIONED BY

---
 .../ducklake_compaction_functions.cpp         |  15 +-
 .../ducklake_compaction_functions.hpp         |   4 +
 src/include/storage/ducklake_catalog.hpp      |   3 +
 src/include/storage/ducklake_insert.hpp       |  27 +-
 .../storage/ducklake_partition_data.hpp       |   9 +
 src/include/storage/ducklake_schema_entry.hpp |   8 +-
 src/include/storage/ducklake_table_entry.hpp  |  13 +-
 src/storage/ducklake_catalog.cpp              |  11 +
 src/storage/ducklake_insert.cpp               | 126 +++--
 src/storage/ducklake_schema_entry.cpp         |  35 +-
 src/storage/ducklake_table_entry.cpp          |  96 ++--
 src/storage/ducklake_transaction.cpp          |  12 +
 .../create_table_inline_basic.test            | 311 ++++++++++++
 .../create_table_inline_ctas.test             | 248 ++++++++++
 ...e_table_inline_ctas_sort_verification.test |  88 ++++
 .../create_table_inline_errors.test           |  96 ++++
 .../create_table_inline_transactions.test     | 453 ++++++++++++++++++
 .../create_table_inline_with_inlining.test    | 215 +++++++++
 18 files changed, 1695 insertions(+), 75 deletions(-)
 create mode 100644 test/sql/create_table_inline_partition_sort/create_table_inline_basic.test
 create mode 100644 test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test
 create mode 100644 test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test
 create mode 100644 test/sql/create_table_inline_partition_sort/create_table_inline_errors.test
 create mode 100644 test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test
 create mode 100644 test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test
diff --git a/src/functions/ducklake_compaction_functions.cpp b/src/functions/ducklake_compaction_functions.cpp
index 92ad52f05f4..dcd89b363be 100644
--- a/src/functions/ducklake_compaction_functions.cpp
+++ b/src/functions/ducklake_compaction_functions.cpp
@@ -47,15 +47,16 @@ vector<OrderByNode> DuckLakeCompactor::ParseSortOrders(const DuckLakeSort &sort_
 }
 
 //! Binds ORDER BY expressions directly using ExpressionBinder.
-vector<BoundOrderByNode> DuckLakeCompactor::BindSortOrders(Binder &binder, DuckLakeTableEntry &table, idx_t table_index,
+vector<BoundOrderByNode> DuckLakeCompactor::BindSortOrders(Binder &binder, const ColumnList &columns,
+                                                           const string &table_name, TableIndex table_index,
                                                            vector<OrderByNode> &pre_bound_orders) {
-	auto &columns = table.GetColumns();
 	auto column_names = columns.GetColumnNames();
 	auto column_types = columns.GetColumnTypes();
 
 	// Create a child binder with the table columns in scope
 	auto child_binder = Binder::CreateBinder(binder.context, &binder);
-	child_binder->bind_context.AddGenericBinding(table_index, table.name, column_names, column_types);
+	// AddGenericBinding takes the raw idx_t binding id, not DuckLake's TableIndex wrapper.
+	child_binder->bind_context.AddGenericBinding(table_index.index, table_name, column_names, column_types);
 
 	// Bind each ORDER BY expression directly
 	vector<BoundOrderByNode> orders;
@@ -68,6 +69,12 @@ vector<BoundOrderByNode> DuckLakeCompactor::BindSortOrders(Binder &binder, DuckL
 	return orders;
 }
 
+vector<BoundOrderByNode> DuckLakeCompactor::BindSortOrders(Binder &binder, DuckLakeTableEntry &table,
+                                                           idx_t table_index,
+                                                           vector<OrderByNode> &pre_bound_orders) {
+	return BindSortOrders(binder, table.GetColumns(), table.name, TableIndex(table_index), pre_bound_orders);
+}
+
 //===--------------------------------------------------------------------===//
 // Compaction Operator
 //===--------------------------------------------------------------------===//
@@ -384,7 +391,7 @@ unique_ptr<LogicalOperator> DuckLakeCompactor::InsertSort(Binder &binder, unique
 	}
 
 	// Validate all column references in sort expressions exist in the table
-	DuckLakeTableEntry::ValidateSortExpressionColumns(table, pre_bound_orders);
+	DuckLakeTableEntry::ValidateSortExpressionColumns(table.GetColumns(), pre_bound_orders);
 
 	// Resolve types for the input plan (could be LogicalGet or LogicalProjection)
 	plan->ResolveOperatorTypes();
diff --git a/src/include/functions/ducklake_compaction_functions.hpp b/src/include/functions/ducklake_compaction_functions.hpp
index ef69944ad52..94db42bc1f6 100644
--- a/src/include/functions/ducklake_compaction_functions.hpp
+++ b/src/include/functions/ducklake_compaction_functions.hpp
@@ -93,6 +93,10 @@ class DuckLakeCompactor {
 	static vector<OrderByNode> ParseSortOrders(const DuckLakeSort &sort_data);
 	static vector<BoundOrderByNode> BindSortOrders(Binder &binder, DuckLakeTableEntry &table, idx_t table_index,
 	                                               vector<OrderByNode> &pre_bound_orders);
+	//! Overload that takes (columns, table_name) directly so CTAS planning can sort-bind before any
+	//! DuckLakeTableEntry exists. Behaves identically to the entry-taking overload.
+	static vector<BoundOrderByNode> BindSortOrders(Binder &binder, const ColumnList &columns, const string &table_name,
+	                                               TableIndex table_index, vector<OrderByNode> &pre_bound_orders);
 
 private:
 	ClientContext &context;
diff --git a/src/include/storage/ducklake_catalog.hpp b/src/include/storage/ducklake_catalog.hpp
index f3d5cb014ab..a2b69e56b43 100644
--- a/src/include/storage/ducklake_catalog.hpp
+++ b/src/include/storage/ducklake_catalog.hpp
@@ -28,6 +28,7 @@ class DuckLakeFieldData;
 struct DuckLakeFileListEntry;
 struct DuckLakeConfigOption;
 struct DeleteFileMap;
+struct BoundCreateTableInfo;
 class LogicalGet;
 
 //! Cache entry for DuckLake table statistics
@@ -135,6 +136,8 @@ class DuckLakeCatalog : public Catalog {
 
 	optional_ptr<CatalogEntry> CreateSchema(CatalogTransaction transaction, CreateSchemaInfo &info) override;
 
+	ErrorData SupportsCreateTable(BoundCreateTableInfo &info) override;
+
 	void ScanSchemas(ClientContext &context, std::function<void(SchemaCatalogEntry &)> callback) override;
 
 	optional_ptr<SchemaCatalogEntry> LookupSchema(CatalogTransaction transaction, const EntryLookupInfo &schema_lookup,
diff --git a/src/include/storage/ducklake_insert.hpp b/src/include/storage/ducklake_insert.hpp
index c591763ca50..4bd5ad493d3 100644
--- a/src/include/storage/ducklake_insert.hpp
+++ b/src/include/storage/ducklake_insert.hpp
@@ -15,13 +15,14 @@
 #include "storage/ducklake_stats.hpp"
 #include "common/ducklake_data_file.hpp"
 #include "storage/ducklake_field_data.hpp"
+#include "storage/ducklake_partition_data.hpp"
+#include "storage/ducklake_sort_data.hpp"
 
 namespace duckdb {
 class DuckLakeCatalog;
 class DuckLakeSchemaEntry;
 class DuckLakeTableEntry;
 class DuckLakeFieldData;
-struct DuckLakePartition;
 struct DuckLakeCopyOptions;
 struct DuckLakeCopyInput;
 
@@ -41,15 +42,19 @@ class DuckLakeInsertGlobalState : public GlobalSinkState {
 
 class DuckLakeInsert : public PhysicalOperator {
 public:
-	//! INSERT INTO
+	//! INSERT INTO an existing table.
 	DuckLakeInsert(PhysicalPlan &physical_plan, const vector<LogicalType> &types, DuckLakeTableEntry &table,
 	               optional_idx partition_id, string encryption_key);
-	//! CREATE TABLE AS
+	//! CREATE TABLE AS - the table is created in GetGlobalSinkState. Any inline
+	//! PARTITIONED BY / SORTED BY clauses are pre-built into ctas_partition_data / ctas_sort_data at planning
+	//! time so the physical_copy upstream of this operator can hive-partition the write with a partition_id
+	//! that matches what gets attached to the table entry at sink-state-init.
 	DuckLakeInsert(PhysicalPlan &physical_plan, const vector<LogicalType> &types, SchemaCatalogEntry &schema,
 	               unique_ptr<BoundCreateTableInfo> info, string table_uuid, string table_data_path,
-	               string encryption_key);
+	               unique_ptr<DuckLakePartition> ctas_partition_data, unique_ptr<DuckLakeSort> ctas_sort_data,
+	               optional_idx partition_id, string encryption_key);
 
-	//! The table to insert into
+	//! The table to insert into (only set for INSERT INTO; nullptr for CTAS until GetGlobalSinkState resolves)
 	optional_ptr<DuckLakeTableEntry> table;
 	//! Table schema, in case of CREATE TABLE AS
 	optional_ptr<SchemaCatalogEntry> schema;
@@ -59,6 +64,11 @@ class DuckLakeInsert : public PhysicalOperator {
 	string table_uuid;
 	//! The table data path, in case of CREATE TABLE AS
 	string table_data_path;
+	//! Pre-built partition spec for CTAS (allocated at planning time so the physical write carries the
+	//! correct partition_id).
+	unique_ptr<DuckLakePartition> ctas_partition_data;
+	//! Pre-built sort spec for CTAS (same lifecycle as ctas_partition_data).
+	unique_ptr<DuckLakeSort> ctas_sort_data;
 	//! The partition id we are writing into (if any)
 	optional_idx partition_id;
 	//! The encryption key used for writing the Parquet files
@@ -140,8 +150,13 @@ struct DuckLakeCopyOptions {
 
 struct DuckLakeCopyInput {
 	explicit DuckLakeCopyInput(ClientContext &context, DuckLakeTableEntry &table, const string &hive_partition = "");
+	//! CTAS-flavored: take the field-id mapping and (optional) partition spec from the planning-time-built
+	//! spec rather than from a DuckLakeTableEntry that hasn't been created yet. field_data is required
+	//! because the parquet writer always needs it; partition_data is null when CREATE TABLE AS has no
+	//! inline PARTITIONED BY clause.
 	DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns,
-	                  const string &data_path_p);
+	                  const string &data_path_p, DuckLakeFieldData &field_data,
+	                  optional_ptr<DuckLakePartition> partition_data = nullptr);
 
 	DuckLakeCatalog &catalog;
 	optional_ptr<DuckLakePartition> partition_data;
diff --git a/src/include/storage/ducklake_partition_data.hpp b/src/include/storage/ducklake_partition_data.hpp
index 7fa7c34f94b..f65300a56ff 100644
--- a/src/include/storage/ducklake_partition_data.hpp
+++ b/src/include/storage/ducklake_partition_data.hpp
@@ -20,12 +20,21 @@ enum class DuckLakeTransformType { IDENTITY, BUCKET, YEAR, MONTH, DAY, HOUR };
 struct DuckLakeTransform {
 	DuckLakeTransformType type;
 	idx_t bucket_count = 0; // only for BUCKET
+
+	bool operator==(const DuckLakeTransform &other) const {
+		return type == other.type && bucket_count == other.bucket_count;
+	}
 };
 
 struct DuckLakePartitionField {
 	idx_t partition_key_index = 0;
 	FieldIndex field_id;
 	DuckLakeTransform transform;
+
+	bool operator==(const DuckLakePartitionField &other) const {
+		return partition_key_index == other.partition_key_index && field_id == other.field_id &&
+		       transform == other.transform;
+	}
 };
 
 struct DuckLakePartition {
diff --git a/src/include/storage/ducklake_schema_entry.hpp b/src/include/storage/ducklake_schema_entry.hpp
index d50f8e637c9..3257444acd8 100644
--- a/src/include/storage/ducklake_schema_entry.hpp
+++ b/src/include/storage/ducklake_schema_entry.hpp
@@ -10,6 +10,8 @@
 
 #include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp"
 #include "storage/ducklake_catalog_set.hpp"
+#include "storage/ducklake_partition_data.hpp"
+#include "storage/ducklake_sort_data.hpp"
 
 namespace duckdb {
 class DuckLakeTransaction;
@@ -32,8 +34,12 @@ class DuckLakeSchemaEntry : public SchemaCatalogEntry {
 	}
 
 public:
+	//! Create a DuckLakeTableEntry and register it with the transaction.
+	//! When prebuilt_partition_data / prebuilt_sort_data are used by CTAS when partitioned or sorted
 	optional_ptr<CatalogEntry> CreateTableExtended(CatalogTransaction transaction, BoundCreateTableInfo &info,
-	                                               string table_uuid, string table_data_path);
+	                                               string table_uuid, string table_data_path,
+	                                               unique_ptr<DuckLakePartition> prebuilt_partition_data = nullptr,
+	                                               unique_ptr<DuckLakeSort> prebuilt_sort_data = nullptr);
 	unique_ptr<CreateInfo> GetInfo() const override;
 	optional_ptr<CatalogEntry> CreateTable(CatalogTransaction transaction, BoundCreateTableInfo &info) override;
 	optional_ptr<CatalogEntry> CreateFunction(CatalogTransaction transaction, CreateFunctionInfo &info) override;
diff --git a/src/include/storage/ducklake_table_entry.hpp b/src/include/storage/ducklake_table_entry.hpp
index 0f97e664a2f..8dc63131f21 100644
--- a/src/include/storage/ducklake_table_entry.hpp
+++ b/src/include/storage/ducklake_table_entry.hpp
@@ -122,8 +122,17 @@ class DuckLakeTableEntry : public TableCatalogEntry {
 	virtual_column_map_t GetVirtualColumns() const override;
 	vector<column_t> GetRowIdColumns() const override;
 
-	//! Validates that all column references in sort expressions exist in the table
-	static void ValidateSortExpressionColumns(DuckLakeTableEntry &table, const vector<OrderByNode> &orders);
+	//! Validates that all column references in sort expressions exist in the column list.
+	//! Takes (columns) directly so it can run at CTAS planning time before a DuckLakeTableEntry exists.
+	static void ValidateSortExpressionColumns(const ColumnList &columns, const vector<OrderByNode> &orders);
+
+	//! Build a DuckLakePartition from raw partition expressions (allocates a transaction-local id).
+	static unique_ptr<DuckLakePartition> BuildPartitionData(DuckLakeTransaction &transaction, const ColumnList &columns,
+	                                                        DuckLakeFieldData &field_data,
+	                                                        const vector<unique_ptr<ParsedExpression>> &partition_keys);
+	//! Build a DuckLakeSort from a vector of OrderByNode (allocates a transaction-local id).
+	static unique_ptr<DuckLakeSort> BuildSortData(DuckLakeTransaction &transaction, const ColumnList &columns,
+	                                              const vector<OrderByNode> &orders);
 
 private:
 	unique_ptr<CatalogEntry> AlterTable(DuckLakeTransaction &transaction, RenameTableInfo &info);
diff --git a/src/storage/ducklake_catalog.cpp b/src/storage/ducklake_catalog.cpp
index c2c7e1f8a5a..ec77b4bfed4 100644
--- a/src/storage/ducklake_catalog.cpp
+++ b/src/storage/ducklake_catalog.cpp
@@ -10,6 +10,7 @@
 #include "duckdb/parser/parsed_data/create_table_info.hpp"
 #include "duckdb/parser/parsed_data/create_view_info.hpp"
 #include "duckdb/parser/parsed_data/drop_info.hpp"
+#include "duckdb/planner/parsed_data/bound_create_table_info.hpp"
 #include "duckdb/storage/database_size.hpp"
 #include "storage/ducklake_initializer.hpp"
 #include "storage/ducklake_schema_entry.hpp"
@@ -155,6 +156,16 @@ optional_ptr<CatalogEntry> DuckLakeCatalog::CreateSchema(CatalogTransaction tran
 	return result;
 }
 
+ErrorData DuckLakeCatalog::SupportsCreateTable(BoundCreateTableInfo &info) {
+	auto &base = info.Base().Cast<CreateTableInfo>();
+	if (!base.options.empty()) {
+		return ErrorData(
+		    ExceptionType::CATALOG,
+		    StringUtil::Format("WITH clause is not supported for tables in a %s catalog", GetCatalogType()));
+	}
+	return ErrorData();
+}
+
 void DuckLakeCatalog::DropSchema(ClientContext &context, DropInfo &info) {
 	auto schema = GetSchema(GetCatalogTransaction(context), info.name, info.if_not_found);
 	if (!schema) {
diff --git a/src/storage/ducklake_insert.cpp b/src/storage/ducklake_insert.cpp
index ba3cc8bcf84..81f4375d70d 100644
--- a/src/storage/ducklake_insert.cpp
+++ b/src/storage/ducklake_insert.cpp
@@ -42,10 +42,13 @@ DuckLakeInsert::DuckLakeInsert(PhysicalPlan &physical_plan, const vector<Logical
 
 DuckLakeInsert::DuckLakeInsert(PhysicalPlan &physical_plan, const vector<LogicalType> &types,
                                SchemaCatalogEntry &schema, unique_ptr<BoundCreateTableInfo> info, string table_uuid_p,
-                               string table_data_path_p, string encryption_key_p)
+                               string table_data_path_p, unique_ptr<DuckLakePartition> ctas_partition_data_p,
+                               unique_ptr<DuckLakeSort> ctas_sort_data_p, optional_idx partition_id,
+                               string encryption_key_p)
     : PhysicalOperator(physical_plan, PhysicalOperatorType::EXTENSION, types, 1), table(nullptr), schema(&schema),
       info(std::move(info)), table_uuid(std::move(table_uuid_p)), table_data_path(std::move(table_data_path_p)),
-      encryption_key(std::move(encryption_key_p)) {
+      ctas_partition_data(std::move(ctas_partition_data_p)), ctas_sort_data(std::move(ctas_sort_data_p)),
+      partition_id(partition_id), encryption_key(std::move(encryption_key_p)) {
 }
 
 //===--------------------------------------------------------------------===//
@@ -58,12 +61,18 @@ DuckLakeInsertGlobalState::DuckLakeInsertGlobalState(DuckLakeTableEntry &table)
 unique_ptr<GlobalSinkState> DuckLakeInsert::GetGlobalSinkState(ClientContext &context) const {
 	optional_ptr<DuckLakeTableEntry> table_ptr;
 	if (info) {
-		// CREATE TABLE AS - create the table
+		// CREATE TABLE AS - materialize the table entry now, attaching a *clone* of the partition / sort spec
+		// we built at planning time so the on-disk partition_id and the in-memory partition_data agree.
+		// We clone (rather than std::move) so the operator's prebuilt spec stays intact across executions.
+		auto partition_clone = ctas_partition_data ? make_uniq<DuckLakePartition>(*ctas_partition_data) : nullptr;
+		auto sort_clone = ctas_sort_data ? make_uniq<DuckLakeSort>(*ctas_sort_data) : nullptr;
+
 		auto &catalog = schema->catalog;
 		auto &ducklake_schema = schema.get_mutable()->Cast<DuckLakeSchemaEntry>();
 		auto transaction = catalog.GetCatalogTransaction(context);
-		table_ptr = &ducklake_schema.CreateTableExtended(transaction, *info, table_uuid, table_data_path)
-		                 ->Cast<DuckLakeTableEntry>();
+		auto created = ducklake_schema.CreateTableExtended(transaction, *info, table_uuid, table_data_path,
+		                                                   std::move(partition_clone), std::move(sort_clone));
+		table_ptr = &created->Cast<DuckLakeTableEntry>();
 	} else {
 		// INSERT INTO
 		table_ptr = table;
@@ -344,8 +353,11 @@ DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeTableEntry
 }
 
 DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns,
-                                     const string &data_path_p)
+                                     const string &data_path_p, DuckLakeFieldData &field_data_p,
+                                     optional_ptr<DuckLakePartition> partition_data_p)
     : catalog(schema.ParentCatalog().Cast<DuckLakeCatalog>()), columns(columns), data_path(data_path_p) {
+	field_data = &field_data_p;
+	partition_data = partition_data_p;
 	schema_id = schema.GetSchemaId();
 	encryption_key = catalog.GenerateEncryptionKey(context);
 }
@@ -735,40 +747,43 @@ static void ResolveColumnRefs(unique_ptr<Expression> &expr) {
 	ExpressionIterator::EnumerateChildren(*expr, [](unique_ptr<Expression> &child) { ResolveColumnRefs(child); });
 }
 
-static optional_ptr<PhysicalOperator> PlanInsertSort(ClientContext &context, PhysicalPlanGenerator &planner,
-                                                     PhysicalOperator &plan, DuckLakeTableEntry &table,
-                                                     optional_ptr<DuckLakeSort> sort_data) {
-	// Parse the sort expressions from the sort_data
+static optional_ptr<PhysicalOperator> PlanInsertSortFromColumns(ClientContext &context, PhysicalPlanGenerator &planner,
+                                                                PhysicalOperator &plan, const ColumnList &columns,
+                                                                const string &table_name,
+                                                                optional_ptr<DuckLakeSort> sort_data) {
+	// Shared between the INSERT path (called with an existing table's columns/name) and the CTAS path
+	// (called with the planning-time CreateTableInfo's columns and the to-be-created table's name).
 	auto pre_bound_orders = DuckLakeCompactor::ParseSortOrders(*sort_data);
 	if (pre_bound_orders.empty()) {
 		return nullptr;
 	}
+	DuckLakeTableEntry::ValidateSortExpressionColumns(columns, pre_bound_orders);
 
-	// Validate all column references in sort expressions exist in the table
-	DuckLakeTableEntry::ValidateSortExpressionColumns(table, pre_bound_orders);
-
-	// Bind the ORDER BY expressions
 	auto binder = Binder::CreateBinder(context);
-	idx_t table_index = 0;
-	auto orders = DuckLakeCompactor::BindSortOrders(*binder, table, table_index, pre_bound_orders);
+	TableIndex table_index(0);
+	auto orders = DuckLakeCompactor::BindSortOrders(*binder, columns, table_name, table_index, pre_bound_orders);
 
 	// Convert BoundColumnRefExpression to BoundReferenceExpression for physical plan
 	for (auto &order : orders) {
 		ResolveColumnRefs(order.expression);
 	}
 
-	// Create identity projection map
 	vector<idx_t> projection_map;
 	for (idx_t i = 0; i < plan.types.size(); i++) {
 		projection_map.push_back(i);
 	}
-
 	auto &order_op = planner.Make<PhysicalOrder>(plan.types, std::move(orders), std::move(projection_map),
 	                                             plan.estimated_cardinality);
 	order_op.children.push_back(plan);
 	return &order_op;
 }
 
+static optional_ptr<PhysicalOperator> PlanInsertSort(ClientContext &context, PhysicalPlanGenerator &planner,
+                                                     PhysicalOperator &plan, DuckLakeTableEntry &table,
+                                                     optional_ptr<DuckLakeSort> sort_data) {
+	return PlanInsertSortFromColumns(context, planner, plan, table.GetColumns(), table.name, sort_data);
+}
+
 PhysicalOperator &DuckLakeCatalog::PlanInsert(ClientContext &context, PhysicalPlanGenerator &planner, LogicalInsert &op,
                                               optional_ptr<PhysicalOperator> plan) {
 	if (op.return_chunk) {
@@ -782,10 +797,10 @@ PhysicalOperator &DuckLakeCatalog::PlanInsert(ClientContext &context, PhysicalPl
 	}
 	auto &ducklake_table = op.table.Cast<DuckLakeTableEntry>();
 
-	// Sort data according to the table's SET SORTED BY configuration
+	// Sort wrap (when SET SORTED BY is configured and sort_on_insert is enabled).
 	auto sort_data = ducklake_table.GetSortData();
-	auto &ducklake_schema_for_sort = ducklake_table.ParentSchema().Cast<DuckLakeSchemaEntry>();
-	bool sort_on_insert = GetConfigOption<string>("sort_on_insert", ducklake_schema_for_sort.GetSchemaId(),
+	auto &ducklake_schema = ducklake_table.ParentSchema().Cast<DuckLakeSchemaEntry>();
+	bool sort_on_insert = GetConfigOption<string>("sort_on_insert", ducklake_schema.GetSchemaId(),
 	                                              ducklake_table.GetTableId(), "true") == "true";
 	if (sort_data && sort_on_insert) {
 		auto sorted_plan = PlanInsertSort(context, planner, *plan, ducklake_table, sort_data);
@@ -794,17 +809,13 @@ PhysicalOperator &DuckLakeCatalog::PlanInsert(ClientContext &context, PhysicalPl
 		}
 	}
 
+	// Data-inlining wrap. When sort_on_insert=false and inlining is enabled, sort AFTER the inline operator
+	// so overflow rows reach Parquet sorted; inlined rows skip the sort (they live in metadata, not Parquet).
 	optional_ptr<DuckLakeInlineData> inline_data;
-
 	idx_t data_inlining_row_limit = GetInliningLimit(context, ducklake_table);
 	if (data_inlining_row_limit > 0) {
 		plan = planner.Make<DuckLakeInlineData>(*plan, data_inlining_row_limit);
 		inline_data = plan->Cast<DuckLakeInlineData>();
-
-		// When sort_on_insert=false but inlining is enabled, add sorting AFTER
-		// the inline data operator. Data that exceeds the inlining limit passes
-		// through to parquet files and must be sorted. Data that is inlined
-		// (absorbed by DuckLakeInlineData) never reaches this sort operator.
 		if (sort_data && !sort_on_insert) {
 			auto sorted_plan = PlanInsertSort(context, planner, *plan, ducklake_table, sort_data);
 			if (sorted_plan) {
@@ -812,6 +823,7 @@ PhysicalOperator &DuckLakeCatalog::PlanInsert(ClientContext &context, PhysicalPl
 			}
 		}
 	}
+
 	DuckLakeCopyInput copy_input(context, ducklake_table);
 	auto &physical_copy = DuckLakeInsert::PlanCopyForInsert(context, planner, copy_input, plan);
 	auto &insert = DuckLakeInsert::PlanInsert(context, planner, ducklake_table, std::move(copy_input.encryption_key));
@@ -828,8 +840,53 @@ PhysicalOperator &DuckLakeCatalog::PlanCreateTableAs(ClientContext &context, Phy
 	auto &columns = create_info.columns;
 	auto &duck_transaction = DuckLakeTransaction::Get(context, *this);
 	auto &duck_schema = op.schema.Cast<DuckLakeSchemaEntry>();
+
 	// FIXME: if table already exists and we are doing CREATE IF NOT EXISTS - skip
+
+	// CTAS planning time: the table entry does NOT exist yet (it will be materialized at sink-state-init by
+	// DuckLakeInsert::GetGlobalSinkState). However the physical write we're
+	// about to build needs the partition spec + field-id mapping right now so the parquet files land in the
+	// right hive directories with the right partition_id stamped on them.
+	//
+	// The split: we build field_data + partition_data + sort_data SYNTHETICALLY here from the raw inline
+	// clauses on `op.info`. We hand the partition_data / sort_data to the DuckLakeInsert operator.
+
+	idx_t column_id = 1;
+	auto field_data = DuckLakeFieldData::FromColumns(columns, column_id);
+	unique_ptr<DuckLakePartition> partition_data;
+	if (!create_info.partition_keys.empty()) {
+		partition_data =
+		    DuckLakeTableEntry::BuildPartitionData(duck_transaction, columns, *field_data, create_info.partition_keys);
+	}
+
+	unique_ptr<DuckLakeSort> sort_data;
+	if (!create_info.sort_keys.empty()) {
+		// FIXME: TODO: The syntax needs to be enabled in DuckDB upstream
+		// CREATE TABLE AS ... SORTED BY (e) accepts only raw expressions; wrap each in an OrderByNode using the
+		// same defaults the DuckDB transformer produces for a bare expression list (ASCENDING + ORDER_DEFAULT).
+		// Matches the ALTER TABLE ... SET SORTED BY (e) path.
+		vector<OrderByNode> orders;
+		orders.reserve(create_info.sort_keys.size());
+		for (auto &expr : create_info.sort_keys) {
+			orders.emplace_back(OrderType::ASCENDING, OrderByNullType::ORDER_DEFAULT, expr->Copy());
+		}
+		sort_data = DuckLakeTableEntry::BuildSortData(duck_transaction, columns, orders);
+	}
+
 	reference<PhysicalOperator> root = plan;
+
+	// Sort wrap (mirrors PlanInsert's sort_on_insert wrap). For CTAS we don't have a table_id yet so the
+	// table-level sort_on_insert override doesn't apply; we honor the schema-level setting (defaults to true).
+	bool sort_on_insert =
+	    GetConfigOption<string>("sort_on_insert", duck_schema.GetSchemaId(), TableIndex(), "true") == "true";
+	if (sort_data && sort_on_insert) {
+		auto sorted_plan =
+		    PlanInsertSortFromColumns(context, planner, root.get(), columns, create_info.table, sort_data.get());
+		if (sorted_plan) {
+			root = *sorted_plan;
+		}
+	}
+
 	optional_ptr<DuckLakeInlineData> inline_data;
 	idx_t data_inlining_row_limit = DataInliningRowLimit(context, duck_schema.GetSchemaId(), TableIndex());
 	auto &metadata_manager = duck_transaction.GetMetadataManager();
@@ -837,17 +894,24 @@ PhysicalOperator &DuckLakeCatalog::PlanCreateTableAs(ClientContext &context, Phy
 		root = planner.Make<DuckLakeInlineData>(root.get(), data_inlining_row_limit);
 		inline_data = root.get().Cast<DuckLakeInlineData>();
 	}
-	for (auto &col : op.info->Base().columns.Logical()) {
+	for (auto &col : columns.Logical()) {
 		DuckLakeTypes::CheckSupportedType(col.Type());
 	}
 	auto table_uuid = duck_transaction.GenerateUUID();
 	auto table_data_path =
 	    duck_schema.DataPath() + DuckLakeCatalog::GeneratePathFromName(table_uuid, create_info.table);
 
-	DuckLakeCopyInput copy_input(context, duck_schema, columns, table_data_path);
+	DuckLakeCopyInput copy_input(context, duck_schema, columns, table_data_path, *field_data, partition_data.get());
 	auto &physical_copy = DuckLakeInsert::PlanCopyForInsert(context, planner, copy_input, root.get());
-	auto &insert = planner.Make<DuckLakeInsert>(op.types, op.schema, std::move(op.info), std::move(table_uuid),
-	                                            std::move(table_data_path), std::move(copy_input.encryption_key));
+
+	optional_idx insert_partition_id;
+	if (partition_data) {
+		insert_partition_id = partition_data->partition_id;
+	}
+
+	auto &insert = planner.Make<DuckLakeInsert>(
+	    op.types, op.schema, std::move(op.info), std::move(table_uuid), std::move(table_data_path),
+	    std::move(partition_data), std::move(sort_data), insert_partition_id, std::move(copy_input.encryption_key));
 	if (inline_data) {
 		inline_data->insert = insert.Cast<DuckLakeInsert>();
 	}
diff --git a/src/storage/ducklake_schema_entry.cpp b/src/storage/ducklake_schema_entry.cpp
index 10840d41b9f..b7b03fc962b 100644
--- a/src/storage/ducklake_schema_entry.cpp
+++ b/src/storage/ducklake_schema_entry.cpp
@@ -4,6 +4,7 @@
 #include "duckdb/parser/parsed_data/comment_on_column_info.hpp"
 #include "duckdb/parser/parsed_data/create_view_info.hpp"
 #include "duckdb/parser/parsed_data/drop_info.hpp"
+#include "duckdb/parser/result_modifier.hpp"
 #include "duckdb/planner/parsed_data/bound_create_table_info.hpp"
 #include "storage/ducklake_catalog.hpp"
 #include "storage/ducklake_table_entry.hpp"
@@ -61,9 +62,10 @@ bool DuckLakeSchemaEntry::HandleCreateConflict(CatalogTransaction transaction, C
 	return true;
 }
 
-optional_ptr<CatalogEntry> DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction,
-                                                                    BoundCreateTableInfo &info, string table_uuid,
-                                                                    string table_data_path) {
+optional_ptr<CatalogEntry>
+DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCreateTableInfo &info, string table_uuid,
+                                         string table_data_path, unique_ptr<DuckLakePartition> prebuilt_partition_data,
+                                         unique_ptr<DuckLakeSort> prebuilt_sort_data) {
 	auto &duck_transaction = transaction.transaction->Cast<DuckLakeTransaction>();
 	auto &base_info = info.Base();
 	// check if we have an existing entry with this name
@@ -85,6 +87,33 @@ optional_ptr<CatalogEntry> DuckLakeSchemaEntry::CreateTableExtended(CatalogTrans
 	auto table_entry = make_uniq<DuckLakeTableEntry>(ParentCatalog(), *this, base_info, table_id, std::move(table_uuid),
 	                                                 std::move(table_data_path), std::move(field_data), column_id,
 	                                                 std::move(inlined_tables), LocalChangeType::CREATED);
+	// Two routes for partition / sort data:
+	//   (A) CTAS: partition_data and sort_data were pre-built at planning time so the planner-built physical
+	//       write could embed the right partition_id into the data files. Attach them as-is - rebuilding here
+	//       would allocate new ids that wouldn't match the ids baked into the on-disk files.
+	//   (B) Plain CREATE TABLE (non-CTAS): rebuild from the inline clauses on info.Base().partition_keys /
+	//       sort_keys. There's no planning-time write to coordinate with, so the id allocated here is the only id.
+	if (prebuilt_partition_data) {
+		table_entry->SetPartitionData(std::move(prebuilt_partition_data));
+	} else if (!base_info.partition_keys.empty()) {
+		table_entry->SetPartitionData(DuckLakeTableEntry::BuildPartitionData(
+		    duck_transaction, table_entry->GetColumns(), table_entry->GetFieldData(), base_info.partition_keys));
+	}
+	if (prebuilt_sort_data) {
+		table_entry->SetSortData(std::move(prebuilt_sort_data));
+	} else if (!base_info.sort_keys.empty()) {
+		// TODO: FIXME: Needs a fix in upstream DuckDB then a fix here
+		// CREATE TABLE ... SORTED BY (e) accepts only raw expressions (no ASC/DESC/NULLS modifiers).
+		// Wrap each in an OrderByNode using the same defaults the DuckDB transformer produces for a bare
+		// expression list (ASCENDING + ORDER_DEFAULT) - matches the ALTER TABLE ... SET SORTED BY (e) path.
+		vector<OrderByNode> orders;
+		orders.reserve(base_info.sort_keys.size());
+		for (auto &expr : base_info.sort_keys) {
+			orders.emplace_back(OrderType::ASCENDING, OrderByNullType::ORDER_DEFAULT, expr->Copy());
+		}
+		table_entry->SetSortData(
+		    DuckLakeTableEntry::BuildSortData(duck_transaction, table_entry->GetColumns(), orders));
+	}
 	auto result = table_entry.get();
 	duck_transaction.CreateEntry(std::move(table_entry));
 	return result;
diff --git a/src/storage/ducklake_table_entry.cpp b/src/storage/ducklake_table_entry.cpp
index 0903f800de4..628b23a1b56 100644
--- a/src/storage/ducklake_table_entry.cpp
+++ b/src/storage/ducklake_table_entry.cpp
@@ -422,7 +422,7 @@ string GetPartitionColumnName(ColumnRefExpression &colref) {
 	return colref.GetColumnName();
 }
 
-void DuckLakeTableEntry::ValidateSortExpressionColumns(DuckLakeTableEntry &table, const vector<OrderByNode> &orders) {
+void DuckLakeTableEntry::ValidateSortExpressionColumns(const ColumnList &columns, const vector<OrderByNode> &orders) {
 	vector<string> missing_columns;
 	for (auto &order : orders) {
 		ParsedExpressionIterator::VisitExpression<ColumnRefExpression>(
@@ -432,7 +432,7 @@ void DuckLakeTableEntry::ValidateSortExpressionColumns(DuckLakeTableEntry &table
 				        "Unexpected qualified column reference - only unqualified columns are supported");
 			    }
 			    string column_name = colref.GetColumnName();
-			    if (!table.ColumnExists(column_name)) {
+			    if (!columns.ColumnExists(column_name)) {
 				    if (std::find(missing_columns.begin(), missing_columns.end(), column_name) ==
 				        missing_columns.end()) {
 					    missing_columns.push_back(column_name);
@@ -453,7 +453,8 @@ void DuckLakeTableEntry::ValidateSortExpressionColumns(DuckLakeTableEntry &table
 	}
 }
 
-DuckLakePartitionField GetPartitionField(DuckLakeTableEntry &table, ParsedExpression &expr) {
+DuckLakePartitionField GetPartitionField(const ColumnList &columns, DuckLakeFieldData &field_data,
+                                         ParsedExpression &expr) {
 	string column_name;
 	DuckLakePartitionField field;
 
@@ -521,28 +522,68 @@ DuckLakePartitionField GetPartitionField(DuckLakeTableEntry &table, ParsedExpres
 		    "Unsupported partition key %s - only identity columns and year/month/day/hour/bucket are supported",
 		    expr.ToString());
 	}
-	if (!table.ColumnExists(column_name)) {
+	if (!columns.ColumnExists(column_name)) {
 		throw CatalogException("Unexpected partition key - column \"%s\" does not exist", column_name);
 	}
-	auto &col = table.GetColumn(column_name);
+	auto &col = columns.GetColumn(column_name);
 	PhysicalIndex column_index(col.StorageOid());
-	auto &field_id = table.GetFieldData().GetByRootIndex(column_index);
+	auto &field_id = field_data.GetByRootIndex(column_index);
 	field.field_id = field_id.GetFieldIndex();
 	return field;
 }
 
-unique_ptr<CatalogEntry> DuckLakeTableEntry::AlterTable(DuckLakeTransaction &transaction, SetPartitionedByInfo &info) {
-	auto create_info = GetInfo();
-	auto &table_info = create_info->Cast<CreateTableInfo>();
-	// create a complete copy of this table with the partition info added
+static bool PartitionFieldsMatch(optional_ptr<DuckLakePartition> current_partition,
+                                 const DuckLakePartition &requested_partition) {
+	if (!current_partition) {
+		return requested_partition.fields.empty();
+	}
+	if (current_partition->fields.size() != requested_partition.fields.size()) {
+		return false;
+	}
+	for (idx_t i = 0; i < current_partition->fields.size(); i++) {
+		if (!(current_partition->fields[i] == requested_partition.fields[i])) {
+			return false;
+		}
+	}
+	return true;
+}
+
+unique_ptr<DuckLakePartition>
+DuckLakeTableEntry::BuildPartitionData(DuckLakeTransaction &transaction, const ColumnList &columns,
+                                       DuckLakeFieldData &field_data,
+                                       const vector<unique_ptr<ParsedExpression>> &partition_keys) {
+	// Always returns a non-null DuckLakePartition. Empty partition_keys yields a partition with no fields,
+	// which is the shape RESET PARTITIONED BY (and SET PARTITIONED BY () with empty list) needs to drop the
+	// existing partition spec at commit time. CTAS callers that mean "no PARTITIONED BY clause at all" should
+	// short-circuit and not call this helper.
 	auto partition_data = make_uniq<DuckLakePartition>();
 	partition_data->partition_id = transaction.GetLocalCatalogId();
-	for (idx_t expr_idx = 0; expr_idx < info.partition_keys.size(); expr_idx++) {
-		auto &expr = *info.partition_keys[expr_idx];
-		auto partition_field = GetPartitionField(*this, expr);
+	for (idx_t expr_idx = 0; expr_idx < partition_keys.size(); expr_idx++) {
+		auto &expr = *partition_keys[expr_idx];
+		auto partition_field = GetPartitionField(columns, field_data, expr);
+		// Reject duplicate partition keys: two expressions that resolve to the same (field_id, transform).
+		// `(year(ts), month(ts))` is fine - same column, different transforms - but `(a, a)` is not.
+		for (auto &existing : partition_data->fields) {
+			// partition_key_index is intentionally excluded here: it gets assigned below, after the
+			// duplicate check. Two fields are duplicates if they resolve to the same (field_id, transform).
+			if (existing.field_id == partition_field.field_id && existing.transform == partition_field.transform) {
+				throw BinderException("Duplicate partition key: expression \"%s\" matches an earlier partition key",
+				                      expr.ToString());
+			}
+		}
 		partition_field.partition_key_index = expr_idx;
 		partition_data->fields.push_back(partition_field);
 	}
+	return partition_data;
+}
+
+unique_ptr<CatalogEntry> DuckLakeTableEntry::AlterTable(DuckLakeTransaction &transaction, SetPartitionedByInfo &info) {
+	auto create_info = GetInfo();
+	auto &table_info = create_info->Cast<CreateTableInfo>();
+	auto partition_data = BuildPartitionData(transaction, GetColumns(), GetFieldData(), info.partition_keys);
+	if (PartitionFieldsMatch(GetPartitionData(), *partition_data)) {
+		return nullptr;
+	}
 
 	auto new_entry = make_uniq<DuckLakeTableEntry>(*this, table_info, std::move(partition_data));
 	return std::move(new_entry);
@@ -1239,24 +1280,16 @@ unique_ptr<CatalogEntry> DuckLakeTableEntry::AlterTable(DuckLakeTransaction &tra
 	return std::move(new_entry);
 }
 
-unique_ptr<CatalogEntry> DuckLakeTableEntry::AlterTable(DuckLakeTransaction &transaction, SetSortedByInfo &info) {
-	auto create_info = GetInfo();
-	auto &table_info = create_info->Cast<CreateTableInfo>();
-
-	if (info.orders.empty()) {
-		// RESET SORTED BY - clear sort data
-		auto new_entry = make_uniq<DuckLakeTableEntry>(*this, table_info, unique_ptr<DuckLakeSort>());
-		return std::move(new_entry);
+unique_ptr<DuckLakeSort> DuckLakeTableEntry::BuildSortData(DuckLakeTransaction &transaction, const ColumnList &columns,
+                                                           const vector<OrderByNode> &orders) {
+	if (orders.empty()) {
+		return nullptr;
 	}
-
-	// Validate all column references in all sort expressions
-	ValidateSortExpressionColumns(*this, info.orders);
-
+	ValidateSortExpressionColumns(columns, orders);
 	auto sort_data = make_uniq<DuckLakeSort>();
 	sort_data->sort_id = transaction.GetLocalCatalogId();
-	for (idx_t order_node_idx = 0; order_node_idx < info.orders.size(); order_node_idx++) {
-		auto &order_node = info.orders[order_node_idx];
-
+	for (idx_t order_node_idx = 0; order_node_idx < orders.size(); order_node_idx++) {
+		auto &order_node = orders[order_node_idx];
 		DuckLakeSortField sort_field;
 		sort_field.sort_key_index = order_node_idx;
 		sort_field.expression = order_node.expression->ToString();
@@ -1265,7 +1298,14 @@ unique_ptr<CatalogEntry> DuckLakeTableEntry::AlterTable(DuckLakeTransaction &tra
 		sort_field.null_order = order_node.null_order;
 		sort_data->fields.push_back(sort_field);
 	}
+	return sort_data;
+}
+
+unique_ptr<CatalogEntry> DuckLakeTableEntry::AlterTable(DuckLakeTransaction &transaction, SetSortedByInfo &info) {
+	auto create_info = GetInfo();
+	auto &table_info = create_info->Cast<CreateTableInfo>();
 
+	auto sort_data = BuildSortData(transaction, GetColumns(), info.orders);
 	auto new_entry = make_uniq<DuckLakeTableEntry>(*this, table_info, std::move(sort_data));
 	return std::move(new_entry);
 }
diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp
index 728d1a8c0a7..6bc5e6c2575 100644
--- a/src/storage/ducklake_transaction.cpp
+++ b/src/storage/ducklake_transaction.cpp
@@ -1751,6 +1751,18 @@ void DuckLakeTransaction::GetNewTableInfo(DuckLakeCommitState &commit_state, Duc
 			inlined_entry.uuid = latest_table.GetTableUUID();
 			inlined_entry.columns = latest_table.GetTableColumns();
 			result.new_inlined_data_tables.push_back(std::move(inlined_entry));
+
+			// CREATE TABLE ... PARTITIONED BY / SORTED BY
+			if (local_change.type == LocalChangeType::CREATED) {
+				if (table.GetPartitionData()) {
+					auto partition_key = GetNewPartitionKey(commit_state, table);
+					result.new_partition_keys.push_back(std::move(partition_key));
+				}
+				if (table.GetSortData()) {
+					auto sort_key = GetNewSortKey(commit_state, table);
+					result.new_sort_keys.push_back(std::move(sort_key));
+				}
+			}
 			break;
 		}
 		default:
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test b/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test
new file mode 100644
index 00000000000..38b97dd29f4
--- /dev/null
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test
@@ -0,0 +1,311 @@
+# name: test/sql/create_table_inline_partition_sort/create_table_inline_basic.test
+# description: CREATE TABLE with inline PARTITIONED BY / SORTED BY clauses
+# group: [create_table_inline_partition_sort]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db
+
+test-env DATA_PATH {TEST_DIR}
+
+statement ok
+ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_basic', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 0)
+
+statement ok
+USE ducklake
+
+# Test 1: CREATE TABLE with inline PARTITIONED BY only (single column)
+statement ok
+CREATE TABLE t_part (i INTEGER, v VARCHAR) PARTITIONED BY (i)
+
+statement ok
+INSERT INTO t_part VALUES (1, 'a'), (2, 'b'), (1, 'c')
+
+query II
+SELECT i, v FROM t_part ORDER BY i, v
+----
+1	a
+1	c
+2	b
+
+# verify partition metadata was persisted at CREATE time (no ALTER ran)
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part')
+----
+1
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part')
+----
+1
+
+# verify the on-disk Parquet files landed in hive-style partition-specific directories
+# (DATA_INLINING_ROW_LIMIT 0 forces INSERT to write Parquet directly; one file per partition value)
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part')
+----
+2
+
+query I
+SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') ORDER BY 1
+----
+i=1
+i=2
+
+# Test 2: CREATE TABLE with inline SORTED BY only (single column)
+statement ok
+CREATE TABLE t_sort (i INTEGER, v VARCHAR) SORTED BY (i)
+
+statement ok
+INSERT INTO t_sort VALUES (3, 'x'), (1, 'y'), (2, 'z')
+
+# t_sort has a single file (DATA_INLINING_ROW_LIMIT 0, no partitioning, one INSERT batch).
+# SORTED BY (i) means rows are i-ascending in the file - SELECT without ORDER BY returns the
+# file's row order directly, which IS the sort verification.
+query II
+SELECT i, v FROM t_sort
+----
+1	y
+2	z
+3	x
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort')
+----
+1
+
+query II
+SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort') ORDER BY sort_key_index
+----
+0	i
+
+# verify the inline path persisted the same defaults a bare ALTER TABLE ... SET SORTED BY (col) would
+# (the metadata writer collapses OrderType::ASCENDING -> "ASC" and OrderByNullType::ORDER_DEFAULT -> "NULLS_LAST")
+query III
+SELECT sort_direction, null_order, dialect FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort')
+----
+ASC	NULLS_LAST	duckdb
+
+# Test 3: CREATE TABLE with both clauses, PARTITIONED first
+statement ok
+CREATE TABLE t_both1 (a INTEGER, b INTEGER, v VARCHAR) PARTITIONED BY (a) SORTED BY (b)
+
+statement ok
+INSERT INTO t_both1 VALUES (1, 30, 'p'), (2, 10, 'q'), (1, 20, 'r')
+
+# t_both1 is PARTITIONED BY (a), so each value of a lives in its own file. SORTED BY (b) means
+# within each partition's file rows are b-ascending. Filter to one partition at a time and verify
+# row order without ORDER BY - that order IS the on-disk row order of the partition's file.
+query III
+SELECT a, b, v FROM t_both1 WHERE a = 1
+----
+1	20	r
+1	30	p
+
+# on-disk: one Parquet file per distinct partition value (a=1 and a=2), each in its own a=N directory
+query I
+SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both1') ORDER BY 1
+----
+a=1
+a=2
+
+# Test 4: CREATE TABLE with both clauses, SORTED first (verify either-order grammar)
+statement ok
+CREATE TABLE t_both2 (a INTEGER, b INTEGER) SORTED BY (b) PARTITIONED BY (a)
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both2')
+----
+1
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both2')
+----
+1
+
+# Test 5: multi-column partition + multi-column sort
+statement ok
+CREATE TABLE t_multi (a INTEGER, b INTEGER, c INTEGER) PARTITIONED BY (a, b) SORTED BY (c, a)
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi')
+----
+2
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi')
+----
+2
+
+statement ok
+INSERT INTO t_multi VALUES (1, 1, 30), (1, 1, 10), (1, 2, 20), (2, 1, 5)
+
+# t_multi is PARTITIONED BY (a, b), so each (a, b) combination lives in its own file. SORTED BY
+# (c, a) means rows within a partition's file are (c, a)-ascending. 
+query III
+SELECT a, b, c FROM t_multi WHERE a = 1 AND b = 1
+----
+1	1	10
+1	1	30
+
+# multi-column hive-style nesting: a=N/b=M/
+query II
+SELECT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) AS a_dir, regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1) AS b_dir
+FROM ducklake_metadata.ducklake_data_file
+WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi')
+ORDER BY a_dir, b_dir
+----
+a=1	b=1
+a=1	b=2
+a=2	b=1
+
+# the a directory must come *before* the b directory in the path (PARTITIONED BY (a, b) ordering)
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_data_file
+WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi')
+  AND regexp_matches(path, 'a=[0-9]+/b=[0-9]+')
+----
+3
+
+# Test 6: PARTITIONED BY with a transform function
+statement ok
+CREATE TABLE t_transform (ts TIMESTAMP, v INTEGER) PARTITIONED BY (year(ts))
+
+query II
+SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_transform')
+----
+0	year
+
+statement ok
+INSERT INTO t_transform VALUES (TIMESTAMP '2023-06-01', 1), (TIMESTAMP '2023-09-15', 2), (TIMESTAMP '2024-03-10', 3)
+
+# year() transform produces hive directories named with the partition key NAME (year, not ts)
+query I
+SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_transform') ORDER BY 1
+----
+year=2023
+year=2024
+
+# Test 7: PARTITIONED BY only writes partition metadata, NOT sort metadata (asymmetric)
+statement ok
+CREATE TABLE t_only_part (a INTEGER) PARTITIONED BY (a)
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part')
+----
+1
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part')
+----
+0
+
+statement ok
+INSERT INTO t_only_part VALUES (10), (20), (10)
+
+query I
+SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part') ORDER BY 1
+----
+a=10
+a=20
+
+# Test 8: SORTED BY only writes sort metadata, NOT partition metadata (asymmetric)
+statement ok
+CREATE TABLE t_only_sort (a INTEGER) SORTED BY (a)
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_sort')
+----
+0
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_sort')
+----
+1
+
+# Test 9: PARTITIONED BY with bucket(N, col) transform happy path
+statement ok
+CREATE TABLE t_bucket (id BIGINT, v VARCHAR) PARTITIONED BY (bucket(8, id))
+
+query II
+SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket')
+----
+0	bucket(8)
+
+statement ok
+INSERT INTO t_bucket VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6, 'f'), (7, 'g'), (8, 'h')
+
+# bucket() transform writes into hive directories named "bucket=N" (modulo bucket count)
+# we don't fix specific bucket assignments (hash-dependent) but we assert at least 1 and at most 8 distinct buckets
+query I
+SELECT count(DISTINCT regexp_extract(path, '.*(bucket=[0-9]+)[/\\].*', 1)) BETWEEN 1 AND 8 FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket')
+----
+true
+
+# every file must live in a bucket=N directory
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket') AND NOT regexp_matches(path, 'bucket=[0-9]+')
+----
+0
+
+# Test 10: same-transaction CREATE-inline + ALTER-set must NOT emit duplicate partition rows.
+# The reverse-chain commit walk has to dedupe so only the latest (ALTER) wins.
+statement ok
+BEGIN
+
+statement ok
+CREATE TABLE t_create_then_alter (a INTEGER, b INTEGER) PARTITIONED BY (a) SORTED BY (a)
+
+statement ok
+ALTER TABLE t_create_then_alter SET PARTITIONED BY (b)
+
+statement ok
+ALTER TABLE t_create_then_alter SET SORTED BY (b DESC)
+
+statement ok
+COMMIT
+
+# exactly one partition_info row (the ALTER's), not two
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter')
+----
+1
+
+# the surviving partition row references column b (the ALTER's choice), not a (the inline create table's).
+query III
+SELECT pc.partition_key_index, c.column_name, pc.transform
+FROM ducklake_metadata.ducklake_partition_column pc
+JOIN ducklake_metadata.ducklake_column c
+  ON pc.table_id = c.table_id AND pc.column_id = c.column_id AND c.end_snapshot IS NULL
+WHERE pc.table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter')
+----
+0	b	identity
+
+# exactly one sort_info row, and its expression is b DESC (the ALTER's), not a
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter')
+----
+1
+
+query III
+SELECT expression, sort_direction, null_order FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter')
+----
+b	DESC	NULLS_LAST
+
+# INSERT after the chain: data must land in b=N directories (ALTER won), NOT a=N directories
+statement ok
+INSERT INTO t_create_then_alter VALUES (1, 100), (2, 100), (3, 200)
+
+query I
+SELECT DISTINCT regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') ORDER BY 1
+----
+b=100
+b=200
+
+# none of the files should be under an a=N directory (regression guard for the dedup logic)
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') AND regexp_matches(path, 'a=[0-9]+/')
+----
+0
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test
new file mode 100644
index 00000000000..d93155f7202
--- /dev/null
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test
@@ -0,0 +1,248 @@
+# name: test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test
+# description: CREATE TABLE AS SELECT with inline PARTITIONED BY / SORTED BY clauses
+# group: [create_table_inline_partition_sort]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db
+
+test-env DATA_PATH {TEST_DIR}
+
+statement ok
+ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_ctas', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 0)
+
+statement ok
+USE ducklake
+
+# Test 1: CTAS with PARTITIONED BY only
+statement ok
+CREATE TABLE ctas_part PARTITIONED BY (i) AS SELECT range AS i, range::VARCHAR AS v FROM range(5)
+
+query II
+SELECT i, v FROM ctas_part ORDER BY i
+----
+0	0
+1	1
+2	2
+3	3
+4	4
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_part')
+----
+1
+
+# CTAS must hive-partition on the initial write: 5 distinct values from range(5) -> 5 hive directories
+query I
+SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_part') ORDER BY 1
+----
+i=0
+i=1
+i=2
+i=3
+i=4
+
+# Test 2: CTAS with SORTED BY only.
+statement ok
+CREATE TABLE ctas_sort SORTED BY (j) AS SELECT range AS i, 9 - range AS j FROM range(10)
+
+# ctas_sort is non-partitioned; CTAS produces a single file. SELECT without ORDER BY returns the
+# file's row order directly. SORTED BY (j) means j-ascending; i is forced descending by construction.
+query II
+SELECT i, j FROM ctas_sort
+----
+9	0
+8	1
+7	2
+6	3
+5	4
+4	5
+3	6
+2	7
+1	8
+0	9
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_sort')
+----
+1
+
+# Test 3: CTAS with both clauses, PARTITIONED first. 30 rows partitioned into 3 groups of 10;
+# sort key v = (29 - i) is the inverse of the SELECT's natural order so the sort has real work.
+statement ok
+CREATE TABLE ctas_both PARTITIONED BY (g) SORTED BY (v) AS
+SELECT range % 3 AS g, 29 - range AS v FROM range(30)
+
+query II
+SELECT
+  (SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both')),
+  (SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both'))
+----
+1	1
+
+# 3 distinct partition values -> 3 hive directories
+query I
+SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both')
+----
+3
+
+# verify sort actually happened: each partition's file holds rows in v-ascending order
+query II
+SELECT g, v FROM ctas_both WHERE g = 0
+----
+0	2
+0	5
+0	8
+0	11
+0	14
+0	17
+0	20
+0	23
+0	26
+0	29
+
+query II
+SELECT g, v FROM ctas_both WHERE g = 1
+----
+1	1
+1	4
+1	7
+1	10
+1	13
+1	16
+1	19
+1	22
+1	25
+1	28
+
+query II
+SELECT g, v FROM ctas_both WHERE g = 2
+----
+2	0
+2	3
+2	6
+2	9
+2	12
+2	15
+2	18
+2	21
+2	24
+2	27
+
+# Test 4: CTAS with SORTED first then PARTITIONED (either-order grammar). Same shape as Test 3 but
+statement ok
+CREATE TABLE ctas_swap SORTED BY (v) PARTITIONED BY (g) AS
+SELECT range % 3 AS g, 29 - range AS v FROM range(30)
+
+query II
+SELECT
+  (SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap')),
+  (SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap'))
+----
+1	1
+
+query I
+SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap')
+----
+3
+
+# partition g=1's file
+query II
+SELECT g, v FROM ctas_swap WHERE g = 1
+----
+1	1
+1	4
+1	7
+1	10
+1	13
+1	16
+1	19
+1	22
+1	25
+1	28
+
+# Test 5: CTAS partitioning by a transform expression
+statement ok
+CREATE TABLE ctas_transform PARTITIONED BY (year(ts)) AS
+SELECT TIMESTAMP '2024-01-01' + INTERVAL (i) YEAR AS ts, i AS v FROM range(3) t(i)
+
+query II
+SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_transform')
+----
+0	year
+
+# CTAS with year() transform must hive-partition on the initial write
+query I
+SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_transform') ORDER BY 1
+----
+year=2024
+year=2025
+year=2026
+
+# Test 6: CTAS with multi-column SORTED BY using string and integer columns
+statement ok
+CREATE TABLE ctas_multi SORTED BY (a, b) AS
+SELECT i AS a, ('tag_' || i) AS b FROM range(4) t(i)
+
+# ctas_multi is non-partitioned; CTAS produces a single file. SORTED BY (a, b) means rows are
+# (a, b)-ascending in that file - SELECT without ORDER BY returns the file's actual row order.
+query II
+SELECT a, b FROM ctas_multi
+----
+0	tag_0
+1	tag_1
+2	tag_2
+3	tag_3
+
+query II
+SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_multi') ORDER BY sort_key_index
+----
+0	a
+1	b
+
+# Test 7: CTAS with a CTE source (regression: planning shape differs from a flat SELECT)
+statement ok
+CREATE TABLE ctas_cte PARTITIONED BY (g) SORTED BY (v) AS
+WITH src AS (SELECT i % 3 AS g, i AS v FROM range(9) t(i))
+SELECT g, v FROM src
+
+query II
+SELECT g, count(*) FROM ctas_cte GROUP BY g ORDER BY g
+----
+0	3
+1	3
+2	3
+
+# ctas_cte is PARTITIONED BY (g), so each value of g lives in its own file. SORTED BY (v) means
+# rows within each partition's file are v-ascending. Filter to one partition; the row order
+# returned IS the on-disk file order.
+query II
+SELECT g, v FROM ctas_cte WHERE g = 0
+----
+0	0
+0	3
+0	6
+
+query II
+SELECT g, v FROM ctas_cte WHERE g = 1
+----
+1	1
+1	4
+1	7
+
+query II
+SELECT g, v FROM ctas_cte WHERE g = 2
+----
+2	2
+2	5
+2	8
+
+# CTE-sourced CTAS must still partition on the initial write (regression: planner passes the spec through)
+query I
+SELECT DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_cte') ORDER BY 1
+----
+g=0
+g=1
+g=2
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test
new file mode 100644
index 00000000000..8aaad79b5b0
--- /dev/null
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test
@@ -0,0 +1,88 @@
+# name: test/sql/create_table_inline_partition_sort/create_table_inline_ctas_sort_verification.test
+# description: Verify SORTED BY actually sorts data during CTAS by inspecting parquet rowgroup statistics.
+# group: [create_table_inline_partition_sort]
+
+#              Uses 3000+ rows across multiple 2048-row rowgroups; sort column is the INVERSE of the SELECT's
+#              natural order so any "sort skipped" regression would leave rowgroups in descending order.
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db
+
+test-env DATA_PATH {TEST_DIR}
+
+statement ok
+ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_ctas_sort_verify', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 0)
+
+# 2048 is the minimum parquet rowgroup size in DuckDB; pick it so 3000 rows produce >=2 rowgroups.
+statement ok
+CALL ducklake.set_option('parquet_row_group_size', 2048)
+
+# Single-threaded + preserved insertion order keeps rowgroup boundaries deterministic, so we can assert
+# specific stats_min values per rowgroup rather than an order-tolerant "monotonic" check.
+statement ok
+SET threads=1
+
+statement ok
+SET preserve_insertion_order=true
+
+statement ok
+USE ducklake
+
+# CTAS feeding from range(3000): natural SELECT order is `a` ascending. Sort key `b` is `2999 - a` so its
+# natural in-stream order is strictly descending. If SORTED BY (b) is honored at CTAS time, the data is
+# resorted to b ascending and the per-rowgroup stats_min/max for `b` will be disjoint and increasing.
+statement ok
+CREATE TABLE sort_verify SORTED BY (b) AS
+SELECT range AS a, (2999 - range) AS b FROM range(3000)
+
+# logical correctness: data round-trips
+query I
+SELECT count(*) FROM sort_verify
+----
+3000
+
+# Confirm we got at least 2 rowgroups (otherwise the per-rowgroup-stats check is meaningless).
+query I
+SELECT count(DISTINCT row_group_id) >= 2
+FROM parquet_metadata('{DATA_PATH}/inline_ctas_sort_verify/main/sort_verify/*.parquet')
+WHERE path_in_schema = 'b'
+----
+true
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Hardcoded per-rowgroup min/max sanity check on `sort_verify`.
+# Layout: 3000 rows, parquet_row_group_size=2048, single thread, preserve_insertion_order=true. After SORT BY b ASC:
+# ----------------------------------------------------------------------------------------------------------------------
+
+# column `b`
+query III
+SELECT row_group_id, stats_min::INTEGER AS rg_min, stats_max::INTEGER AS rg_max
+FROM parquet_metadata('{DATA_PATH}/inline_ctas_sort_verify/main/sort_verify/*.parquet')
+WHERE path_in_schema = 'b'
+ORDER BY row_group_id
+----
+0	0	2047
+1	2048	2999
+
+# column `a`
+query III
+SELECT row_group_id, stats_min::INTEGER AS rg_min, stats_max::INTEGER AS rg_max
+FROM parquet_metadata('{DATA_PATH}/inline_ctas_sort_verify/main/sort_verify/*.parquet')
+WHERE path_in_schema = 'a'
+ORDER BY row_group_id
+----
+0	952	2999
+1	0	951
+
+# row counts per rowgroup confirm the 2048 + 952 split
+query II
+SELECT row_group_id, row_group_num_rows
+FROM parquet_metadata('{DATA_PATH}/inline_ctas_sort_verify/main/sort_verify/*.parquet')
+WHERE path_in_schema = 'b'
+ORDER BY row_group_id
+----
+0	2048
+1	952
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test
new file mode 100644
index 00000000000..85343564a51
--- /dev/null
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test
@@ -0,0 +1,96 @@
+# name: test/sql/create_table_inline_partition_sort/create_table_inline_errors.test
+# description: validation errors for inline PARTITIONED BY / SORTED BY on CREATE TABLE / CTAS
+# group: [create_table_inline_partition_sort]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db
+
+test-env DATA_PATH {TEST_DIR}
+
+statement ok
+ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_errors', METADATA_CATALOG 'ducklake_metadata', DATA_INLINING_ROW_LIMIT 0)
+
+statement ok
+USE ducklake
+
+# PARTITIONED BY references nonexistent column
+statement error
+CREATE TABLE bad_part (a INTEGER) PARTITIONED BY (nonexistent)
+----
+column "nonexistent" does not exist
+
+# SORTED BY references nonexistent column
+statement error
+CREATE TABLE bad_sort (a INTEGER) SORTED BY (nonexistent)
+----
+<REGEX>:.*nonexistent.*
+
+# WITH (...) clause is rejected by SupportsCreateTable override (DuckLake has no WITH semantics)
+statement error
+CREATE TABLE bad_with (a INTEGER) WITH (some_option='x')
+----
+WITH clause is not supported for tables in a ducklake catalog
+
+# unsupported transform function
+statement error
+CREATE TABLE bad_transform (a INTEGER) PARTITIONED BY (foo(a))
+----
+<REGEX>:.*Unsupported partition function.*
+
+# malformed bucket() expression - missing args
+statement error
+CREATE TABLE bad_bucket (a INTEGER) PARTITIONED BY (bucket(a))
+----
+Expected bucket(bucket_count, column)
+
+# duplicate partition key (same column with same transform) is rejected
+statement error
+CREATE TABLE bad_dup (a INTEGER) PARTITIONED BY (a, a)
+----
+Duplicate partition key
+
+# CTAS variants: SORTED BY references nonexistent column
+statement error
+CREATE TABLE bad_ctas SORTED BY (nonexistent) AS SELECT 1 AS a
+----
+<REGEX>:.*nonexistent.*
+
+# CTAS variants: PARTITIONED BY references nonexistent column
+statement error
+CREATE TABLE bad_ctas_part PARTITIONED BY (nonexistent) AS SELECT 1 AS a
+----
+<REGEX>:.*nonexistent.*
+
+# verify the failing CREATEs left no metadata behind
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info
+----
+0
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info
+----
+0
+
+# A successful CREATE after failures still works (no transaction state corruption)
+statement ok
+CREATE TABLE ok_after_errors (a INTEGER) PARTITIONED BY (a)
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_after_errors')
+----
+1
+
+# Same column with DIFFERENT transforms is allowed (e.g. year + month of the same timestamp).
+# This must NOT be flagged as a duplicate partition key.
+statement ok
+CREATE TABLE ok_diff_transforms (ts TIMESTAMP) PARTITIONED BY (year(ts), month(ts))
+
+query II
+SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_diff_transforms') ORDER BY partition_key_index
+----
+0	year
+1	month
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test
new file mode 100644
index 00000000000..d04656cfc00
--- /dev/null
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test
@@ -0,0 +1,453 @@
+# name: test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test
+# description: Transaction semantics for inline PARTITIONED BY / SORTED BY on CREATE TABLE / CTAS:
+#              rollback discards everything (entry, files, transaction-local ids); concurrent CTAS with
+#              distinct names cannot conflict; concurrent CTAS with the same name produces a commit-time
+#              conflict; 
+# group: [create_table_inline_partition_sort]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db
+
+test-env DATA_PATH {TEST_DIR}
+
+statement ok
+ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_tx', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 0)
+
+statement ok
+SET immediate_transaction_mode=true
+
+statement ok
+USE ducklake
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 1: ROLLBACK after a successful inline-partitioned CREATE+INSERT.
+# Rollback must discard the table entry, the planning-time-allocated partition_id/sort_id, and the data files.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+BEGIN
+
+statement ok
+CREATE TABLE rollback_part (i INTEGER, v VARCHAR) PARTITIONED BY (i)
+
+statement ok
+INSERT INTO rollback_part VALUES (1, 'a'), (2, 'b')
+
+# table is visible inside the transaction
+query I
+SELECT count(*) FROM rollback_part
+----
+2
+
+statement ok
+ROLLBACK
+
+# table is gone after rollback
+statement error
+SELECT * FROM rollback_part
+----
+<REGEX>:.*not exist.*
+
+# the next inline CTAS proceeds cleanly - no leftover transaction-local catalog state poisoning subsequent
+# planning (regression guard for the planning-time partition_id allocator).
+statement ok
+CREATE TABLE post_rollback PARTITIONED BY (a) AS SELECT 1 AS a
+
+query I
+SELECT * FROM post_rollback
+----
+1
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 2: ROLLBACK after CTAS with both inline clauses. The most invasive shape - exercises the full
+# planning-time field_data + partition_data + sort_data construction, the sink-time CreateTableExtended
+# branch, the data-file write, and the rollback cleanup.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+BEGIN
+
+statement ok
+CREATE TABLE rollback_ctas PARTITIONED BY (g) SORTED BY (v) AS
+SELECT i % 3 AS g, i AS v FROM range(9) t(i)
+
+query I
+SELECT count(*) FROM rollback_ctas
+----
+9
+
+statement ok
+ROLLBACK
+
+statement error
+SELECT * FROM rollback_ctas
+----
+<REGEX>:.*not exist.*
+
+# verify the metadata catalog never saw the rolled-back partition_info / sort_info rows
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rollback_ctas')
+----
+0
+
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_sort_info
+WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rollback_ctas')
+----
+0
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 3: ROLLBACK + same-name CREATE in a fresh transaction succeeds. The rolled-back table_id /
+# partition_id were transaction-local and should not leak into the next transaction's namespace.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+BEGIN
+
+statement ok
+CREATE TABLE name_reuse PARTITIONED BY (a) AS SELECT 1 AS a, 'x' AS v
+
+statement ok
+ROLLBACK
+
+# Same name, same partition column - works because the rollback wiped the prior attempt entirely.
+statement ok
+CREATE TABLE name_reuse PARTITIONED BY (a) AS SELECT 99 AS a, 'y' AS v
+
+query II
+SELECT a, v FROM name_reuse
+----
+99	y
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 4: Two concurrent transactions both CREATE TABLE foo PARTITIONED BY ... AS SELECT with the SAME
+# name. First to commit wins; second errors at COMMIT time. This is a name-collision conflict - the new
+# inline clauses don't introduce a different conflict mode.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok con1
+BEGIN
+
+statement ok con2
+BEGIN
+
+statement ok con1
+CREATE TABLE ducklake.conflict_same_name PARTITIONED BY (i) AS SELECT 1 AS i
+
+statement ok con2
+CREATE TABLE ducklake.conflict_same_name SORTED BY (i) AS SELECT 2 AS i
+
+statement ok con1
+COMMIT
+
+statement error con2
+COMMIT
+----
+
+# con1's table is the surviving one - check its inline partition spec landed
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'conflict_same_name')
+----
+1
+
+query I
+SELECT * FROM ducklake.conflict_same_name
+----
+1
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 5: Two concurrent CTAS-with-inline-spec with DIFFERENT names. No conflict - each transaction
+# allocates partition_id from its own counter; commit-time remap assigns globally-unique committed ids.
+# This proves the planning-time partition_id allocation isn't a serialization point.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok con1
+BEGIN
+
+statement ok con2
+BEGIN
+
+statement ok con1
+CREATE TABLE ducklake.concurrent_a PARTITIONED BY (i) AS SELECT range AS i, 'a' AS src FROM range(5)
+
+statement ok con2
+CREATE TABLE ducklake.concurrent_b PARTITIONED BY (i) SORTED BY (i) AS SELECT range AS i, 'b' AS src FROM range(7)
+
+statement ok con1
+COMMIT
+
+statement ok con2
+COMMIT
+
+# both tables exist and have their own partition_info rows
+query II
+SELECT
+  (SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+   WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_a')),
+  (SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+   WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_b'))
+----
+1	1
+
+# the two tables have DISTINCT committed partition_ids (proves the commit-time remap deduplicates the
+# transaction-local ids that started identical at planning time on each connection).
+query I
+SELECT count(DISTINCT partition_id) FROM ducklake_meta.ducklake_partition_info
+WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name IN ('concurrent_a', 'concurrent_b'))
+----
+2
+
+query II
+SELECT count(*), max(i) FROM ducklake.concurrent_a
+----
+5	4
+
+query II
+SELECT count(*), max(i) FROM ducklake.concurrent_b
+----
+7	6
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 6: Same-transaction inline CTAS + ALTER SET PARTITIONED BY + COMMIT. Last writer wins (the
+# metadata-layer dedup collapses by table_id; final on-disk state references the ALTER's partition column).
+# This is regression coverage for the chain-walk behavior change in this branch.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+BEGIN
+
+statement ok
+CREATE TABLE chain_winner PARTITIONED BY (a) SORTED BY (a) AS
+SELECT range AS a, range + 100 AS b FROM range(3)
+
+statement ok
+ALTER TABLE chain_winner SET PARTITIONED BY (b)
+
+statement ok
+ALTER TABLE chain_winner SET SORTED BY (b DESC)
+
+statement ok
+COMMIT
+
+# exactly one partition_info row (the last ALTER's) and one sort_info row (the last ALTER's)
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner')
+----
+1
+
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_sort_info
+WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner')
+----
+1
+
+# the surviving sort row is `b DESC`, not the inline `a ASC`
+query III
+SELECT expression, sort_direction, null_order FROM ducklake_meta.ducklake_sort_expression
+WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner')
+----
+b	DESC	NULLS_LAST
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 7: CTAS that errors mid-execution because of a runtime constraint violation. The transaction must
+# unwind cleanly - no zombie entry, no partition_info row
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement error
+CREATE TABLE error_ctas PARTITIONED BY (i) AS
+SELECT CAST(range AS UTINYINT) AS i FROM range(300)
+----
+<REGEX>:.*[Cc]onversion.*out of range.*
+
+# the failed CTAS must not have partially registered the table
+statement error
+SELECT * FROM error_ctas
+----
+<REGEX>:.*not exist.*
+
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'error_ctas')
+----
+0
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 8: CTAS that errors at the BINDER stage (unknown column in PARTITIONED BY). This fails in
+# PlanCreateTableAs *before* any planning-time partition_id allocation completes, so transaction state
+# must be uncorrupted.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+BEGIN
+
+statement error
+CREATE TABLE binder_error PARTITIONED BY (nonexistent) AS SELECT 1 AS i
+----
+<REGEX>:.*nonexistent.*
+
+# subsequent statements in the same transaction still work
+statement ok
+CREATE TABLE recovers_after_binder_error (i INTEGER) PARTITIONED BY (i)
+
+statement ok
+INSERT INTO recovers_after_binder_error VALUES (42)
+
+statement ok
+COMMIT
+
+query I
+SELECT i FROM recovers_after_binder_error
+----
+42
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 9: Long transaction that mixes inline CTAS + plain INSERT into other tables. Verifies the inline
+# clauses don't disturb the transaction's broader commit logic.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+CREATE TABLE existing_target (k INTEGER, v VARCHAR)
+
+statement ok
+BEGIN
+
+statement ok
+INSERT INTO existing_target VALUES (1, 'first')
+
+statement ok
+CREATE TABLE long_tx_ctas PARTITIONED BY (g) AS SELECT i AS g, i + 100 AS v FROM range(4) t(i)
+
+statement ok
+INSERT INTO existing_target VALUES (2, 'second')
+
+statement ok
+INSERT INTO long_tx_ctas VALUES (10, 110), (11, 111)
+
+statement ok
+COMMIT
+
+query II
+SELECT k, v FROM existing_target ORDER BY k
+----
+1	first
+2	second
+
+query II
+SELECT g, count(*) FROM long_tx_ctas GROUP BY g ORDER BY g
+----
+0	1
+1	1
+2	1
+3	1
+10	1
+11	1
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 10: Two inline CTAS in one transaction, ROLLBACK. Both tables AND both partition specs unwind.
+# Guards against any per-CTAS state that might accumulate beyond the transaction-local catalog set.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+BEGIN
+
+statement ok
+CREATE TABLE rb_a PARTITIONED BY (i) AS SELECT range AS i FROM range(3)
+
+statement ok
+CREATE TABLE rb_b PARTITIONED BY (j) SORTED BY (j) AS SELECT range AS j FROM range(4)
+
+statement ok
+ROLLBACK
+
+statement error
+SELECT * FROM rb_a
+----
+<REGEX>:.*not exist.*
+
+statement error
+SELECT * FROM rb_b
+----
+<REGEX>:.*not exist.*
+
+# neither rolled-back table left a partition_info row
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name IN ('rb_a','rb_b'))
+----
+0
+
+# subsequent CTAS in fresh transaction succeeds and gets fresh ids - no collision with rolled-back local ids
+statement ok
+CREATE TABLE rb_after PARTITIONED BY (i) AS SELECT 1 AS i
+
+query I
+SELECT * FROM rb_after
+----
+1
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 11: Snapshot isolation. con2 begins BEFORE con1's inline-partitioned CTAS commits. con2 must NOT
+# observe the new table even though it's already committed in the metadata DB. The partition_info row's
+# begin_snapshot is the visibility gate.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok con2
+BEGIN
+
+statement ok con1
+CREATE TABLE ducklake.snap_iso PARTITIONED BY (i) AS SELECT 1 AS i
+
+# con2's snapshot predates the CTAS commit
+statement error con2
+SELECT * FROM ducklake.snap_iso
+----
+<REGEX>:.*not exist.*
+
+statement ok con2
+COMMIT
+
+# new transaction sees it
+query I
+SELECT * FROM ducklake.snap_iso
+----
+1
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 12: Same-transaction INSERT after inline CTAS uses the partition spec. After commit, every data
+# file (CTAS-written + INSERT-written) carries a partition_id, and on-disk paths are hive-partitioned.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+BEGIN
+
+statement ok
+CREATE TABLE same_tx_part PARTITIONED BY (g) AS SELECT range AS g, range + 100 AS v FROM range(3)
+
+statement ok
+INSERT INTO same_tx_part VALUES (10, 110), (10, 111), (11, 112)
+
+statement ok
+COMMIT
+
+# every data file references the partition_id - CTAS-written and INSERT-written alike
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_data_file
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name='same_tx_part')
+  AND partition_id IS NULL
+----
+0
+
+query II
+SELECT g, count(*) FROM same_tx_part GROUP BY g ORDER BY g
+----
+0	1
+1	1
+2	1
+10	2
+11	1
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test
new file mode 100644
index 00000000000..7538a72c6da
--- /dev/null
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test
@@ -0,0 +1,215 @@
+# name: test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test
+# description: CTAS with inline PARTITIONED BY / SORTED BY interacting with the data-inlining row limit.
+#              When a CTAS's row count is below DATA_INLINING_ROW_LIMIT, rows go to the per-table
+#              ducklake_inlined_data_<id>_<seq> catalog table instead of Parquet. Partition and sort
+#              metadata must still be recorded; the inline operator coordinates with the about-to-be-created
+#              table_id at sink-state-init time. Once a flush is called, inlined rows materialize as
+#              partition-aware Parquet files. (On-disk hive directory verification is intentionally omitted -
+#              that's covered by create_table_inline_basic.test / create_table_inline_ctas.test.)
+# group: [create_table_inline_partition_sort]
+
+require ducklake
+
+require parquet
+
+test-env DUCKLAKE_CONNECTION {TEST_DIR}/{UUID}.db
+
+test-env DATA_PATH {TEST_DIR}
+
+# DATA_INLINING_ROW_LIMIT is non-zero (the historic default-shaped CTAS path). Pick 1000 so we can write
+# both fully-inlined and overflow-to-Parquet cases below.
+statement ok
+ATTACH 'ducklake:{DUCKLAKE_CONNECTION}' AS ducklake (DATA_PATH '{DATA_PATH}/inline_with_inlining', METADATA_CATALOG 'ducklake_meta', DATA_INLINING_ROW_LIMIT 1000)
+
+statement ok
+USE ducklake
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 1: CTAS that fits entirely under DATA_INLINING_ROW_LIMIT, with PARTITIONED BY (i).
+# Rows go to ducklake_inlined_data_<id>_*; ducklake_data_file stays empty for this table.
+# Partition metadata is still recorded - the partition_id is allocated at planning time and bound to the
+# table entry at sink-state-init regardless of whether the rows landed in Parquet or in the inlined table.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+CREATE TABLE small_part PARTITIONED BY (i) AS
+SELECT range AS i, range::VARCHAR AS v FROM range(50)
+
+# data round-trips (rows readable whether inlined or in Parquet)
+query I
+SELECT count(*) FROM small_part
+----
+50
+
+# partition metadata recorded for the inlined CTAS
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+----
+1
+
+query II
+SELECT partition_key_index, transform FROM ducklake_meta.ducklake_partition_column
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+----
+0	identity
+
+# zero data_file rows: rows are sitting in the inlined-data catalog table, not on Parquet
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_data_file
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+----
+0
+
+# the per-table inlined-data table exists in the metadata catalog
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_inlined_data_tables
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+----
+1
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 2: CTAS that fits under DATA_INLINING_ROW_LIMIT with both PARTITIONED BY and SORTED BY clauses.
+# Both partition and sort metadata land in their respective catalog tables even though the data is inlined.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+CREATE TABLE small_both PARTITIONED BY (g) SORTED BY (v) AS
+SELECT i % 4 AS g, i AS v FROM range(20) t(i)
+
+query I
+SELECT count(*) FROM small_both
+----
+20
+
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both')
+----
+1
+
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_sort_info
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both')
+----
+1
+
+# the sort_expression row carries the planning-time-allocated direction/null_order
+query III
+SELECT expression, sort_direction, null_order FROM ducklake_meta.ducklake_sort_expression
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both')
+----
+v	ASC	NULLS_LAST
+
+# fully inlined: zero Parquet files
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_data_file
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both')
+----
+0
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 3: Flushing inlined data after a fully-inlined partitioned CTAS.
+# `ducklake_flush_inlined_data` materializes the inlined rows as Parquet files using the table's
+# partition spec. Post-flush, ducklake_data_file gains rows that reference the partition_id.
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+CALL ducklake_flush_inlined_data('ducklake')
+
+# at least one data file now exists for small_part, and each carries a partition_id (the inlining-then-flush
+# path hands the spec through correctly)
+query I
+SELECT count(*) > 0 FROM ducklake_meta.ducklake_data_file
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+----
+true
+
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_data_file
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+  AND partition_id IS NULL
+----
+0
+
+# data still visible after flush
+query I
+SELECT count(*) FROM small_part
+----
+50
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 4: CTAS that OVERFLOWS DATA_INLINING_ROW_LIMIT (1000). 5000 rows -> the operator routes everything
+# above the limit to Parquet. The partition spec applies to the Parquet writes; the sort spec applies to
+# the row stream BEFORE the inline operator (sort_on_insert=true default).
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+CREATE TABLE overflow_part PARTITIONED BY (g) SORTED BY (v) AS
+SELECT i % 8 AS g, i AS v FROM range(5000) t(i)
+
+query I
+SELECT count(*) FROM overflow_part
+----
+5000
+
+# partition + sort metadata persist
+query II
+SELECT
+  (SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+   WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')),
+  (SELECT count(*) FROM ducklake_meta.ducklake_sort_info
+   WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part'))
+----
+1	1
+
+# overflow path: at least one Parquet file was written, and each has a non-null partition_id
+query I
+SELECT count(*) > 0 FROM ducklake_meta.ducklake_data_file
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')
+----
+true
+
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_data_file
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')
+  AND partition_id IS NULL
+----
+0
+
+# ----------------------------------------------------------------------------------------------------------------------
+# Test 5: Flushing the overflow_part table (which had data both inlined and on Parquet). After flush, all
+# rows should be on Parquet with non-null partition_id, and the inlined-data table count for it goes to 0
+# (or the inlined-data tables are cleared per-table by the flush).
+# ----------------------------------------------------------------------------------------------------------------------
+
+statement ok
+CALL ducklake_flush_inlined_data('ducklake')
+
+# overflow_part: zero rows in any inlined-data table after flush
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_data_file
+WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')
+  AND partition_id IS NULL
+----
+0
+
+# the row count is preserved end-to-end (inlined → flushed → Parquet)
+query I
+SELECT count(*) FROM overflow_part
+----
+5000
+
+# small_both: same checks - the partition+sort spec persists through inlining + flush
+query I
+SELECT count(*) FROM small_both
+----
+20
+
+query II
+SELECT g, count(*) FROM small_both GROUP BY g ORDER BY g
+----
+0	5
+1	5
+2	5
+3	5

From 164b4ff6dc344820529c73292713034b9e5bce06 Mon Sep 17 00:00:00 2001
From: Alex-Monahan <alex@motherduck.com>
Date: Mon, 4 May 2026 21:56:01 -0700
Subject: [PATCH 02/12] Fix Postgres bigint overflow in CTAS PARTITIONED BY
 commit retry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GetNewPartitionKey was mutating partition_data->partition_id from the local
TRANSACTION_LOCAL_ID_START sentinel to the freshly-allocated committed id at
commit time. On a metadata-write conflict (concurrent CTAS), CommitChanges
retries with a fresh commit_state. The retry would read the already-mutated
committed id as if it were the local id and register the wrong key in
committed_partition_ids — RemapPartitionId would then leak the unmodified
local sentinel (2^63) into the data-file SQL. Postgres bigint rejects 2^63
(signed int64 max is 2^63 - 1); SQLite silently truncates, which masked the
bug on the SQLite metadata backend.

The mutation never had any consumer: every reader of partition_data->partition_id
runs at plan time of a current or future statement, and future statements
load fresh state from metadata via the persisted partition_key.id. Removing
the mutation makes GetNewPartitionKey idempotent across retries.

Apply the same removal to GetNewSortKey for symmetry — sort_id is currently
harmless because no RemapSortId step exists, but the mutation pattern is the
same latent foot-gun.
---
 src/storage/ducklake_transaction.cpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp
index 6bc5e6c2575..13df5788c42 100644
--- a/src/storage/ducklake_transaction.cpp
+++ b/src/storage/ducklake_transaction.cpp
@@ -1393,7 +1393,13 @@ DuckLakePartitionInfo DuckLakeTransaction::GetNewPartitionKey(DuckLakeCommitStat
 	auto local_partition_id = partition_data->partition_id;
 	auto partition_id = commit_state.commit_snapshot.next_catalog_id++;
 	partition_key.id = partition_id;
-	partition_data->partition_id = partition_id;
+	// Do NOT mutate partition_data->partition_id here. On commit retry (concurrent metadata-write
+	// conflicts), GetNewPartitionKey runs again with a fresh commit_state. If the in-memory partition
+	// data carried the previous attempt's committed id, local_partition_id above would be wrong and
+	// committed_partition_ids would never contain the actual local sentinel that data files were
+	// stamped with — RemapPartitionId would then leak TRANSACTION_LOCAL_ID_START (= 2^63) into the
+	// SQL, which Postgres bigint rejects (SQLite silently truncates). Keeping the in-memory value as
+	// the local id makes GetNewPartitionKey idempotent across retries.
 	for (auto &field : partition_data->fields) {
 		DuckLakePartitionFieldInfo partition_field;
 		partition_field.partition_key_index = field.partition_key_index;
@@ -1442,7 +1448,10 @@ DuckLakeSortInfo DuckLakeTransaction::GetNewSortKey(DuckLakeCommitState &commit_
 
 	auto sort_id = commit_state.commit_snapshot.next_catalog_id++;
 	sort_key.id = sort_id;
-	sort_data->sort_id = sort_id;
+	// Do NOT mutate sort_data->sort_id here — same retry-idempotency reasoning as GetNewPartitionKey.
+	// No downstream reader observes the mutated value (data files don't reference sort_id), so this
+	// is harmless today, but removing it keeps both code paths symmetric and prevents the same bug
+	// from being reintroduced if anyone adds a RemapSortId step later.
 	for (auto &field : sort_data->fields) {
 		DuckLakeSortFieldInfo sort_field;
 		sort_field.sort_key_index = field.sort_key_index;

From 251593aac8c51ce1a02eb61e1aee975668746917 Mon Sep 17 00:00:00 2001
From: Alex-Monahan <alex@motherduck.com>
Date: Fri, 1 May 2026 13:18:09 -0700
Subject: [PATCH 03/12] Support table-scoped config options via CREATE TABLE /
 CTAS WITH (...)

Allow CREATE TABLE and CREATE TABLE AS SELECT statements to set per-table
DuckLake config options through the existing WITH (key=value, ...) clause.
Each key/value pair is validated using the same path as ducklake_set_option
and persisted into ducklake_metadata at commit time, scoped to the new
table. Options also take effect immediately for any data written within
the same transaction (e.g. CTAS row group sizing) and survive a rename
within that transaction.
---
 src/functions/ducklake_set_option.cpp         |  30 ++--
 .../functions/ducklake_table_functions.hpp    |   5 +
 src/include/storage/ducklake_insert.hpp       |  17 ++-
 .../storage/ducklake_metadata_manager.hpp     |   1 +
 src/include/storage/ducklake_table_entry.hpp  |   9 ++
 src/storage/ducklake_catalog.cpp              |   8 +-
 src/storage/ducklake_insert.cpp               |  56 +++++--
 src/storage/ducklake_metadata_manager.cpp     |  19 +++
 src/storage/ducklake_schema_entry.cpp         |  33 ++++
 src/storage/ducklake_table_entry.cpp          |   2 +
 src/storage/ducklake_transaction.cpp          |  18 +++
 .../create_table_inline_errors.test           |   4 +-
 .../settings/create_table_with_options.test   | 144 ++++++++++++++++++
 .../create_table_with_options_rollback.test   |  59 +++++++
 ...create_table_with_options_transaction.test |  94 ++++++++++++
 15 files changed, 466 insertions(+), 33 deletions(-)
 create mode 100644 test/sql/settings/create_table_with_options.test
 create mode 100644 test/sql/settings/create_table_with_options_rollback.test
 create mode 100644 test/sql/settings/create_table_with_options_transaction.test

diff --git a/src/functions/ducklake_set_option.cpp b/src/functions/ducklake_set_option.cpp
index f4910846e4b..83771a1edf7 100644
--- a/src/functions/ducklake_set_option.cpp
+++ b/src/functions/ducklake_set_option.cpp
@@ -15,17 +15,12 @@ struct DuckLakeSetOptionData : public TableFunctionData {
 	DuckLakeConfigOption option;
 };
 
-static unique_ptr<FunctionData> DuckLakeSetOptionBind(ClientContext &context, TableFunctionBindInput &input,
-                                                      vector<LogicalType> &return_types, vector<string> &names) {
-	auto &catalog = DuckLakeBaseMetadataFunction::GetCatalog(context, input.inputs[0]);
-	DuckLakeConfigOption config_option;
-	auto &option = config_option.option.key;
-	auto &value = config_option.option.value;
-
-	option = StringUtil::Lower(StringValue::Get(input.inputs[1]));
-	auto &val = input.inputs[2];
+DuckLakeTag ValidateDuckLakeConfigOption(ClientContext &context, const string &option_key, const Value &val) {
+	DuckLakeTag result;
+	auto option = StringUtil::Lower(option_key);
+	result.key = option;
+	auto &value = result.value;
 
-	// read the option
 	if (option == "parquet_compression") {
 		auto codec = val.DefaultCastAs(LogicalType::VARCHAR).GetValue<string>();
 		vector<string> supported_algorithms {"uncompressed", "snappy", "gzip", "zstd", "brotli", "lz4", "lz4_raw"};
@@ -82,9 +77,8 @@ static unique_ptr<FunctionData> DuckLakeSetOptionBind(ClientContext &context, Ta
 	} else if (option == "delete_older_than" || option == "expire_older_than") {
 		auto interval_value = val.ToString();
 		if (!interval_value.empty()) {
-			// Let's verify this is actually an interval
-			interval_t result;
-			if (!Interval::FromString(val.ToString(), result)) {
+			interval_t interval_result;
+			if (!Interval::FromString(val.ToString(), interval_result)) {
 				throw BinderException("%s is not a valid interval value.", option);
 			}
 		}
@@ -103,6 +97,16 @@ static unique_ptr<FunctionData> DuckLakeSetOptionBind(ClientContext &context, Ta
 	} else {
 		throw NotImplementedException("Unsupported option %s", option);
 	}
+	return result;
+}
+
+static unique_ptr<FunctionData> DuckLakeSetOptionBind(ClientContext &context, TableFunctionBindInput &input,
+                                                      vector<LogicalType> &return_types, vector<string> &names) {
+	auto &catalog = DuckLakeBaseMetadataFunction::GetCatalog(context, input.inputs[0]);
+	DuckLakeConfigOption config_option;
+	config_option.option =
+	    ValidateDuckLakeConfigOption(context, StringValue::Get(input.inputs[1]), input.inputs[2]);
+	auto &option = config_option.option.key;
 
 	// read the scope
 	string schema;
diff --git a/src/include/functions/ducklake_table_functions.hpp b/src/include/functions/ducklake_table_functions.hpp
index 3a6814d6ee7..571c8d73687 100644
--- a/src/include/functions/ducklake_table_functions.hpp
+++ b/src/include/functions/ducklake_table_functions.hpp
@@ -15,6 +15,11 @@
 namespace duckdb {
 class DuckLakeCatalog;
 struct DuckLakeSnapshotInfo;
+struct DuckLakeTag;
+
+//! Validate and canonicalize a single (key, value) DuckLake config option.
+//! Shared by `CALL ducklake.set_option(...)` and CREATE TABLE / CTAS `WITH (...)`.
+DuckLakeTag ValidateDuckLakeConfigOption(ClientContext &context, const string &option_key, const Value &val);
 
 class DuckLakeTableFunctionUtil {
 public:
diff --git a/src/include/storage/ducklake_insert.hpp b/src/include/storage/ducklake_insert.hpp
index 4bd5ad493d3..e2c33c173e6 100644
--- a/src/include/storage/ducklake_insert.hpp
+++ b/src/include/storage/ducklake_insert.hpp
@@ -14,6 +14,7 @@
 #include "duckdb/common/index_vector.hpp"
 #include "storage/ducklake_stats.hpp"
 #include "common/ducklake_data_file.hpp"
+#include "common/ducklake_options.hpp"
 #include "storage/ducklake_field_data.hpp"
 #include "storage/ducklake_partition_data.hpp"
 #include "storage/ducklake_sort_data.hpp"
@@ -22,6 +23,7 @@ namespace duckdb {
 class DuckLakeCatalog;
 class DuckLakeSchemaEntry;
 class DuckLakeTableEntry;
+class ParsedExpression;
 class DuckLakeFieldData;
 struct DuckLakeCopyOptions;
 struct DuckLakeCopyInput;
@@ -153,10 +155,18 @@ struct DuckLakeCopyInput {
 	//! CTAS-flavored: take the field-id mapping and (optional) partition spec from the planning-time-built
 	//! spec rather than from a DuckLakeTableEntry that hasn't been created yet. field_data is required
 	//! because the parquet writer always needs it; partition_data is null when CREATE TABLE AS has no
-	//! inline PARTITIONED BY clause.
+	//! inline PARTITIONED BY clause. `create_with_options` is the raw `WITH (...)` clause from the parser;
+	//! it is validated and surfaced through `with_options` so the parquet write honors it.
 	DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns,
 	                  const string &data_path_p, DuckLakeFieldData &field_data,
-	                  optional_ptr<DuckLakePartition> partition_data = nullptr);
+	                  optional_ptr<DuckLakePartition> partition_data = nullptr,
+	                  const case_insensitive_map_t<unique_ptr<ParsedExpression>> &create_with_options = {});
+
+	//! Look up an effective DuckLake config option for this write: returns the WITH override if one is
+	//! present in `with_options`, otherwise falls through to the catalog's table/schema/global lookup.
+	//! Used by PlanCopyForInsert so a CTAS write whose new table_id is not yet allocated still picks
+	//! up the user's WITH (...) values.
+	bool GetEffectiveOption(const string &key, string &out) const;
 
 	DuckLakeCatalog &catalog;
 	optional_ptr<DuckLakePartition> partition_data;
@@ -168,6 +178,9 @@ struct DuckLakeCopyInput {
 	TableIndex table_id;
 	InsertVirtualColumns virtual_columns = InsertVirtualColumns::NONE;
 	optional_idx get_table_index;
+	//! CREATE TABLE / CTAS WITH (...) options that override same-key catalog options for this write.
+	//! Empty for plain INSERT. Validated at plan time before reaching here.
+	option_map_t with_options;
 };
 
 } // namespace duckdb
diff --git a/src/include/storage/ducklake_metadata_manager.hpp b/src/include/storage/ducklake_metadata_manager.hpp
index 90206e1f3ce..5192794220c 100644
--- a/src/include/storage/ducklake_metadata_manager.hpp
+++ b/src/include/storage/ducklake_metadata_manager.hpp
@@ -174,6 +174,7 @@ class DuckLakeMetadataManager {
 	virtual string WriteNewPartitionKeys(DuckLakeSnapshot commit_snapshot,
 	                                     const vector<DuckLakePartitionInfo> &new_partitions);
 	virtual string WriteNewSortKeys(DuckLakeSnapshot commit_snapshot, const vector<DuckLakeSortInfo> &new_sorts);
+	virtual string WriteNewTableConfigOptions(const vector<DuckLakeConfigOption> &new_options);
 	virtual string WriteDroppedColumns(const vector<DuckLakeDroppedColumn> &dropped_columns);
 	virtual string WriteNewColumns(const vector<DuckLakeNewColumn> &new_columns);
 	virtual string WriteNewTags(const vector<DuckLakeTagInfo> &new_tags);
diff --git a/src/include/storage/ducklake_table_entry.hpp b/src/include/storage/ducklake_table_entry.hpp
index 8dc63131f21..d1d2c082db8 100644
--- a/src/include/storage/ducklake_table_entry.hpp
+++ b/src/include/storage/ducklake_table_entry.hpp
@@ -84,6 +84,13 @@ class DuckLakeTableEntry : public TableCatalogEntry {
 	optional_ptr<const DuckLakeFieldId> GetFieldId(FieldIndex field_index) const;
 	void SetPartitionData(unique_ptr<DuckLakePartition> partition_data);
 	void SetSortData(unique_ptr<DuckLakeSort> sort_data);
+	//! Pending table-scoped config options collected from a CREATE TABLE / CTAS WITH (...) clause.
+	void SetPendingTableOptions(vector<DuckLakeTag> options) {
+		pending_table_options = std::move(options);
+	}
+	const vector<DuckLakeTag> &GetPendingTableOptions() const {
+		return pending_table_options;
+	}
 	shared_ptr<DuckLakeTableStats> GetTableStats(ClientContext &context);
 	shared_ptr<DuckLakeTableStats> GetTableStats(DuckLakeTransaction &transaction);
 	idx_t GetNetDataFileRowCount(DuckLakeTransaction &transaction);
@@ -187,6 +194,8 @@ class DuckLakeTableEntry : public TableCatalogEntry {
 	unique_ptr<DuckLakeSort> sort_data;
 	// only set for REMOVED_COLUMN
 	unique_ptr<ColumnChangeInfo> changed_fields;
+	// table-scoped config options collected from CREATE TABLE / CTAS WITH (...), pending until commit
+	vector<DuckLakeTag> pending_table_options;
 };
 
 } // namespace duckdb
diff --git a/src/storage/ducklake_catalog.cpp b/src/storage/ducklake_catalog.cpp
index ec77b4bfed4..2ed27012ce3 100644
--- a/src/storage/ducklake_catalog.cpp
+++ b/src/storage/ducklake_catalog.cpp
@@ -157,12 +157,8 @@ optional_ptr<CatalogEntry> DuckLakeCatalog::CreateSchema(CatalogTransaction tran
 }
 
 ErrorData DuckLakeCatalog::SupportsCreateTable(BoundCreateTableInfo &info) {
-	auto &base = info.Base().Cast<CreateTableInfo>();
-	if (!base.options.empty()) {
-		return ErrorData(
-		    ExceptionType::CATALOG,
-		    StringUtil::Format("WITH clause is not supported for tables in a %s catalog", GetCatalogType()));
-	}
+	// DuckLake handles PARTITIONED BY, SORTED BY, and WITH (...) itself in DuckLakeSchemaEntry,
+	// so suppress the base-class rejection of all three.
 	return ErrorData();
 }
 
diff --git a/src/storage/ducklake_insert.cpp b/src/storage/ducklake_insert.cpp
index 81f4375d70d..ac12159b77d 100644
--- a/src/storage/ducklake_insert.cpp
+++ b/src/storage/ducklake_insert.cpp
@@ -10,11 +10,13 @@
 #include "storage/ducklake_geo_stats.hpp"
 #include "common/ducklake_types.hpp"
 #include "functions/ducklake_compaction_functions.hpp"
+#include "functions/ducklake_table_functions.hpp"
 
 #include "duckdb/catalog/catalog_entry/copy_function_catalog_entry.hpp"
 #include "duckdb/execution/operator/order/physical_order.hpp"
 #include "duckdb/execution/operator/projection/physical_projection.hpp"
 #include "duckdb/execution/operator/scan/physical_table_scan.hpp"
+#include "duckdb/parser/expression/constant_expression.hpp"
 #include "duckdb/parser/parser.hpp"
 #include "duckdb/planner/binder.hpp"
 #include "duckdb/planner/expression_binder.hpp"
@@ -350,16 +352,43 @@ DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeTableEntry
 	schema_id = table.ParentSchema().Cast<DuckLakeSchemaEntry>().GetSchemaId();
 	table_id = table.GetTableId();
 	encryption_key = catalog.GenerateEncryptionKey(context);
+	// Surface CREATE TABLE / CTAS WITH (...) options for transaction-local tables: the catalog
+	// is not mutated for transaction-local ids, so the only source of truth is the table entry.
+	for (auto &tag : table.GetPendingTableOptions()) {
+		with_options[tag.key] = tag.value;
+	}
 }
 
 DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns,
                                      const string &data_path_p, DuckLakeFieldData &field_data_p,
-                                     optional_ptr<DuckLakePartition> partition_data_p)
+                                     optional_ptr<DuckLakePartition> partition_data_p,
+                                     const case_insensitive_map_t<unique_ptr<ParsedExpression>> &create_with_options)
     : catalog(schema.ParentCatalog().Cast<DuckLakeCatalog>()), columns(columns), data_path(data_path_p) {
 	field_data = &field_data_p;
 	partition_data = partition_data_p;
 	schema_id = schema.GetSchemaId();
 	encryption_key = catalog.GenerateEncryptionKey(context);
+	// Validate CREATE TABLE AS ... WITH (key=value, ...) and surface as per-write overrides so the
+	// parquet files land with the requested compression / row group size, even though the new table_id
+	// won't be allocated until DuckLakeInsert::GetGlobalSinkState (long after this plan is built).
+	for (auto &option : create_with_options) {
+		if (!option.second || option.second->GetExpressionClass() != ExpressionClass::CONSTANT) {
+			throw BinderException(
+			    "WITH option \"%s\" must be a constant expression in DuckLake CREATE TABLE / CTAS", option.first);
+		}
+		auto &constant_expr = option.second->Cast<ConstantExpression>();
+		auto tag = ValidateDuckLakeConfigOption(context, option.first, constant_expr.value);
+		with_options[tag.key] = tag.value;
+	}
+}
+
+bool DuckLakeCopyInput::GetEffectiveOption(const string &key, string &out) const {
+	auto it = with_options.find(key);
+	if (it != with_options.end()) {
+		out = it->second;
+		return true;
+	}
+	return catalog.TryGetConfigOption(key, out, schema_id, table_id);
 }
 
 static void StripTrailingSeparator(FileSystem &fs, string &path) {
@@ -502,32 +531,38 @@ DuckLakeCopyOptions DuckLakeInsert::GetCopyOptions(ClientContext &context, DuckL
 	auto &schema_id = copy_input.schema_id;
 	auto &table_id = copy_input.table_id;
 	string parquet_compression;
-	if (catalog.TryGetConfigOption("parquet_compression", parquet_compression, schema_id, table_id)) {
+	if (copy_input.GetEffectiveOption("parquet_compression", parquet_compression)) {
 		info->options["compression"].emplace_back(parquet_compression);
 	}
 	string parquet_version;
-	if (catalog.TryGetConfigOption("parquet_version", parquet_version, schema_id, table_id)) {
+	if (copy_input.GetEffectiveOption("parquet_version", parquet_version)) {
 		info->options["parquet_version"].emplace_back(parquet_version);
 	}
 	string parquet_compression_level;
-	if (catalog.TryGetConfigOption("parquet_compression_level", parquet_compression_level, schema_id, table_id)) {
+	if (copy_input.GetEffectiveOption("parquet_compression_level", parquet_compression_level)) {
 		info->options["compression_level"].emplace_back(parquet_compression_level);
 	}
 	string row_group_size;
-	if (catalog.TryGetConfigOption("parquet_row_group_size", row_group_size, schema_id, table_id)) {
+	if (copy_input.GetEffectiveOption("parquet_row_group_size", row_group_size)) {
 		info->options["row_group_size"].emplace_back(row_group_size);
 	}
 	string row_group_size_bytes;
-	if (catalog.TryGetConfigOption("parquet_row_group_size_bytes", row_group_size_bytes, schema_id, table_id)) {
+	if (copy_input.GetEffectiveOption("parquet_row_group_size_bytes", row_group_size_bytes)) {
 		info->options["row_group_size_bytes"].emplace_back(row_group_size_bytes + " bytes");
 	}
 	string per_thread_output_str;
 	bool per_thread_output = false;
-	if (catalog.TryGetConfigOption("per_thread_output", per_thread_output_str, schema_id, table_id)) {
+	if (copy_input.GetEffectiveOption("per_thread_output", per_thread_output_str)) {
 		per_thread_output = per_thread_output_str == "true";
 	}
-	idx_t target_file_size = catalog.GetConfigOption<idx_t>("target_file_size", schema_id, table_id,
-	                                                        DuckLakeCatalog::DEFAULT_TARGET_FILE_SIZE);
+	string target_file_size_str;
+	idx_t target_file_size = DuckLakeCatalog::DEFAULT_TARGET_FILE_SIZE;
+	if (copy_input.GetEffectiveOption("target_file_size", target_file_size_str)) {
+		target_file_size = Value(target_file_size_str).GetValue<idx_t>();
+	} else {
+		target_file_size =
+		    catalog.GetConfigOption<idx_t>("target_file_size", schema_id, table_id, target_file_size);
+	}
 
 	// Always use native parquet geometry for writing
 	info->options["geoparquet_version"].emplace_back("NONE");
@@ -901,7 +936,8 @@ PhysicalOperator &DuckLakeCatalog::PlanCreateTableAs(ClientContext &context, Phy
 	auto table_data_path =
 	    duck_schema.DataPath() + DuckLakeCatalog::GeneratePathFromName(table_uuid, create_info.table);
 
-	DuckLakeCopyInput copy_input(context, duck_schema, columns, table_data_path, *field_data, partition_data.get());
+	DuckLakeCopyInput copy_input(context, duck_schema, columns, table_data_path, *field_data, partition_data.get(),
+	                             create_info.options);
 	auto &physical_copy = DuckLakeInsert::PlanCopyForInsert(context, planner, copy_input, root.get());
 
 	optional_idx insert_partition_id;
diff --git a/src/storage/ducklake_metadata_manager.cpp b/src/storage/ducklake_metadata_manager.cpp
index 4dd5163a6ae..aeed0a15e20 100644
--- a/src/storage/ducklake_metadata_manager.cpp
+++ b/src/storage/ducklake_metadata_manager.cpp
@@ -4573,6 +4573,25 @@ WHERE {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < end_snapshot OR end_s
 	return table_sizes;
 }
 
+string DuckLakeMetadataManager::WriteNewTableConfigOptions(const vector<DuckLakeConfigOption> &new_options) {
+	if (new_options.empty()) {
+		return string();
+	}
+	// fresh committed table-id from LocalChangeType::CREATED only — no UPSERT needed
+	string query = "INSERT INTO {METADATA_CATALOG}.ducklake_metadata VALUES ";
+	for (idx_t i = 0; i < new_options.size(); i++) {
+		auto &opt = new_options[i];
+		D_ASSERT(opt.table_id.IsValid() && !opt.table_id.IsTransactionLocal());
+		if (i > 0) {
+			query += ", ";
+		}
+		query += StringUtil::Format("(%s, %s, 'table', %d)", SQLString(opt.option.key), SQLString(opt.option.value),
+		                            opt.table_id.index);
+	}
+	query += ";\n";
+	return query;
+}
+
 void DuckLakeMetadataManager::SetConfigOption(const DuckLakeConfigOption &option) {
 	// check if the option already exists
 	auto &option_key = option.option.key;
diff --git a/src/storage/ducklake_schema_entry.cpp b/src/storage/ducklake_schema_entry.cpp
index b7b03fc962b..875ce91fb0e 100644
--- a/src/storage/ducklake_schema_entry.cpp
+++ b/src/storage/ducklake_schema_entry.cpp
@@ -5,7 +5,9 @@
 #include "duckdb/parser/parsed_data/create_view_info.hpp"
 #include "duckdb/parser/parsed_data/drop_info.hpp"
 #include "duckdb/parser/result_modifier.hpp"
+#include "duckdb/parser/expression/constant_expression.hpp"
 #include "duckdb/planner/parsed_data/bound_create_table_info.hpp"
+#include "functions/ducklake_table_functions.hpp"
 #include "storage/ducklake_catalog.hpp"
 #include "storage/ducklake_table_entry.hpp"
 #include "storage/ducklake_transaction.hpp"
@@ -62,6 +64,27 @@ bool DuckLakeSchemaEntry::HandleCreateConflict(CatalogTransaction transaction, C
 	return true;
 }
 
+static vector<DuckLakeTag> ValidateCreateTableOptions(ClientContext &context, BoundCreateTableInfo &info) {
+	vector<DuckLakeTag> result;
+	auto &base_info = info.Base();
+	if (base_info.options.empty()) {
+		return result;
+	}
+	for (auto &option : base_info.options) {
+		if (!option.second) {
+			throw BinderException("WITH option \"%s\" requires a value", option.first);
+		}
+		if (option.second->GetExpressionClass() != ExpressionClass::CONSTANT) {
+			throw BinderException(
+			    "WITH option \"%s\" must be a constant expression in DuckLake CREATE TABLE / CTAS", option.first);
+		}
+		auto &constant_expr = option.second->Cast<ConstantExpression>();
+		auto tag = ValidateDuckLakeConfigOption(context, option.first, constant_expr.value);
+		result.push_back(std::move(tag));
+	}
+	return result;
+}
+
 optional_ptr<CatalogEntry>
 DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCreateTableInfo &info, string table_uuid,
                                          string table_data_path, unique_ptr<DuckLakePartition> prebuilt_partition_data,
@@ -78,6 +101,8 @@ DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCr
 			throw BinderException("Column name \"%s\" is reserved by DuckLake for internal use", col.Name());
 		}
 	}
+	// validate WITH (...) options before any catalog mutation
+	auto validated_options = ValidateCreateTableOptions(transaction.GetContext(), info);
 	//! get a local table-id
 	auto table_id = TableIndex(duck_transaction.GetLocalCatalogId());
 	// generate field ids based on the column ids
@@ -114,6 +139,14 @@ DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCr
 		table_entry->SetSortData(
 		    DuckLakeTableEntry::BuildSortData(duck_transaction, table_entry->GetColumns(), orders));
 	}
+	// stash on the entry; same-transaction writers pick these up via DuckLakeCopyInput, and at commit
+	// time WriteNewTableConfigOptions persists them under the freshly-allocated committed table_id.
+	// Routing through the entry (instead of mutating catalog options under the transaction-local id)
+	// keeps the catalog free of dead entries on rollback, matching how new_tables / local_changes
+	// already scope transaction state.
+	if (!validated_options.empty()) {
+		table_entry->SetPendingTableOptions(validated_options);
+	}
 	auto result = table_entry.get();
 	duck_transaction.CreateEntry(std::move(table_entry));
 	return result;
diff --git a/src/storage/ducklake_table_entry.cpp b/src/storage/ducklake_table_entry.cpp
index 628b23a1b56..c54790c2d6f 100644
--- a/src/storage/ducklake_table_entry.cpp
+++ b/src/storage/ducklake_table_entry.cpp
@@ -75,6 +75,7 @@ DuckLakeTableEntry::DuckLakeTableEntry(DuckLakeTableEntry &parent, CreateTableIn
 	if (parent.sort_data) {
 		sort_data = make_uniq<DuckLakeSort>(*parent.sort_data);
 	}
+	pending_table_options = parent.pending_table_options;
 	CheckSupportedTypes();
 	if (local_change.type == LocalChangeType::ADD_COLUMN) {
 		LogicalIndex new_col_idx(columns.LogicalColumnCount() - 1);
@@ -99,6 +100,7 @@ DuckLakeTableEntry::DuckLakeTableEntry(DuckLakeTableEntry &parent, CreateTableIn
 	if (parent.sort_data) {
 		sort_data = make_uniq<DuckLakeSort>(*parent.sort_data);
 	}
+	pending_table_options = parent.pending_table_options;
 	CheckSupportedTypes();
 
 	auto changed_id = local_change.field_index;
diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp
index 13df5788c42..f043dff2b48 100644
--- a/src/storage/ducklake_transaction.cpp
+++ b/src/storage/ducklake_transaction.cpp
@@ -1522,6 +1522,7 @@ struct NewTableInfo {
 	vector<DuckLakeNewColumn> new_columns;
 	vector<DuckLakeTableInfo> new_inlined_data_tables;
 	vector<DuckLakeSortInfo> new_sort_keys;
+	vector<DuckLakeConfigOption> new_table_options;
 };
 
 struct NewMacroInfo {
@@ -1772,6 +1773,18 @@ void DuckLakeTransaction::GetNewTableInfo(DuckLakeCommitState &commit_state, Duc
 					result.new_sort_keys.push_back(std::move(sort_key));
 				}
 			}
+			// CREATE TABLE / CTAS WITH (...) options. AlterEntryInternal drops the CREATED entry from
+			// the transaction-local set on rename, so a CREATE+RENAME chain arrives here as a single
+			// RENAMED entry with a still-transaction-local id — emit whenever old_table_id is local.
+			if (old_table_id.IsTransactionLocal()) {
+				auto &pending_options = table.GetPendingTableOptions();
+				for (auto &tag : pending_options) {
+					DuckLakeConfigOption opt;
+					opt.option = tag;
+					opt.table_id = new_table_id;
+					result.new_table_options.push_back(std::move(opt));
+				}
+			}
 			break;
 		}
 		default:
@@ -2383,6 +2396,11 @@ string DuckLakeTransaction::CommitChanges(DuckLakeCommitState &commit_state,
 		batch_queries += metadata_manager->WriteNewColumns(result.new_columns);
 		batch_queries += metadata_manager->WriteNewInlinedTables(commit_snapshot, result.new_inlined_data_tables);
 		batch_queries += metadata_manager->WriteNewSortKeys(commit_snapshot, result.new_sort_keys);
+		batch_queries += metadata_manager->WriteNewTableConfigOptions(result.new_table_options);
+		// re-key in-memory options from the local id to the committed id
+		for (auto &opt : result.new_table_options) {
+			ducklake_catalog.SetConfigOption(opt);
+		}
 		new_tables_result = result.new_tables;
 		new_inlined_data_tables_result = result.new_inlined_data_tables;
 	}
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test
index 85343564a51..e5bda9a2439 100644
--- a/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test
@@ -28,11 +28,11 @@ CREATE TABLE bad_sort (a INTEGER) SORTED BY (nonexistent)
 ----
 <REGEX>:.*nonexistent.*
 
-# WITH (...) clause is rejected by SupportsCreateTable override (DuckLake has no WITH semantics)
+# WITH (...) accepts known DuckLake config option keys; unknown keys are rejected during validation
 statement error
 CREATE TABLE bad_with (a INTEGER) WITH (some_option='x')
 ----
-WITH clause is not supported for tables in a ducklake catalog
+<REGEX>:.*Unsupported option.*
 
 # unsupported transform function
 statement error
diff --git a/test/sql/settings/create_table_with_options.test b/test/sql/settings/create_table_with_options.test
new file mode 100644
index 00000000000..f666f195d1e
--- /dev/null
+++ b/test/sql/settings/create_table_with_options.test
@@ -0,0 +1,144 @@
+# name: test/sql/settings/create_table_with_options.test
+# description: Test CREATE TABLE / CTAS with WITH (...) table-scoped config options
+# group: [settings]
+
+require ducklake
+
+require parquet
+
+statement ok
+ATTACH 'ducklake:__TEST_DIR__/ducklake_create_with_options.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_create_with_options_files');
+
+# CREATE TABLE with a single WITH option persists to ducklake_metadata
+statement ok
+CREATE TABLE ducklake.t_compress(i INTEGER) WITH (parquet_compression='zstd');
+
+query III
+SELECT option_name, value, scope FROM ducklake.options() WHERE option_name='parquet_compression' AND scope='TABLE' AND scope_entry='main.t_compress'
+----
+parquet_compression	zstd	TABLE
+
+# inserting into the table uses the table-scoped option
+statement ok
+INSERT INTO ducklake.t_compress SELECT range FROM range(1000)
+
+query I
+SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_compress/**')
+----
+ZSTD
+
+# CREATE TABLE with multiple WITH options
+statement ok
+CREATE TABLE ducklake.t_multi(i INTEGER) WITH (parquet_compression='gzip', parquet_row_group_size=512);
+
+query III
+SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_multi' ORDER BY option_name
+----
+parquet_compression	gzip	TABLE
+parquet_row_group_size	512	TABLE
+
+# CTAS with WITH options - validates the option both takes effect and is persisted
+statement ok
+SET threads=1
+
+statement ok
+SET preserve_insertion_order=false
+
+statement ok
+CREATE TABLE ducklake.t_ctas WITH (parquet_compression='gzip', parquet_row_group_size=2048) AS SELECT i, 'hello' || i s FROM range(4000) t(i);
+
+query III
+SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_ctas' ORDER BY option_name
+----
+parquet_compression	gzip	TABLE
+parquet_row_group_size	2048	TABLE
+
+# parquet files written by the CTAS use the configured compression
+query I
+SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_ctas/**')
+----
+GZIP
+
+# 4000 rows at row_group_size=2048 produces at least 2 row groups
+query I
+SELECT COUNT(DISTINCT row_group_id) >= 2 FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_ctas/**')
+----
+true
+
+# unsupported option key raises an error
+statement error
+CREATE TABLE ducklake.t_bad(i INTEGER) WITH (not_a_real_option='x');
+----
+Unsupported option
+
+# bad value also fails (parquet_compression rejects unknown codec)
+statement error
+CREATE TABLE ducklake.t_bad(i INTEGER) WITH (parquet_compression='bogus');
+----
+Unsupported codec
+
+statement ok
+DETACH ducklake
+
+# options survive a restart
+statement ok
+ATTACH 'ducklake:__TEST_DIR__/ducklake_create_with_options.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_create_with_options_files');
+
+query I
+SELECT value FROM ducklake.options() WHERE option_name='parquet_compression' AND scope='TABLE' AND scope_entry='main.t_compress'
+----
+zstd
+
+# CREATE OR REPLACE swaps in new options for the new table
+statement ok
+CREATE TABLE ducklake.t_repl(i INTEGER) WITH (parquet_compression='zstd');
+
+statement ok
+CREATE OR REPLACE TABLE ducklake.t_repl(i INTEGER) WITH (parquet_compression='gzip');
+
+query I
+SELECT value FROM ducklake.options() WHERE option_name='parquet_compression' AND scope='TABLE' AND scope_entry='main.t_repl'
+----
+gzip
+
+# CREATE TABLE IF NOT EXISTS does not mutate options on an existing table
+statement ok
+CREATE TABLE ducklake.t_ine(i INTEGER) WITH (parquet_compression='zstd');
+
+statement ok
+CREATE TABLE IF NOT EXISTS ducklake.t_ine(i INTEGER) WITH (parquet_compression='gzip');
+
+query I
+SELECT value FROM ducklake.options() WHERE option_name='parquet_compression' AND scope='TABLE' AND scope_entry='main.t_ine'
+----
+zstd
+
+# WITH (...) on a CTAS beats a schema-scope option for the same key
+statement ok
+CREATE SCHEMA ducklake.s_prec
+
+statement ok
+CALL ducklake.set_option('parquet_compression', 'gzip', schema => 's_prec')
+
+statement ok
+CREATE TABLE ducklake.s_prec.t_prec WITH (parquet_compression='zstd') AS SELECT i FROM range(1000) t(i)
+
+query I
+SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/s_prec/t_prec/**')
+----
+ZSTD
+
+# option keys are case-insensitive and the value is canonicalized to lower
+statement ok
+CREATE TABLE ducklake.t_case(i INTEGER) WITH (PARQUET_COMPRESSION='GZIP')
+
+query II
+SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_case'
+----
+parquet_compression	gzip
+
+# non-constant value is rejected at plan time
+statement error
+CREATE TABLE ducklake.t_nonconst(i INTEGER) WITH (parquet_compression=upper('zstd'))
+----
+must be a constant expression
diff --git a/test/sql/settings/create_table_with_options_rollback.test b/test/sql/settings/create_table_with_options_rollback.test
new file mode 100644
index 00000000000..0df7ee6dbf6
--- /dev/null
+++ b/test/sql/settings/create_table_with_options_rollback.test
@@ -0,0 +1,59 @@
+# name: test/sql/settings/create_table_with_options_rollback.test
+# description: A rolled-back CREATE TABLE WITH (...) must not leak across transactions.
+# group: [settings]
+
+require ducklake
+
+require parquet
+
+statement ok
+ATTACH 'ducklake:__TEST_DIR__/ducklake_with_rollback.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_with_rollback_files');
+
+statement ok
+SET threads=1
+
+statement ok
+SET preserve_insertion_order=false
+
+# create a table with a WITH override, then ROLLBACK. The transaction-local table_id used in this
+# transaction is reset for the next DuckLakeTransaction (each starts at TRANSACTION_LOCAL_ID_START),
+# so a leaked override under that local id would silently contaminate any subsequent transaction
+# whose first new table happens to land on the same local id.
+statement ok
+BEGIN
+
+statement ok
+CREATE TABLE ducklake.t_rolled (i INTEGER) WITH (parquet_compression='gzip')
+
+statement ok
+INSERT INTO ducklake.t_rolled SELECT i FROM range(100) t(i)
+
+statement ok
+ROLLBACK
+
+# nothing about the rolled-back table should remain in metadata
+query I
+SELECT COUNT(*) FROM ducklake.options() WHERE scope='TABLE'
+----
+0
+
+# next transaction creates a plain table (no WITH) and writes to it BEFORE commit, so the
+# INSERT planner looks up options under the transaction-local id. With the leak, that id
+# resolves to the rolled-back gzip override; the parquet write would silently be gzip.
+statement ok
+BEGIN
+
+statement ok
+CREATE TABLE ducklake.t_clean(i INTEGER)
+
+statement ok
+INSERT INTO ducklake.t_clean SELECT i FROM range(2000) t(i)
+
+statement ok
+COMMIT
+
+# default parquet compression in DuckDB is snappy
+query I
+SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_with_rollback_files/main/t_clean/**')
+----
+SNAPPY
diff --git a/test/sql/settings/create_table_with_options_transaction.test b/test/sql/settings/create_table_with_options_transaction.test
new file mode 100644
index 00000000000..15eb479c475
--- /dev/null
+++ b/test/sql/settings/create_table_with_options_transaction.test
@@ -0,0 +1,94 @@
+# name: test/sql/settings/create_table_with_options_transaction.test
+# description: WITH (...) options on CREATE / CTAS persist correctly across same-transaction ALTER and rename
+# group: [settings]
+
+require ducklake
+
+require parquet
+
+statement ok
+ATTACH 'ducklake:__TEST_DIR__/ducklake_create_with_options_txn.db' AS ducklake (DATA_PATH '__TEST_DIR__/ducklake_create_with_options_txn_files');
+
+statement ok
+SET threads=1
+
+statement ok
+SET preserve_insertion_order=false
+
+# CTAS + RENAME within the same transaction: options must follow the new name and the parquet
+# files written during the CTAS must already honor the configured row group size.
+statement ok
+BEGIN
+
+statement ok
+CREATE TABLE ducklake.t_rename WITH (parquet_compression='gzip', parquet_row_group_size=500) AS SELECT i FROM range(2500) t(i);
+
+statement ok
+ALTER TABLE ducklake.t_rename RENAME TO t_renamed
+
+statement ok
+COMMIT
+
+query III rowsort
+SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_renamed' ORDER BY option_name
+----
+parquet_compression	gzip	TABLE
+parquet_row_group_size	500	TABLE
+
+# no leftover options under the original name
+query I
+SELECT COUNT(*) FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_rename'
+----
+0
+
+# data files were written with the configured compression (the CTAS happened before the rename)
+query I
+SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_txn_files/main/**')
+----
+GZIP
+
+# CREATE TABLE + insert + rename within the same transaction
+statement ok
+BEGIN
+
+statement ok
+CREATE TABLE ducklake.t2(i INTEGER) WITH (parquet_compression='zstd');
+
+statement ok
+INSERT INTO ducklake.t2 SELECT range FROM range(1000);
+
+statement ok
+ALTER TABLE ducklake.t2 RENAME TO t2_after
+
+statement ok
+COMMIT
+
+query II
+SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t2_after'
+----
+parquet_compression	zstd
+
+query I
+SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_txn_files/main/t2*/**')
+----
+ZSTD
+
+# CREATE TABLE WITH (...) + same-transaction INSERT must apply the override even though the table
+# is still transaction-local (committed table_id has not been allocated yet). DuckLakeCopyInput pulls
+# the override from DuckLakeTableEntry::pending_table_options.
+statement ok
+BEGIN
+
+statement ok
+CREATE TABLE ducklake.t_local(i INTEGER) WITH (parquet_compression='gzip')
+
+statement ok
+INSERT INTO ducklake.t_local SELECT i FROM range(1000) t(i)
+
+statement ok
+COMMIT
+
+query I
+SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_txn_files/main/t_local/**')
+----
+GZIP

From 83b82f6b850ccb2cec38a69b05e7252c68364f3c Mon Sep 17 00:00:00 2001
From: Alex-Monahan <alex@motherduck.com>
Date: Mon, 4 May 2026 17:44:24 -0700
Subject: [PATCH 04/12] Apply review feedback: rename to
 options_in_create_with, support folded expressions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Naming pass — every WITH-clause-related identifier now uses options_in_create_with /
OptionsInCreateWith, replacing pending_table_options, GetPendingTableOptions,
SetPendingTableOptions, with_options, new_table_options, WriteNewTableConfigOptions,
ValidateCreateTableOptions, and create_with_options.

Architectural cleanup:
- Push WITH-clause validation+population into the schema-flavored DuckLakeCopyInput
  constructor; PlanCreateTableAs no longer mutates copy_input post-construction.
- Replace the inlined get_option lambda in PlanCopyForInsert with a method on
  DuckLakeCopyInput::GetEffectiveOption (the only inlined-helper lambda in DuckLake's
  src/ tree before this change).
- Mirror the partition_data / sort_data copy guard pattern on the alter constructors:
  only copy options_in_create_with when the parent has any.

Functionality:
- Accept foldable expressions (constants, getvariable(...), upper('zstd'), etc.) as
  WITH (...) values, mirroring upstream CREATE SECRET / ATTACH binding. Use
  ConstantBinder + ExpressionExecutor::EvaluateScalar; reject unbindable expressions
  (column refs, subqueries) and prepared-statement parameters.
- Lift the raw-options-map validation loop into a single shared free function
  ValidateOptionsInCreateWith; both call sites collapse to one line each.

Tests:
- Add cases for getvariable() WITH values, function-call folding (upper('zstd')),
  unbound column-ref rejection, and the explicit CREATE TABLE WITH + same-txn INSERT
  scenario.
---
 src/functions/ducklake_set_option.cpp         | 32 +++++++++++++++++++
 .../functions/ducklake_table_functions.hpp    | 10 ++++++
 src/include/storage/ducklake_insert.hpp       | 15 +++++----
 .../storage/ducklake_metadata_manager.hpp     |  2 +-
 src/include/storage/ducklake_table_entry.hpp  | 12 +++----
 src/storage/ducklake_insert.cpp               | 21 ++++--------
 src/storage/ducklake_metadata_manager.cpp     |  8 ++---
 src/storage/ducklake_schema_entry.cpp         | 30 +++--------------
 src/storage/ducklake_table_entry.cpp          |  8 +++--
 src/storage/ducklake_transaction.cpp          | 12 +++----
 .../settings/create_table_with_options.test   | 32 +++++++++++++++++--
 ...create_table_with_options_transaction.test |  2 +-
 12 files changed, 114 insertions(+), 70 deletions(-)

diff --git a/src/functions/ducklake_set_option.cpp b/src/functions/ducklake_set_option.cpp
index 83771a1edf7..db3a5fd6d57 100644
--- a/src/functions/ducklake_set_option.cpp
+++ b/src/functions/ducklake_set_option.cpp
@@ -4,8 +4,40 @@
 #include "storage/ducklake_table_entry.hpp"
 #include "storage/ducklake_schema_entry.hpp"
 
+#include "duckdb/execution/expression_executor.hpp"
+#include "duckdb/parser/parsed_expression.hpp"
+#include "duckdb/planner/binder.hpp"
+#include "duckdb/planner/expression_binder/constant_binder.hpp"
+
 namespace duckdb {
 
+vector<DuckLakeTag> ValidateOptionsInCreateWith(
+    ClientContext &context, const case_insensitive_map_t<unique_ptr<ParsedExpression>> &options) {
+	// Bind via ConstantBinder so each value expression must fold to a literal (no column refs, no
+	// subqueries) — mirrors upstream ATTACH (bind_attach.cpp) and CREATE SECRET (bind_create.cpp).
+	// `allow_unfoldable=true` on EvaluateScalar lets volatile functions like random() through; that's
+	// fine here because we only persist the resulting Value, not the expression tree.
+	// Each expression is copied before binding because CTAS calls this twice (plan time + sink init)
+	// over the same options map; consuming the map (the upstream pattern) would break the second call.
+	auto binder = Binder::CreateBinder(context);
+	ConstantBinder option_binder(*binder, context, "DuckLake WITH option");
+	vector<DuckLakeTag> result;
+	result.reserve(options.size());
+	for (auto &option : options) {
+		if (!option.second) {
+			throw BinderException("WITH option \"%s\" requires a value", option.first);
+		}
+		auto expr_copy = option.second->Copy();
+		auto bound_expr = option_binder.Bind(expr_copy);
+		if (bound_expr->HasParameter()) {
+			throw ParameterNotResolvedException();
+		}
+		auto value = ExpressionExecutor::EvaluateScalar(context, *bound_expr, true);
+		result.push_back(ValidateDuckLakeConfigOption(context, option.first, value));
+	}
+	return result;
+}
+
 struct DuckLakeSetOptionData : public TableFunctionData {
 	DuckLakeSetOptionData(Catalog &catalog, DuckLakeConfigOption option_p)
 	    : catalog(catalog), option(std::move(option_p)) {
diff --git a/src/include/functions/ducklake_table_functions.hpp b/src/include/functions/ducklake_table_functions.hpp
index 571c8d73687..5aeb385ca5b 100644
--- a/src/include/functions/ducklake_table_functions.hpp
+++ b/src/include/functions/ducklake_table_functions.hpp
@@ -14,6 +14,7 @@
 
 namespace duckdb {
 class DuckLakeCatalog;
+class ParsedExpression;
 struct DuckLakeSnapshotInfo;
 struct DuckLakeTag;
 
@@ -21,6 +22,15 @@ struct DuckLakeTag;
 //! Shared by `CALL ducklake.set_option(...)` and CREATE TABLE / CTAS `WITH (...)`.
 DuckLakeTag ValidateDuckLakeConfigOption(ClientContext &context, const string &option_key, const Value &val);
 
+//! Bind, fold, and validate every entry in a CREATE TABLE / CTAS `WITH (...)` options map.
+//! Each value expression must fold to a literal under ConstantBinder (constants, `getvariable(...)`,
+//! `upper('zstd')`, etc.); each key/value is then run through ValidateDuckLakeConfigOption.
+//! Empty input → empty output. Throws BinderException on any unbindable expression or unknown key.
+//! The input map is left intact — copies each expression before binding because CTAS calls this
+//! both at plan time (PlanCreateTableAs) and again during sink-state-init (CreateTableExtended).
+vector<DuckLakeTag> ValidateOptionsInCreateWith(
+    ClientContext &context, const case_insensitive_map_t<unique_ptr<ParsedExpression>> &options);
+
 class DuckLakeTableFunctionUtil {
 public:
 	// Conform timestamp to ISO-8601 extended format with optional fractional seconds and timezone offset, e.g.:
diff --git a/src/include/storage/ducklake_insert.hpp b/src/include/storage/ducklake_insert.hpp
index e2c33c173e6..f1b9a4b76fc 100644
--- a/src/include/storage/ducklake_insert.hpp
+++ b/src/include/storage/ducklake_insert.hpp
@@ -155,17 +155,18 @@ struct DuckLakeCopyInput {
 	//! CTAS-flavored: take the field-id mapping and (optional) partition spec from the planning-time-built
 	//! spec rather than from a DuckLakeTableEntry that hasn't been created yet. field_data is required
 	//! because the parquet writer always needs it; partition_data is null when CREATE TABLE AS has no
-	//! inline PARTITIONED BY clause. `create_with_options` is the raw `WITH (...)` clause from the parser;
-	//! it is validated and surfaced through `with_options` so the parquet write honors it.
+	//! inline PARTITIONED BY clause. `options_in_create_with` is the raw `WITH (...)` clause from the
+	//! parser; it is validated and surfaced through the `options_in_create_with` map so the parquet
+	//! write honors it.
 	DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns,
 	                  const string &data_path_p, DuckLakeFieldData &field_data,
 	                  optional_ptr<DuckLakePartition> partition_data = nullptr,
-	                  const case_insensitive_map_t<unique_ptr<ParsedExpression>> &create_with_options = {});
+	                  const case_insensitive_map_t<unique_ptr<ParsedExpression>> &options_in_create_with = {});
 
 	//! Look up an effective DuckLake config option for this write: returns the WITH override if one is
-	//! present in `with_options`, otherwise falls through to the catalog's table/schema/global lookup.
-	//! Used by PlanCopyForInsert so a CTAS write whose new table_id is not yet allocated still picks
-	//! up the user's WITH (...) values.
+	//! present in `options_in_create_with`, otherwise falls through to the catalog's table/schema/global
+	//! lookup. Used by PlanCopyForInsert so a CTAS write whose new table_id is not yet allocated still
+	//! picks up the user's WITH (...) values.
 	bool GetEffectiveOption(const string &key, string &out) const;
 
 	DuckLakeCatalog &catalog;
@@ -180,7 +181,7 @@ struct DuckLakeCopyInput {
 	optional_idx get_table_index;
 	//! CREATE TABLE / CTAS WITH (...) options that override same-key catalog options for this write.
 	//! Empty for plain INSERT. Validated at plan time before reaching here.
-	option_map_t with_options;
+	option_map_t options_in_create_with;
 };
 
 } // namespace duckdb
diff --git a/src/include/storage/ducklake_metadata_manager.hpp b/src/include/storage/ducklake_metadata_manager.hpp
index 5192794220c..ccfed486648 100644
--- a/src/include/storage/ducklake_metadata_manager.hpp
+++ b/src/include/storage/ducklake_metadata_manager.hpp
@@ -174,7 +174,7 @@ class DuckLakeMetadataManager {
 	virtual string WriteNewPartitionKeys(DuckLakeSnapshot commit_snapshot,
 	                                     const vector<DuckLakePartitionInfo> &new_partitions);
 	virtual string WriteNewSortKeys(DuckLakeSnapshot commit_snapshot, const vector<DuckLakeSortInfo> &new_sorts);
-	virtual string WriteNewTableConfigOptions(const vector<DuckLakeConfigOption> &new_options);
+	virtual string WriteOptionsInCreateWith(const vector<DuckLakeConfigOption> &options_in_create_with);
 	virtual string WriteDroppedColumns(const vector<DuckLakeDroppedColumn> &dropped_columns);
 	virtual string WriteNewColumns(const vector<DuckLakeNewColumn> &new_columns);
 	virtual string WriteNewTags(const vector<DuckLakeTagInfo> &new_tags);
diff --git a/src/include/storage/ducklake_table_entry.hpp b/src/include/storage/ducklake_table_entry.hpp
index d1d2c082db8..5cd952aa669 100644
--- a/src/include/storage/ducklake_table_entry.hpp
+++ b/src/include/storage/ducklake_table_entry.hpp
@@ -84,12 +84,12 @@ class DuckLakeTableEntry : public TableCatalogEntry {
 	optional_ptr<const DuckLakeFieldId> GetFieldId(FieldIndex field_index) const;
 	void SetPartitionData(unique_ptr<DuckLakePartition> partition_data);
 	void SetSortData(unique_ptr<DuckLakeSort> sort_data);
-	//! Pending table-scoped config options collected from a CREATE TABLE / CTAS WITH (...) clause.
-	void SetPendingTableOptions(vector<DuckLakeTag> options) {
-		pending_table_options = std::move(options);
+	//! Table-scoped config options collected from a CREATE TABLE / CTAS WITH (...) clause.
+	void SetOptionsInCreateWith(vector<DuckLakeTag> options) {
+		options_in_create_with = std::move(options);
 	}
-	const vector<DuckLakeTag> &GetPendingTableOptions() const {
-		return pending_table_options;
+	const vector<DuckLakeTag> &GetOptionsInCreateWith() const {
+		return options_in_create_with;
 	}
 	shared_ptr<DuckLakeTableStats> GetTableStats(ClientContext &context);
 	shared_ptr<DuckLakeTableStats> GetTableStats(DuckLakeTransaction &transaction);
@@ -195,7 +195,7 @@ class DuckLakeTableEntry : public TableCatalogEntry {
 	// only set for REMOVED_COLUMN
 	unique_ptr<ColumnChangeInfo> changed_fields;
 	// table-scoped config options collected from CREATE TABLE / CTAS WITH (...), pending until commit
-	vector<DuckLakeTag> pending_table_options;
+	vector<DuckLakeTag> options_in_create_with;
 };
 
 } // namespace duckdb
diff --git a/src/storage/ducklake_insert.cpp b/src/storage/ducklake_insert.cpp
index ac12159b77d..5e5da2f8c46 100644
--- a/src/storage/ducklake_insert.cpp
+++ b/src/storage/ducklake_insert.cpp
@@ -16,7 +16,6 @@
 #include "duckdb/execution/operator/order/physical_order.hpp"
 #include "duckdb/execution/operator/projection/physical_projection.hpp"
 #include "duckdb/execution/operator/scan/physical_table_scan.hpp"
-#include "duckdb/parser/expression/constant_expression.hpp"
 #include "duckdb/parser/parser.hpp"
 #include "duckdb/planner/binder.hpp"
 #include "duckdb/planner/expression_binder.hpp"
@@ -354,15 +353,15 @@ DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeTableEntry
 	encryption_key = catalog.GenerateEncryptionKey(context);
 	// Surface CREATE TABLE / CTAS WITH (...) options for transaction-local tables: the catalog
 	// is not mutated for transaction-local ids, so the only source of truth is the table entry.
-	for (auto &tag : table.GetPendingTableOptions()) {
-		with_options[tag.key] = tag.value;
+	for (auto &tag : table.GetOptionsInCreateWith()) {
+		options_in_create_with[tag.key] = tag.value;
 	}
 }
 
 DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry &schema, const ColumnList &columns,
                                      const string &data_path_p, DuckLakeFieldData &field_data_p,
                                      optional_ptr<DuckLakePartition> partition_data_p,
-                                     const case_insensitive_map_t<unique_ptr<ParsedExpression>> &create_with_options)
+                                     const case_insensitive_map_t<unique_ptr<ParsedExpression>> &options_in_create_with)
     : catalog(schema.ParentCatalog().Cast<DuckLakeCatalog>()), columns(columns), data_path(data_path_p) {
 	field_data = &field_data_p;
 	partition_data = partition_data_p;
@@ -371,20 +370,14 @@ DuckLakeCopyInput::DuckLakeCopyInput(ClientContext &context, DuckLakeSchemaEntry
 	// Validate CREATE TABLE AS ... WITH (key=value, ...) and surface as per-write overrides so the
 	// parquet files land with the requested compression / row group size, even though the new table_id
 	// won't be allocated until DuckLakeInsert::GetGlobalSinkState (long after this plan is built).
-	for (auto &option : create_with_options) {
-		if (!option.second || option.second->GetExpressionClass() != ExpressionClass::CONSTANT) {
-			throw BinderException(
-			    "WITH option \"%s\" must be a constant expression in DuckLake CREATE TABLE / CTAS", option.first);
-		}
-		auto &constant_expr = option.second->Cast<ConstantExpression>();
-		auto tag = ValidateDuckLakeConfigOption(context, option.first, constant_expr.value);
-		with_options[tag.key] = tag.value;
+	for (auto &tag : ValidateOptionsInCreateWith(context, options_in_create_with)) {
+		this->options_in_create_with[tag.key] = tag.value;
 	}
 }
 
 bool DuckLakeCopyInput::GetEffectiveOption(const string &key, string &out) const {
-	auto it = with_options.find(key);
-	if (it != with_options.end()) {
+	auto it = options_in_create_with.find(key);
+	if (it != options_in_create_with.end()) {
 		out = it->second;
 		return true;
 	}
diff --git a/src/storage/ducklake_metadata_manager.cpp b/src/storage/ducklake_metadata_manager.cpp
index aeed0a15e20..b624f38a8c3 100644
--- a/src/storage/ducklake_metadata_manager.cpp
+++ b/src/storage/ducklake_metadata_manager.cpp
@@ -4573,14 +4573,14 @@ WHERE {SNAPSHOT_ID} >= begin_snapshot AND ({SNAPSHOT_ID} < end_snapshot OR end_s
 	return table_sizes;
 }
 
-string DuckLakeMetadataManager::WriteNewTableConfigOptions(const vector<DuckLakeConfigOption> &new_options) {
-	if (new_options.empty()) {
+string DuckLakeMetadataManager::WriteOptionsInCreateWith(const vector<DuckLakeConfigOption> &options_in_create_with) {
+	if (options_in_create_with.empty()) {
 		return string();
 	}
 	// fresh committed table-id from LocalChangeType::CREATED only — no UPSERT needed
 	string query = "INSERT INTO {METADATA_CATALOG}.ducklake_metadata VALUES ";
-	for (idx_t i = 0; i < new_options.size(); i++) {
-		auto &opt = new_options[i];
+	for (idx_t i = 0; i < options_in_create_with.size(); i++) {
+		auto &opt = options_in_create_with[i];
 		D_ASSERT(opt.table_id.IsValid() && !opt.table_id.IsTransactionLocal());
 		if (i > 0) {
 			query += ", ";
diff --git a/src/storage/ducklake_schema_entry.cpp b/src/storage/ducklake_schema_entry.cpp
index 875ce91fb0e..06856717a4f 100644
--- a/src/storage/ducklake_schema_entry.cpp
+++ b/src/storage/ducklake_schema_entry.cpp
@@ -5,7 +5,6 @@
 #include "duckdb/parser/parsed_data/create_view_info.hpp"
 #include "duckdb/parser/parsed_data/drop_info.hpp"
 #include "duckdb/parser/result_modifier.hpp"
-#include "duckdb/parser/expression/constant_expression.hpp"
 #include "duckdb/planner/parsed_data/bound_create_table_info.hpp"
 #include "functions/ducklake_table_functions.hpp"
 #include "storage/ducklake_catalog.hpp"
@@ -64,27 +63,6 @@ bool DuckLakeSchemaEntry::HandleCreateConflict(CatalogTransaction transaction, C
 	return true;
 }
 
-static vector<DuckLakeTag> ValidateCreateTableOptions(ClientContext &context, BoundCreateTableInfo &info) {
-	vector<DuckLakeTag> result;
-	auto &base_info = info.Base();
-	if (base_info.options.empty()) {
-		return result;
-	}
-	for (auto &option : base_info.options) {
-		if (!option.second) {
-			throw BinderException("WITH option \"%s\" requires a value", option.first);
-		}
-		if (option.second->GetExpressionClass() != ExpressionClass::CONSTANT) {
-			throw BinderException(
-			    "WITH option \"%s\" must be a constant expression in DuckLake CREATE TABLE / CTAS", option.first);
-		}
-		auto &constant_expr = option.second->Cast<ConstantExpression>();
-		auto tag = ValidateDuckLakeConfigOption(context, option.first, constant_expr.value);
-		result.push_back(std::move(tag));
-	}
-	return result;
-}
-
 optional_ptr<CatalogEntry>
 DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCreateTableInfo &info, string table_uuid,
                                          string table_data_path, unique_ptr<DuckLakePartition> prebuilt_partition_data,
@@ -102,7 +80,7 @@ DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCr
 		}
 	}
 	// validate WITH (...) options before any catalog mutation
-	auto validated_options = ValidateCreateTableOptions(transaction.GetContext(), info);
+	auto options_in_create_with = ValidateOptionsInCreateWith(transaction.GetContext(), base_info.options);
 	//! get a local table-id
 	auto table_id = TableIndex(duck_transaction.GetLocalCatalogId());
 	// generate field ids based on the column ids
@@ -140,12 +118,12 @@ DuckLakeSchemaEntry::CreateTableExtended(CatalogTransaction transaction, BoundCr
 		    DuckLakeTableEntry::BuildSortData(duck_transaction, table_entry->GetColumns(), orders));
 	}
 	// stash on the entry; same-transaction writers pick these up via DuckLakeCopyInput, and at commit
-	// time WriteNewTableConfigOptions persists them under the freshly-allocated committed table_id.
+	// time WriteOptionsInCreateWith persists them under the freshly-allocated committed table_id.
 	// Routing through the entry (instead of mutating catalog options under the transaction-local id)
 	// keeps the catalog free of dead entries on rollback, matching how new_tables / local_changes
 	// already scope transaction state.
-	if (!validated_options.empty()) {
-		table_entry->SetPendingTableOptions(validated_options);
+	if (!options_in_create_with.empty()) {
+		table_entry->SetOptionsInCreateWith(options_in_create_with);
 	}
 	auto result = table_entry.get();
 	duck_transaction.CreateEntry(std::move(table_entry));
diff --git a/src/storage/ducklake_table_entry.cpp b/src/storage/ducklake_table_entry.cpp
index c54790c2d6f..e45c6852806 100644
--- a/src/storage/ducklake_table_entry.cpp
+++ b/src/storage/ducklake_table_entry.cpp
@@ -75,7 +75,9 @@ DuckLakeTableEntry::DuckLakeTableEntry(DuckLakeTableEntry &parent, CreateTableIn
 	if (parent.sort_data) {
 		sort_data = make_uniq<DuckLakeSort>(*parent.sort_data);
 	}
-	pending_table_options = parent.pending_table_options;
+	if (!parent.options_in_create_with.empty()) {
+		options_in_create_with = parent.options_in_create_with;
+	}
 	CheckSupportedTypes();
 	if (local_change.type == LocalChangeType::ADD_COLUMN) {
 		LogicalIndex new_col_idx(columns.LogicalColumnCount() - 1);
@@ -100,7 +102,9 @@ DuckLakeTableEntry::DuckLakeTableEntry(DuckLakeTableEntry &parent, CreateTableIn
 	if (parent.sort_data) {
 		sort_data = make_uniq<DuckLakeSort>(*parent.sort_data);
 	}
-	pending_table_options = parent.pending_table_options;
+	if (!parent.options_in_create_with.empty()) {
+		options_in_create_with = parent.options_in_create_with;
+	}
 	CheckSupportedTypes();
 
 	auto changed_id = local_change.field_index;
diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp
index f043dff2b48..1d1ec739ffb 100644
--- a/src/storage/ducklake_transaction.cpp
+++ b/src/storage/ducklake_transaction.cpp
@@ -1522,7 +1522,7 @@ struct NewTableInfo {
 	vector<DuckLakeNewColumn> new_columns;
 	vector<DuckLakeTableInfo> new_inlined_data_tables;
 	vector<DuckLakeSortInfo> new_sort_keys;
-	vector<DuckLakeConfigOption> new_table_options;
+	vector<DuckLakeConfigOption> options_in_create_with;
 };
 
 struct NewMacroInfo {
@@ -1777,12 +1777,12 @@ void DuckLakeTransaction::GetNewTableInfo(DuckLakeCommitState &commit_state, Duc
 			// the transaction-local set on rename, so a CREATE+RENAME chain arrives here as a single
 			// RENAMED entry with a still-transaction-local id — emit whenever old_table_id is local.
 			if (old_table_id.IsTransactionLocal()) {
-				auto &pending_options = table.GetPendingTableOptions();
-				for (auto &tag : pending_options) {
+				auto &options_in_create_with = table.GetOptionsInCreateWith();
+				for (auto &tag : options_in_create_with) {
 					DuckLakeConfigOption opt;
 					opt.option = tag;
 					opt.table_id = new_table_id;
-					result.new_table_options.push_back(std::move(opt));
+					result.options_in_create_with.push_back(std::move(opt));
 				}
 			}
 			break;
@@ -2396,9 +2396,9 @@ string DuckLakeTransaction::CommitChanges(DuckLakeCommitState &commit_state,
 		batch_queries += metadata_manager->WriteNewColumns(result.new_columns);
 		batch_queries += metadata_manager->WriteNewInlinedTables(commit_snapshot, result.new_inlined_data_tables);
 		batch_queries += metadata_manager->WriteNewSortKeys(commit_snapshot, result.new_sort_keys);
-		batch_queries += metadata_manager->WriteNewTableConfigOptions(result.new_table_options);
+		batch_queries += metadata_manager->WriteOptionsInCreateWith(result.options_in_create_with);
 		// re-key in-memory options from the local id to the committed id
-		for (auto &opt : result.new_table_options) {
+		for (auto &opt : result.options_in_create_with) {
 			ducklake_catalog.SetConfigOption(opt);
 		}
 		new_tables_result = result.new_tables;
diff --git a/test/sql/settings/create_table_with_options.test b/test/sql/settings/create_table_with_options.test
index f666f195d1e..2f7c05848b4 100644
--- a/test/sql/settings/create_table_with_options.test
+++ b/test/sql/settings/create_table_with_options.test
@@ -137,8 +137,34 @@ SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_
 ----
 parquet_compression	gzip
 
-# non-constant value is rejected at plan time
+# foldable expressions (function calls) are accepted and folded to a literal
+statement ok
+CREATE TABLE ducklake.t_fold(i INTEGER) WITH (parquet_compression=upper('zstd'))
+
+query II
+SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_fold'
+----
+parquet_compression	zstd
+
+# session variables are read at bind time via getvariable(...) and feed the WITH clause
+statement ok
+SET VARIABLE an_option = (SELECT 'zstd')
+
+statement ok
+CREATE OR REPLACE TABLE ducklake.t_var WITH (parquet_compression=getvariable('an_option')) AS FROM range(42)
+
+query II
+SELECT option_name, value FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_var'
+----
+parquet_compression	zstd
+
+query I
+SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_var/**')
+----
+ZSTD
+
+# expressions that cannot fold to a literal (e.g. a column reference with no FROM) are rejected
 statement error
-CREATE TABLE ducklake.t_nonconst(i INTEGER) WITH (parquet_compression=upper('zstd'))
+CREATE TABLE ducklake.t_bad_col(i INTEGER) WITH (parquet_compression=some_unknown_column)
 ----
-must be a constant expression
+some_unknown_column
diff --git a/test/sql/settings/create_table_with_options_transaction.test b/test/sql/settings/create_table_with_options_transaction.test
index 15eb479c475..9bd9fc24e98 100644
--- a/test/sql/settings/create_table_with_options_transaction.test
+++ b/test/sql/settings/create_table_with_options_transaction.test
@@ -75,7 +75,7 @@ ZSTD
 
 # CREATE TABLE WITH (...) + same-transaction INSERT must apply the override even though the table
 # is still transaction-local (committed table_id has not been allocated yet). DuckLakeCopyInput pulls
-# the override from DuckLakeTableEntry::pending_table_options.
+# the override from DuckLakeTableEntry::options_in_create_with.
 statement ok
 BEGIN
 

From 3b4fe7f46f6a8728d47bd76a3e85fe0e8a67db52 Mon Sep 17 00:00:00 2001
From: Alex-Monahan <alexm91@vt.edu>
Date: Mon, 4 May 2026 17:50:10 -0700
Subject: [PATCH 05/12] formatting fixes

---
 src/functions/ducklake_set_option.cpp              | 7 +++----
 src/include/functions/ducklake_table_functions.hpp | 4 ++--
 src/storage/ducklake_insert.cpp                    | 3 +--
 src/storage/ducklake_transaction.cpp               | 4 ++--
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/functions/ducklake_set_option.cpp b/src/functions/ducklake_set_option.cpp
index db3a5fd6d57..eda3cd0eccd 100644
--- a/src/functions/ducklake_set_option.cpp
+++ b/src/functions/ducklake_set_option.cpp
@@ -11,8 +11,8 @@
 
 namespace duckdb {
 
-vector<DuckLakeTag> ValidateOptionsInCreateWith(
-    ClientContext &context, const case_insensitive_map_t<unique_ptr<ParsedExpression>> &options) {
+vector<DuckLakeTag> ValidateOptionsInCreateWith(ClientContext &context,
+                                                const case_insensitive_map_t<unique_ptr<ParsedExpression>> &options) {
 	// Bind via ConstantBinder so each value expression must fold to a literal (no column refs, no
 	// subqueries) — mirrors upstream ATTACH (bind_attach.cpp) and CREATE SECRET (bind_create.cpp).
 	// `allow_unfoldable=true` on EvaluateScalar lets volatile functions like random() through; that's
@@ -136,8 +136,7 @@ static unique_ptr<FunctionData> DuckLakeSetOptionBind(ClientContext &context, Ta
                                                       vector<LogicalType> &return_types, vector<string> &names) {
 	auto &catalog = DuckLakeBaseMetadataFunction::GetCatalog(context, input.inputs[0]);
 	DuckLakeConfigOption config_option;
-	config_option.option =
-	    ValidateDuckLakeConfigOption(context, StringValue::Get(input.inputs[1]), input.inputs[2]);
+	config_option.option = ValidateDuckLakeConfigOption(context, StringValue::Get(input.inputs[1]), input.inputs[2]);
 	auto &option = config_option.option.key;
 
 	// read the scope
diff --git a/src/include/functions/ducklake_table_functions.hpp b/src/include/functions/ducklake_table_functions.hpp
index 5aeb385ca5b..639136061e9 100644
--- a/src/include/functions/ducklake_table_functions.hpp
+++ b/src/include/functions/ducklake_table_functions.hpp
@@ -28,8 +28,8 @@ DuckLakeTag ValidateDuckLakeConfigOption(ClientContext &context, const string &o
 //! Empty input → empty output. Throws BinderException on any unbindable expression or unknown key.
 //! The input map is left intact — copies each expression before binding because CTAS calls this
 //! both at plan time (PlanCreateTableAs) and again during sink-state-init (CreateTableExtended).
-vector<DuckLakeTag> ValidateOptionsInCreateWith(
-    ClientContext &context, const case_insensitive_map_t<unique_ptr<ParsedExpression>> &options);
+vector<DuckLakeTag> ValidateOptionsInCreateWith(ClientContext &context,
+                                                const case_insensitive_map_t<unique_ptr<ParsedExpression>> &options);
 
 class DuckLakeTableFunctionUtil {
 public:
diff --git a/src/storage/ducklake_insert.cpp b/src/storage/ducklake_insert.cpp
index 5e5da2f8c46..e260b37e403 100644
--- a/src/storage/ducklake_insert.cpp
+++ b/src/storage/ducklake_insert.cpp
@@ -553,8 +553,7 @@ DuckLakeCopyOptions DuckLakeInsert::GetCopyOptions(ClientContext &context, DuckL
 	if (copy_input.GetEffectiveOption("target_file_size", target_file_size_str)) {
 		target_file_size = Value(target_file_size_str).GetValue<idx_t>();
 	} else {
-		target_file_size =
-		    catalog.GetConfigOption<idx_t>("target_file_size", schema_id, table_id, target_file_size);
+		target_file_size = catalog.GetConfigOption<idx_t>("target_file_size", schema_id, table_id, target_file_size);
 	}
 
 	// Always use native parquet geometry for writing
diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp
index 1d1ec739ffb..35f929d4b3f 100644
--- a/src/storage/ducklake_transaction.cpp
+++ b/src/storage/ducklake_transaction.cpp
@@ -736,8 +736,8 @@ Connection &DuckLakeTransaction::GetConnection() {
 
 bool DuckLakeTransaction::SchemaChangesMade() const {
 	return !new_tables.empty() || !dropped_tables.empty() || new_schemas || !dropped_schemas.empty() ||
-	       !dropped_views.empty() || !renamed_views.empty() || !new_macros.empty() ||
-	       !dropped_scalar_macros.empty() || !dropped_table_macros.empty();
+	       !dropped_views.empty() || !renamed_views.empty() || !new_macros.empty() || !dropped_scalar_macros.empty() ||
+	       !dropped_table_macros.empty();
 }
 
 bool DuckLakeTransaction::ChangesMade() const {

From 624417e99a9c24600728cf226c2804c0d8aee3b7 Mon Sep 17 00:00:00 2001
From: Alex-Monahan <alex@motherduck.com>
Date: Tue, 5 May 2026 13:18:02 -0700
Subject: [PATCH 06/12] Add combined PARTITIONED BY + SORTED BY + WITH coverage
 to create_table_with_options.test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two new test cases — one plain CREATE TABLE, one CTAS — exercise all three
clauses in the same statement. Verifies:

- Parser accepts the three-clause combination (no syntax conflicts).
- WITH options (parquet_compression, parquet_row_group_size) persist to
  ducklake_metadata under the new table_id.
- PARTITIONED BY produces hive 'i=N/' directories in the data path; for
  20 distinct values we see 20 partition directories.
- WITH compression overrides the default (gzip on the plain path, zstd on
  the CTAS path) and is observed in parquet_metadata().

SORT correctness is exercised here only by not-erroring; full SORT
verification lives in test/sql/sorted_table/* and
test/sql/create_table_inline_partition_sort/*_sort_verification.test.
---
 .../settings/create_table_with_options.test   | 61 +++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/test/sql/settings/create_table_with_options.test b/test/sql/settings/create_table_with_options.test
index 2f7c05848b4..94bb532a0bb 100644
--- a/test/sql/settings/create_table_with_options.test
+++ b/test/sql/settings/create_table_with_options.test
@@ -168,3 +168,64 @@ statement error
 CREATE TABLE ducklake.t_bad_col(i INTEGER) WITH (parquet_compression=some_unknown_column)
 ----
 some_unknown_column
+
+# All three clauses together: PARTITIONED BY + SORTED BY + WITH on plain CREATE TABLE.
+# Verifies the parser accepts the combination, the WITH options are persisted, and a follow-up
+# INSERT uses all three (compression from WITH, hive layout from PARTITIONED BY).
+statement ok
+CREATE TABLE ducklake.t_combo(i INTEGER, s VARCHAR)
+PARTITIONED BY (i)
+SORTED BY (s)
+WITH (parquet_compression='gzip', parquet_row_group_size=2048)
+
+query III rowsort
+SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_combo' ORDER BY option_name
+----
+parquet_compression	gzip	TABLE
+parquet_row_group_size	2048	TABLE
+
+statement ok
+INSERT INTO ducklake.t_combo SELECT i, 'row' || i FROM range(20) t(i)
+
+# follow-up INSERT honors WITH compression
+query I
+SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**')
+----
+GZIP
+
+# PARTITIONED BY (i) produces hive-style i=N/ directories, one per distinct i (20 here)
+query I
+SELECT COUNT(DISTINCT regexp_extract(file_name, 'i=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**')
+----
+20
+
+# SORTED BY data ordering is exercised here only via not-erroring; full SORT correctness lives in
+# test/sql/sorted_table/* and test/sql/create_table_inline_partition_sort/*_sort_verification.test.
+
+# All three clauses together on a CTAS — verifies plan-time PARTITIONED BY layout, SORTED BY data
+# ordering, and WITH compression all coexist on a CREATE TABLE AS SELECT.
+statement ok
+CREATE TABLE ducklake.t_combo_ctas
+PARTITIONED BY (i)
+SORTED BY (s)
+WITH (parquet_compression='zstd', parquet_row_group_size=2048)
+AS SELECT range AS i, 'row' || range AS s FROM range(20)
+
+query III rowsort
+SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_combo_ctas' ORDER BY option_name
+----
+parquet_compression	zstd	TABLE
+parquet_row_group_size	2048	TABLE
+
+# CTAS-written parquet files use the WITH compression
+query I
+SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**')
+----
+ZSTD
+
+# rows landed in their hive partitions (one parquet directory per i value)
+query I
+SELECT COUNT(DISTINCT regexp_extract(file_name, 'i=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**')
+----
+20
+

From 63b467869d72e157a3c9262e4296cf15e879ab10 Mon Sep 17 00:00:00 2001
From: Alex-Monahan <alex@motherduck.com>
Date: Tue, 5 May 2026 13:29:31 -0700
Subject: [PATCH 07/12] Validate SORTED BY in the combined PARTITIONED BY +
 SORTED BY + WITH tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both the plain-INSERT and CTAS combined-clause tests now hardcode per-rowgroup
stats_min/max for the sort key, mirroring the technique in
create_table_inline_ctas_sort_verification.test.

Layout: 6144 rows, p = i // 3072 (two partitions), b = 6143 - i (input is b
DESCENDING). With parquet_row_group_size=2048 from WITH and
preserve_insertion_order=true, each partition file has exactly 2 rowgroups.
After SORTED BY (b), per-rowgroup b stats are disjoint and ascending:

  Partition p=0 (b ∈ [3072, 6143]):
    rowgroup 0: b ∈ [3072, 5119]
    rowgroup 1: b ∈ [5120, 6143]
  Partition p=1 (b ∈ [0, 3071]):
    rowgroup 0: b ∈ [0, 2047]
    rowgroup 1: b ∈ [2048, 3071]

Without sort, rowgroup 0 of each file would carry the full input range of b
(descending), so the disjoint-ascending shape is a tight regression test for
'sort actually reordered rows on disk' on both code paths.
---
 .../settings/create_table_with_options.test   | 88 +++++++++++++++----
 1 file changed, 69 insertions(+), 19 deletions(-)

diff --git a/test/sql/settings/create_table_with_options.test b/test/sql/settings/create_table_with_options.test
index 94bb532a0bb..4adf367c99b 100644
--- a/test/sql/settings/create_table_with_options.test
+++ b/test/sql/settings/create_table_with_options.test
@@ -169,13 +169,26 @@ CREATE TABLE ducklake.t_bad_col(i INTEGER) WITH (parquet_compression=some_unknow
 ----
 some_unknown_column
 
+# preserve_insertion_order=true keeps rowgroup boundaries deterministic so we can hardcode
+# per-rowgroup stats_min/max below to verify the sort actually reordered rows on disk.
+statement ok
+SET preserve_insertion_order=true
+
 # All three clauses together: PARTITIONED BY + SORTED BY + WITH on plain CREATE TABLE.
-# Verifies the parser accepts the combination, the WITH options are persisted, and a follow-up
-# INSERT uses all three (compression from WITH, hive layout from PARTITIONED BY).
+# Verifies the parser accepts the combination, the WITH options are persisted, the partition layout
+# matches PARTITIONED BY, the WITH compression takes effect, and SORTED BY actually reorders rows
+# (per-rowgroup b stats are disjoint and ascending after sort).
+#
+# Layout (6144 rows, parquet_row_group_size=2048, single thread, preserve_insertion_order=true):
+#   PARTITIONED BY (p):  p = i // 3072 ∈ {0, 1}  → 2 hive directories (p=0/, p=1/)
+#   SORTED BY (b):       b = 6143 - i            → input is b DESCENDING; after sort, b ASCENDING
+#   Rowgroups per partition: 3072 rows ÷ 2048 = 2 rowgroups (sizes 2048, 1024)
+#   Partition p=0: b ∈ [3072, 6143]   →  rg 0: b ∈ [3072,5119]   rg 1: b ∈ [5120,6143]
+#   Partition p=1: b ∈ [0, 3071]      →  rg 0: b ∈ [0,2047]      rg 1: b ∈ [2048,3071]
 statement ok
-CREATE TABLE ducklake.t_combo(i INTEGER, s VARCHAR)
-PARTITIONED BY (i)
-SORTED BY (s)
+CREATE TABLE ducklake.t_combo(p INTEGER, b INTEGER)
+PARTITIONED BY (p)
+SORTED BY (b)
 WITH (parquet_compression='gzip', parquet_row_group_size=2048)
 
 query III rowsort
@@ -185,31 +198,49 @@ parquet_compression	gzip	TABLE
 parquet_row_group_size	2048	TABLE
 
 statement ok
-INSERT INTO ducklake.t_combo SELECT i, 'row' || i FROM range(20) t(i)
+INSERT INTO ducklake.t_combo SELECT i // 3072 AS p, 6143 - i AS b FROM range(6144) t(i)
 
 # follow-up INSERT honors WITH compression
 query I
-SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**')
+SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**') WHERE path_in_schema = 'b'
 ----
 GZIP
 
-# PARTITIONED BY (i) produces hive-style i=N/ directories, one per distinct i (20 here)
+# PARTITIONED BY (p) produces 2 hive directories
 query I
-SELECT COUNT(DISTINCT regexp_extract(file_name, 'i=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**')
+SELECT COUNT(DISTINCT regexp_extract(file_name, 'p=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**')
 ----
-20
+2
 
-# SORTED BY data ordering is exercised here only via not-erroring; full SORT correctness lives in
-# test/sql/sorted_table/* and test/sql/create_table_inline_partition_sort/*_sort_verification.test.
+# parquet_row_group_size=2048 from WITH produces 2 rowgroups per partition file (4 total)
+query I
+SELECT COUNT(*) FROM (SELECT DISTINCT file_name, row_group_id FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**'))
+----
+4
+
+# SORTED BY (b) reordered the rows: per-rowgroup b stats are disjoint and ascending within each
+# partition file. (Input was b DESCENDING; without sort, rg 0's b would span the full input range.)
+query IIII
+SELECT regexp_extract(file_name, 'p=(\d+)', 1) AS partition,
+       row_group_id, stats_min::INTEGER AS rg_min, stats_max::INTEGER AS rg_max
+FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo/**')
+WHERE path_in_schema = 'b'
+ORDER BY partition, row_group_id
+----
+0	0	3072	5119
+0	1	5120	6143
+1	0	0	2047
+1	1	2048	3071
 
 # All three clauses together on a CTAS — verifies plan-time PARTITIONED BY layout, SORTED BY data
 # ordering, and WITH compression all coexist on a CREATE TABLE AS SELECT.
+# Same data layout as t_combo (6144 rows, p=i//3072, b=6143-i) but compression=zstd this time.
 statement ok
 CREATE TABLE ducklake.t_combo_ctas
-PARTITIONED BY (i)
-SORTED BY (s)
+PARTITIONED BY (p)
+SORTED BY (b)
 WITH (parquet_compression='zstd', parquet_row_group_size=2048)
-AS SELECT range AS i, 'row' || range AS s FROM range(20)
+AS SELECT i // 3072 AS p, 6143 - i AS b FROM range(6144) t(i)
 
 query III rowsort
 SELECT option_name, value, scope FROM ducklake.options() WHERE scope='TABLE' AND scope_entry='main.t_combo_ctas' ORDER BY option_name
@@ -219,13 +250,32 @@ parquet_row_group_size	2048	TABLE
 
 # CTAS-written parquet files use the WITH compression
 query I
-SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**')
+SELECT DISTINCT compression FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**') WHERE path_in_schema = 'b'
 ----
 ZSTD
 
-# rows landed in their hive partitions (one parquet directory per i value)
+# 2 hive partition directories
 query I
-SELECT COUNT(DISTINCT regexp_extract(file_name, 'i=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**')
+SELECT COUNT(DISTINCT regexp_extract(file_name, 'p=(\d+)', 1)) FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**')
+----
+2
+
+# 2 rowgroups per partition file (4 total)
+query I
+SELECT COUNT(*) FROM (SELECT DISTINCT file_name, row_group_id FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**'))
+----
+4
+
+# SORTED BY (b) on the CTAS path: same per-rowgroup hardcoded stats as t_combo.
+query IIII
+SELECT regexp_extract(file_name, 'p=(\d+)', 1) AS partition,
+       row_group_id, stats_min::INTEGER AS rg_min, stats_max::INTEGER AS rg_max
+FROM parquet_metadata('__TEST_DIR__/ducklake_create_with_options_files/main/t_combo_ctas/**')
+WHERE path_in_schema = 'b'
+ORDER BY partition, row_group_id
 ----
-20
+0	0	3072	5119
+0	1	5120	6143
+1	0	0	2047
+1	1	2048	3071
 

From 962a8bb0d98917407e480256c1086e5ff2e03ffd Mon Sep 17 00:00:00 2001
From: Alex-Monahan <alexm91@vt.edu>
Date: Tue, 5 May 2026 13:32:49 -0700
Subject: [PATCH 08/12] Remove unnecessary comment

---
 test/sql/settings/create_table_with_options.test | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/test/sql/settings/create_table_with_options.test b/test/sql/settings/create_table_with_options.test
index 4adf367c99b..b71b67e3cf4 100644
--- a/test/sql/settings/create_table_with_options.test
+++ b/test/sql/settings/create_table_with_options.test
@@ -178,13 +178,6 @@ SET preserve_insertion_order=true
 # Verifies the parser accepts the combination, the WITH options are persisted, the partition layout
 # matches PARTITIONED BY, the WITH compression takes effect, and SORTED BY actually reorders rows
 # (per-rowgroup b stats are disjoint and ascending after sort).
-#
-# Layout (6144 rows, parquet_row_group_size=2048, single thread, preserve_insertion_order=true):
-#   PARTITIONED BY (p):  p = i // 3072 ∈ {0, 1}  → 2 hive directories (p=0/, p=1/)
-#   SORTED BY (b):       b = 6143 - i            → input is b DESCENDING; after sort, b ASCENDING
-#   Rowgroups per partition: 3072 rows ÷ 2048 = 2 rowgroups (sizes 2048, 1024)
-#   Partition p=0: b ∈ [3072, 6143]   →  rg 0: b ∈ [3072,5119]   rg 1: b ∈ [5120,6143]
-#   Partition p=1: b ∈ [0, 3071]      →  rg 0: b ∈ [0,2047]      rg 1: b ∈ [2048,3071]
 statement ok
 CREATE TABLE ducklake.t_combo(p INTEGER, b INTEGER)
 PARTITIONED BY (p)

From 3ac58257557be58a9d123f92d648eb2aaae62eed Mon Sep 17 00:00:00 2001
From: Alex-Monahan <alexm91@vt.edu>
Date: Thu, 4 Jun 2026 11:34:57 -0700
Subject: [PATCH 09/12] Fix bad conflict resolution from github.com web merge

The web-based merge of v1.5-variegata (2f205dbb) resolved the conflict
in ducklake_transaction.cpp by keeping a 295-line block (NewTableInfo,
CancelOrDropField, RenameEmittedEntry, HandleChangedFields,
GetNewTableInfo) that upstream had refactored into
ducklake_commit_state.hpp / ducklake_transaction_state.cpp, causing
duplicate definitions and breaking compilation.

Remove the obsolete block and port the one piece the upstream refactor
did not carry over: the CREATED-case handling that persists partition
and sort metadata for CREATE TABLE ... PARTITIONED BY / SORTED BY, now
in DuckLakeTransactionState::GetNewTableInfo.

Also includes make-format output limited to code added by this PR.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../ducklake_compaction_functions.cpp         |   3 +-
 src/storage/ducklake_transaction.cpp          | 295 ------------------
 src/storage/ducklake_transaction_state.cpp    |  12 +
 .../create_table_inline_transactions.test     |   3 +-
 .../create_table_inline_with_inlining.test    |   3 +-
 5 files changed, 17 insertions(+), 299 deletions(-)

diff --git a/src/functions/ducklake_compaction_functions.cpp b/src/functions/ducklake_compaction_functions.cpp
index dcf8ecf10c8..f79ae1ddb08 100644
--- a/src/functions/ducklake_compaction_functions.cpp
+++ b/src/functions/ducklake_compaction_functions.cpp
@@ -69,8 +69,7 @@ vector<BoundOrderByNode> DuckLakeCompactor::BindSortOrders(Binder &binder, const
 	return orders;
 }
 
-vector<BoundOrderByNode> DuckLakeCompactor::BindSortOrders(Binder &binder, DuckLakeTableEntry &table,
-                                                           idx_t table_index,
+vector<BoundOrderByNode> DuckLakeCompactor::BindSortOrders(Binder &binder, DuckLakeTableEntry &table, idx_t table_index,
                                                            vector<OrderByNode> &pre_bound_orders) {
 	return BindSortOrders(binder, table.GetColumns(), table.name, TableIndex(table_index), pre_bound_orders);
 }
diff --git a/src/storage/ducklake_transaction.cpp b/src/storage/ducklake_transaction.cpp
index b84febadf5f..a8aeb2f7e0a 100644
--- a/src/storage/ducklake_transaction.cpp
+++ b/src/storage/ducklake_transaction.cpp
@@ -1130,301 +1130,6 @@ DuckLakeTableInfo DuckLakeTransaction::GetNewTable(DuckLakeCommitState &commit_s
 	return table_entry;
 }
 
-struct NewTableInfo {
-	vector<DuckLakeTableInfo> new_tables;
-	vector<DuckLakeViewInfo> new_views;
-	vector<DuckLakePartitionInfo> new_partition_keys;
-	vector<DuckLakeTagInfo> new_tags;
-	vector<DuckLakeColumnTagInfo> new_column_tags;
-	vector<DuckLakeDroppedColumn> dropped_columns;
-	vector<DuckLakeNewColumn> new_columns;
-	vector<DuckLakeTableInfo> new_inlined_data_tables;
-	vector<DuckLakeSortInfo> new_sort_keys;
-};
-
-struct NewMacroInfo {
-	vector<DuckLakeMacroInfo> new_macros;
-};
-
-void CancelOrDropField(TableIndex table_id, FieldIndex field_id, NewTableInfo &result,
-                       unordered_map<idx_t, idx_t> &txn_added_fields) {
-	auto it = txn_added_fields.find(field_id.index);
-	if (it != txn_added_fields.end()) {
-		auto erased_idx = it->second;
-		result.new_columns.erase(result.new_columns.begin() + erased_idx);
-		txn_added_fields.erase(it);
-		for (auto &entry : txn_added_fields) {
-			if (entry.second > erased_idx) {
-				entry.second--;
-			}
-		}
-		return;
-	}
-	// Column was not added in the same transaction, we emit a drop.
-	DuckLakeDroppedColumn dropped_col;
-	dropped_col.table_id = table_id;
-	dropped_col.field_id = field_id;
-	result.dropped_columns.push_back(dropped_col);
-}
-
-template <typename T>
-void RenameEmittedEntry(vector<T> &entries, TableIndex remapped_id, const string &new_name) {
-	for (auto &entry : entries) {
-		if (entry.id == remapped_id) {
-			entry.name = new_name;
-			return;
-		}
-	}
-}
-
-void HandleChangedFields(TableIndex table_id, const ColumnChangeInfo &change_info, NewTableInfo &result,
-                         unordered_map<idx_t, idx_t> &txn_added_fields) {
-	for (auto &dropped_field_id : change_info.dropped_fields) {
-		CancelOrDropField(table_id, dropped_field_id, result, txn_added_fields);
-	}
-	for (auto &new_col_info : change_info.new_fields) {
-		DuckLakeNewColumn new_column;
-		new_column.table_id = table_id;
-		new_column.column_info = new_col_info.column_info;
-		new_column.parent_idx = new_col_info.parent_idx;
-		txn_added_fields[new_col_info.column_info.id.index] = result.new_columns.size();
-		result.new_columns.push_back(std::move(new_column));
-	}
-}
-
-void DuckLakeTransaction::GetNewTableInfo(DuckLakeCommitState &commit_state, DuckLakeCatalogSet &catalog_set,
-                                          reference<CatalogEntry> table_entry, NewTableInfo &result,
-                                          TransactionChangeInformation &transaction_changes) {
-	// iterate over the table chain in reverse order when committing
-	// the latest entry is the root entry - but we need to commit starting from the first entry written
-	// gather all tables
-	vector<reference<DuckLakeTableEntry>> tables;
-	while (true) {
-		tables.push_back(table_entry.get().Cast<DuckLakeTableEntry>());
-		if (!table_entry.get().HasChild()) {
-			break;
-		}
-		table_entry = table_entry.get().Child();
-	}
-
-	// Maps from field index to the number of alter operations for that field.
-	map<FieldIndex, idx_t> field_alter_count;
-	// Number of table comment operations.
-	idx_t comment_count = 0;
-	// Maps from field index to the number of column comment operations for that field.
-	map<FieldIndex, idx_t> column_comment_count;
-	for (idx_t table_idx = 0; table_idx < tables.size(); table_idx++) {
-		auto &table = tables[table_idx].get();
-		auto local_change = table.GetLocalChange();
-		switch (local_change.type) {
-		case LocalChangeType::SET_NULL:
-		case LocalChangeType::DROP_NULL:
-		case LocalChangeType::RENAME_COLUMN:
-		case LocalChangeType::SET_DEFAULT:
-			field_alter_count[local_change.field_index]++;
-			break;
-		case LocalChangeType::SET_COMMENT:
-			comment_count++;
-			break;
-		case LocalChangeType::SET_COLUMN_COMMENT:
-			column_comment_count[local_change.field_index]++;
-			break;
-		default:
-			break;
-		}
-	}
-
-	// Maps from field index to the number of alter operations remaining for that field.
-	map<FieldIndex, idx_t> field_alter_remaining(std::move(field_alter_count));
-	// Number of table comment operations remaining.
-	idx_t comment_remaining = comment_count;
-	// Maps from field index to the number of column comment operations remaining for that field.
-	map<FieldIndex, idx_t> column_comment_remaining(std::move(column_comment_count));
-
-	// Used to decide whether a column is a newly added column in this transaction.
-	// Maps from field_index.index to the index of the column in result.new_columns.
-	unordered_map<idx_t, idx_t> txn_added_fields;
-
-	// traverse in reverse order
-	bool column_schema_change = false;
-	for (idx_t table_idx = tables.size(); table_idx > 0; table_idx--) {
-		auto &table = tables[table_idx - 1].get();
-		auto local_change = table.GetLocalChange();
-		auto table_id = table.GetTableId();
-		switch (local_change.type) {
-		case LocalChangeType::SET_PARTITION_KEY: {
-			auto partition_key = GetNewPartitionKey(commit_state, table);
-			result.new_partition_keys.push_back(std::move(partition_key));
-
-			transaction_changes.altered_tables.insert(table_id);
-			transaction_changes.altered_tables_with_schema_version_changes.insert(table_id);
-			column_schema_change = true;
-			break;
-		}
-		case LocalChangeType::SET_SORT_KEY: {
-			auto sort_key = GetNewSortKey(commit_state, table);
-			result.new_sort_keys.push_back(std::move(sort_key));
-			transaction_changes.altered_tables.insert(table_id);
-			break;
-		}
-		case LocalChangeType::SET_COMMENT: {
-			comment_remaining--;
-			if (comment_remaining > 0) {
-				break;
-			}
-			DuckLakeTagInfo comment_info;
-			comment_info.id = commit_state.GetTableId(table).index;
-			comment_info.key = "comment";
-			comment_info.value = table.comment;
-			result.new_tags.push_back(std::move(comment_info));
-
-			transaction_changes.altered_tables.insert(table_id);
-			break;
-		}
-		case LocalChangeType::SET_COLUMN_COMMENT: {
-			auto &col_comment_rem = column_comment_remaining[local_change.field_index];
-			col_comment_rem--;
-			if (col_comment_rem > 0) {
-				break;
-			}
-			DuckLakeColumnTagInfo comment_info;
-			comment_info.table_id = commit_state.GetTableId(table);
-			comment_info.field_index = local_change.field_index;
-			comment_info.key = "comment";
-			comment_info.value = table.GetColumnByFieldId(local_change.field_index).Comment();
-			result.new_column_tags.push_back(std::move(comment_info));
-
-			transaction_changes.altered_tables.insert(table_id);
-			break;
-		}
-		case LocalChangeType::SET_NULL:
-		case LocalChangeType::DROP_NULL:
-		case LocalChangeType::RENAME_COLUMN:
-		case LocalChangeType::SET_DEFAULT: {
-			auto &remaining = field_alter_remaining[local_change.field_index];
-			remaining--;
-			// This is an older thus superseded entry for a field that is altered for multiple times in this
-			// transaction, so we skip it; the newest entry will emit the definitive value.
-			if (remaining > 0) {
-				break;
-			}
-
-			auto committed_table_id = commit_state.GetTableId(table);
-
-			// Cancel the previous txn-local add for this field, or drop the committed column.
-			CancelOrDropField(committed_table_id, local_change.field_index, result, txn_added_fields);
-
-			// Insert the new column with the updated info and register it.
-			DuckLakeNewColumn new_col;
-			new_col.table_id = committed_table_id;
-			new_col.column_info = table.GetColumnInfo(local_change.field_index);
-			txn_added_fields[local_change.field_index.index] = result.new_columns.size();
-			result.new_columns.push_back(std::move(new_col));
-
-			transaction_changes.altered_tables.insert(table_id);
-			transaction_changes.altered_tables_with_schema_version_changes.insert(table_id);
-			if (local_change.type == LocalChangeType::RENAME_COLUMN) {
-				column_schema_change = true;
-				// persist updated sort expressions (column name was updated in the table entry)
-				if (table.GetSortData()) {
-					auto sort_key = GetNewSortKey(commit_state, table);
-					result.new_sort_keys.push_back(std::move(sort_key));
-				}
-			}
-			break;
-		}
-		case LocalChangeType::REMOVE_COLUMN:
-		case LocalChangeType::CHANGE_COLUMN_TYPE: {
-			// drop the indicated column
-			// note that in case of nested types we might be dropping multiple columns here
-			HandleChangedFields(commit_state.GetTableId(table), table.GetChangedFields(), result, txn_added_fields);
-			transaction_changes.altered_tables_with_schema_version_changes.insert(table_id);
-			column_schema_change = true;
-			break;
-		}
-		case LocalChangeType::ADD_COLUMN: {
-			// insert the new column
-			DuckLakeNewColumn new_col;
-			new_col.table_id = commit_state.GetTableId(table);
-			new_col.column_info = table.GetAddColumnInfo();
-			txn_added_fields[new_col.column_info.id.index] = result.new_columns.size();
-			result.new_columns.push_back(std::move(new_col));
-
-			transaction_changes.altered_tables.insert(table.GetTableId());
-			transaction_changes.altered_tables_with_schema_version_changes.insert(table_id);
-			column_schema_change = true;
-			break;
-		}
-		case LocalChangeType::NONE:
-		case LocalChangeType::CREATED:
-		case LocalChangeType::RENAMED: {
-			auto old_table_id = table.GetTableId();
-			if (local_change.type == LocalChangeType::RENAMED && old_table_id.IsTransactionLocal()) {
-				auto existing = commit_state.committed_tables.find(old_table_id);
-				if (existing != commit_state.committed_tables.end()) {
-					// If we are rename a table in the same transaction it was created, we need to patch it
-					RenameEmittedEntry(result.new_tables, existing->second, table.name);
-					break;
-				}
-			}
-			auto new_table = GetNewTable(commit_state, table);
-			auto new_table_id = new_table.id;
-			result.new_tables.push_back(std::move(new_table));
-
-			// remap the table in the commit state
-			commit_state.committed_tables.emplace(old_table_id, new_table_id);
-
-			// create an inlined data table entry with the latest columns
-			// (uses tables.front() to get the most up-to-date schema including any ALTER TABLE changes)
-			auto &latest_table = tables.front().get();
-			DuckLakeTableInfo inlined_entry;
-			inlined_entry.id = new_table_id;
-			inlined_entry.schema_id = latest_table.ParentSchema().Cast<DuckLakeSchemaEntry>().GetSchemaId();
-			inlined_entry.uuid = latest_table.GetTableUUID();
-			inlined_entry.columns = latest_table.GetTableColumns();
-			result.new_inlined_data_tables.push_back(std::move(inlined_entry));
-
-			// CREATE TABLE ... PARTITIONED BY / SORTED BY
-			if (local_change.type == LocalChangeType::CREATED) {
-				if (table.GetPartitionData()) {
-					auto partition_key = GetNewPartitionKey(commit_state, table);
-					result.new_partition_keys.push_back(std::move(partition_key));
-				}
-				if (table.GetSortData()) {
-					auto sort_key = GetNewSortKey(commit_state, table);
-					result.new_sort_keys.push_back(std::move(sort_key));
-				}
-			}
-			break;
-		}
-		default:
-			throw NotImplementedException("Unsupported transaction local change");
-		}
-	}
-	if (column_schema_change) {
-		// we changed the column definitions of an existing table - we need to create a new inlined data table
-		// (if data inlining is enabled)
-		// for newly created tables this is already handled in the CREATED case above
-		auto &table = tables.front().get();
-		auto committed_id = commit_state.GetTableId(table);
-		bool already_added = false;
-		for (auto &entry : result.new_inlined_data_tables) {
-			if (entry.id == committed_id) {
-				already_added = true;
-				break;
-			}
-		}
-		if (!already_added) {
-			DuckLakeTableInfo table_entry;
-			table_entry.id = committed_id;
-			table_entry.schema_id = table.ParentSchema().Cast<DuckLakeSchemaEntry>().GetSchemaId();
-			table_entry.uuid = table.GetTableUUID();
-			table_entry.columns = table.GetTableColumns();
-			result.new_inlined_data_tables.push_back(std::move(table_entry));
-		}
-	}
-}
-
 DuckLakeViewInfo DuckLakeTransaction::GetNewView(DuckLakeCommitState &commit_state, DuckLakeViewEntry &view) {
 	auto &schema = view.ParentSchema().Cast<DuckLakeSchemaEntry>();
 	DuckLakeViewInfo view_entry;
diff --git a/src/storage/ducklake_transaction_state.cpp b/src/storage/ducklake_transaction_state.cpp
index 2b708c3ad82..2cade30015d 100644
--- a/src/storage/ducklake_transaction_state.cpp
+++ b/src/storage/ducklake_transaction_state.cpp
@@ -1017,6 +1017,18 @@ void DuckLakeTransactionState::GetNewTableInfo(DuckLakeCommitState &commit_state
 			inlined_entry.uuid = latest_table.GetTableUUID();
 			inlined_entry.columns = latest_table.GetTableColumns();
 			result.new_inlined_data_tables.push_back(std::move(inlined_entry));
+
+			// CREATE TABLE ... PARTITIONED BY / SORTED BY
+			if (local_change.type == LocalChangeType::CREATED) {
+				if (table.GetPartitionData()) {
+					auto partition_key = DuckLakeTransaction::GetNewPartitionKey(commit_state, table);
+					result.new_partition_keys.push_back(std::move(partition_key));
+				}
+				if (table.GetSortData()) {
+					auto sort_key = DuckLakeTransaction::GetNewSortKey(commit_state, table);
+					result.new_sort_keys.push_back(std::move(sort_key));
+				}
+			}
 			break;
 		}
 		default:
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test
index d04656cfc00..06372b84d36 100644
--- a/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test
@@ -1,9 +1,10 @@
 # name: test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test
 # description: Transaction semantics for inline PARTITIONED BY / SORTED BY on CREATE TABLE / CTAS:
+# group: [create_table_inline_partition_sort]
+
 #              rollback discards everything (entry, files, transaction-local ids); concurrent CTAS with
 #              distinct names cannot conflict; concurrent CTAS with the same name produces a commit-time
 #              conflict; 
-# group: [create_table_inline_partition_sort]
 
 require ducklake
 
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test
index 7538a72c6da..cd5cb78026d 100644
--- a/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test
@@ -1,12 +1,13 @@
 # name: test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test
 # description: CTAS with inline PARTITIONED BY / SORTED BY interacting with the data-inlining row limit.
+# group: [create_table_inline_partition_sort]
+
 #              When a CTAS's row count is below DATA_INLINING_ROW_LIMIT, rows go to the per-table
 #              ducklake_inlined_data_<id>_<seq> catalog table instead of Parquet. Partition and sort
 #              metadata must still be recorded; the inline operator coordinates with the about-to-be-created
 #              table_id at sink-state-init time. Once a flush is called, inlined rows materialize as
 #              partition-aware Parquet files. (On-disk hive directory verification is intentionally omitted -
 #              that's covered by create_table_inline_basic.test / create_table_inline_ctas.test.)
-# group: [create_table_inline_partition_sort]
 
 require ducklake
 

From 19ded6a6cb08c711e4e36dd7f4d57fca025f8d0a Mon Sep 17 00:00:00 2001
From: Alex-Monahan <alexm91@vt.edu>
Date: Thu, 4 Jun 2026 14:27:59 -0700
Subject: [PATCH 10/12] Refactor inline partition/sort tests to avoid
 multi-table metadata scans

The Quack RPC catalog test runner does not support multiple streaming
scans in a single query. Resolve table_ids into SQL variables with
SET VARIABLE first, then reference them via getvariable() so each
metadata query scans only one metadata table.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../create_table_inline_basic.test            | 106 ++++++++++++------
 .../create_table_inline_ctas.test             |  67 +++++++----
 .../create_table_inline_errors.test           |  12 +-
 .../create_table_inline_transactions.test     |  69 +++++++++---
 .../create_table_inline_with_inlining.test    |  54 +++++----
 5 files changed, 213 insertions(+), 95 deletions(-)

diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test b/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test
index 38b97dd29f4..d008d5e4548 100644
--- a/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_basic.test
@@ -30,26 +30,31 @@ SELECT i, v FROM t_part ORDER BY i, v
 1	c
 2	b
 
+# resolve the table_id in a separate statement - some metadata catalogs (e.g. Quack
+# ones) cannot scan two of their tables in a single query
+statement ok
+SET VARIABLE t_part_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part')
+
 # verify partition metadata was persisted at CREATE time (no ALTER ran)
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part')
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('t_part_id')
 ----
 1
 
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part')
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = getvariable('t_part_id')
 ----
 1
 
 # verify the on-disk Parquet files landed in hive-style partition-specific directories
 # (DATA_INLINING_ROW_LIMIT 0 forces INSERT to write Parquet directly; one file per partition value)
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part')
+SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_part_id')
 ----
 2
 
 query I
-SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_part') ORDER BY 1
+SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_part_id') ORDER BY 1
 ----
 i=1
 i=2
@@ -71,20 +76,23 @@ SELECT i, v FROM t_sort
 2	z
 3	x
 
+statement ok
+SET VARIABLE t_sort_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort')
+
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort')
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('t_sort_id')
 ----
 1
 
 query II
-SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort') ORDER BY sort_key_index
+SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = getvariable('t_sort_id') ORDER BY sort_key_index
 ----
 0	i
 
 # verify the inline path persisted the same defaults a bare ALTER TABLE ... SET SORTED BY (col) would
 # (the metadata writer collapses OrderType::ASCENDING -> "ASC" and OrderByNullType::ORDER_DEFAULT -> "NULLS_LAST")
 query III
-SELECT sort_direction, null_order, dialect FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_sort')
+SELECT sort_direction, null_order, dialect FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = getvariable('t_sort_id')
 ----
 ASC	NULLS_LAST	duckdb
 
@@ -104,9 +112,12 @@ SELECT a, b, v FROM t_both1 WHERE a = 1
 1	20	r
 1	30	p
 
+statement ok
+SET VARIABLE t_both1_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both1')
+
 # on-disk: one Parquet file per distinct partition value (a=1 and a=2), each in its own a=N directory
 query I
-SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both1') ORDER BY 1
+SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_both1_id') ORDER BY 1
 ----
 a=1
 a=2
@@ -115,13 +126,16 @@ a=2
 statement ok
 CREATE TABLE t_both2 (a INTEGER, b INTEGER) SORTED BY (b) PARTITIONED BY (a)
 
+statement ok
+SET VARIABLE t_both2_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both2')
+
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both2')
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('t_both2_id')
 ----
 1
 
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_both2')
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('t_both2_id')
 ----
 1
 
@@ -129,13 +143,16 @@ SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELE
 statement ok
 CREATE TABLE t_multi (a INTEGER, b INTEGER, c INTEGER) PARTITIONED BY (a, b) SORTED BY (c, a)
 
+statement ok
+SET VARIABLE t_multi_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi')
+
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi')
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_column WHERE table_id = getvariable('t_multi_id')
 ----
 2
 
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi')
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = getvariable('t_multi_id')
 ----
 2
 
@@ -143,7 +160,7 @@ statement ok
 INSERT INTO t_multi VALUES (1, 1, 30), (1, 1, 10), (1, 2, 20), (2, 1, 5)
 
 # t_multi is PARTITIONED BY (a, b), so each (a, b) combination lives in its own file. SORTED BY
-# (c, a) means rows within a partition's file are (c, a)-ascending. 
+# (c, a) means rows within a partition's file are (c, a)-ascending.
 query III
 SELECT a, b, c FROM t_multi WHERE a = 1 AND b = 1
 ----
@@ -154,7 +171,7 @@ SELECT a, b, c FROM t_multi WHERE a = 1 AND b = 1
 query II
 SELECT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) AS a_dir, regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1) AS b_dir
 FROM ducklake_metadata.ducklake_data_file
-WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi')
+WHERE table_id = getvariable('t_multi_id')
 ORDER BY a_dir, b_dir
 ----
 a=1	b=1
@@ -164,7 +181,7 @@ a=2	b=1
 # the a directory must come *before* the b directory in the path (PARTITIONED BY (a, b) ordering)
 query I
 SELECT count(*) FROM ducklake_metadata.ducklake_data_file
-WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_multi')
+WHERE table_id = getvariable('t_multi_id')
   AND regexp_matches(path, 'a=[0-9]+/b=[0-9]+')
 ----
 3
@@ -173,8 +190,11 @@ WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE ta
 statement ok
 CREATE TABLE t_transform (ts TIMESTAMP, v INTEGER) PARTITIONED BY (year(ts))
 
+statement ok
+SET VARIABLE t_transform_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_transform')
+
 query II
-SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_transform')
+SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = getvariable('t_transform_id')
 ----
 0	year
 
@@ -183,7 +203,7 @@ INSERT INTO t_transform VALUES (TIMESTAMP '2023-06-01', 1), (TIMESTAMP '2023-09-
 
 # year() transform produces hive directories named with the partition key NAME (year, not ts)
 query I
-SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_transform') ORDER BY 1
+SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_transform_id') ORDER BY 1
 ----
 year=2023
 year=2024
@@ -192,13 +212,16 @@ year=2024
 statement ok
 CREATE TABLE t_only_part (a INTEGER) PARTITIONED BY (a)
 
+statement ok
+SET VARIABLE t_only_part_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part')
+
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part')
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('t_only_part_id')
 ----
 1
 
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part')
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('t_only_part_id')
 ----
 0
 
@@ -206,7 +229,7 @@ statement ok
 INSERT INTO t_only_part VALUES (10), (20), (10)
 
 query I
-SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_part') ORDER BY 1
+SELECT DISTINCT regexp_extract(path, '.*(a=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_only_part_id') ORDER BY 1
 ----
 a=10
 a=20
@@ -215,13 +238,16 @@ a=20
 statement ok
 CREATE TABLE t_only_sort (a INTEGER) SORTED BY (a)
 
+statement ok
+SET VARIABLE t_only_sort_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_sort')
+
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_sort')
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('t_only_sort_id')
 ----
 0
 
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_only_sort')
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('t_only_sort_id')
 ----
 1
 
@@ -229,8 +255,11 @@ SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELE
 statement ok
 CREATE TABLE t_bucket (id BIGINT, v VARCHAR) PARTITIONED BY (bucket(8, id))
 
+statement ok
+SET VARIABLE t_bucket_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket')
+
 query II
-SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket')
+SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = getvariable('t_bucket_id')
 ----
 0	bucket(8)
 
@@ -240,13 +269,13 @@ INSERT INTO t_bucket VALUES (1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e'), (6
 # bucket() transform writes into hive directories named "bucket=N" (modulo bucket count)
 # we don't fix specific bucket assignments (hash-dependent) but we assert at least 1 and at most 8 distinct buckets
 query I
-SELECT count(DISTINCT regexp_extract(path, '.*(bucket=[0-9]+)[/\\].*', 1)) BETWEEN 1 AND 8 FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket')
+SELECT count(DISTINCT regexp_extract(path, '.*(bucket=[0-9]+)[/\\].*', 1)) BETWEEN 1 AND 8 FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_bucket_id')
 ----
 true
 
 # every file must live in a bucket=N directory
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_bucket') AND NOT regexp_matches(path, 'bucket=[0-9]+')
+SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_bucket_id') AND NOT regexp_matches(path, 'bucket=[0-9]+')
 ----
 0
 
@@ -267,30 +296,35 @@ ALTER TABLE t_create_then_alter SET SORTED BY (b DESC)
 statement ok
 COMMIT
 
+statement ok
+SET VARIABLE t_create_then_alter_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter')
+
 # exactly one partition_info row (the ALTER's), not two
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter')
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('t_create_then_alter_id')
 ----
 1
 
+# resolve the column_id of b separately so the check below only scans one metadata table
+statement ok
+SET VARIABLE t_create_then_alter_b_col = (SELECT column_id FROM ducklake_metadata.ducklake_column WHERE table_id = getvariable('t_create_then_alter_id') AND column_name = 'b' AND end_snapshot IS NULL)
+
 # the surviving partition row references column b (the ALTER's choice), not a (the inline create table's).
 query III
-SELECT pc.partition_key_index, c.column_name, pc.transform
-FROM ducklake_metadata.ducklake_partition_column pc
-JOIN ducklake_metadata.ducklake_column c
-  ON pc.table_id = c.table_id AND pc.column_id = c.column_id AND c.end_snapshot IS NULL
-WHERE pc.table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter')
+SELECT partition_key_index, column_id = getvariable('t_create_then_alter_b_col'), transform
+FROM ducklake_metadata.ducklake_partition_column
+WHERE table_id = getvariable('t_create_then_alter_id')
 ----
-0	b	identity
+0	true	identity
 
 # exactly one sort_info row, and its expression is b DESC (the ALTER's), not a
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter')
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('t_create_then_alter_id')
 ----
 1
 
 query III
-SELECT expression, sort_direction, null_order FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter')
+SELECT expression, sort_direction, null_order FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = getvariable('t_create_then_alter_id')
 ----
 b	DESC	NULLS_LAST
 
@@ -299,13 +333,13 @@ statement ok
 INSERT INTO t_create_then_alter VALUES (1, 100), (2, 100), (3, 200)
 
 query I
-SELECT DISTINCT regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') ORDER BY 1
+SELECT DISTINCT regexp_extract(path, '.*(b=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_create_then_alter_id') ORDER BY 1
 ----
 b=100
 b=200
 
 # none of the files should be under an a=N directory (regression guard for the dedup logic)
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 't_create_then_alter') AND regexp_matches(path, 'a=[0-9]+/')
+SELECT count(*) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('t_create_then_alter_id') AND regexp_matches(path, 'a=[0-9]+/')
 ----
 0
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test
index d93155f7202..64519581ffe 100644
--- a/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_ctas.test
@@ -29,14 +29,19 @@ SELECT i, v FROM ctas_part ORDER BY i
 3	3
 4	4
 
+# resolve the table_id in a separate statement - some metadata catalogs (e.g. Quack
+# ones) cannot scan two of their tables in a single query
+statement ok
+SET VARIABLE ctas_part_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_part')
+
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_part')
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('ctas_part_id')
 ----
 1
 
 # CTAS must hive-partition on the initial write: 5 distinct values from range(5) -> 5 hive directories
 query I
-SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_part') ORDER BY 1
+SELECT DISTINCT regexp_extract(path, '.*(i=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('ctas_part_id') ORDER BY 1
 ----
 i=0
 i=1
@@ -64,8 +69,11 @@ SELECT i, j FROM ctas_sort
 1	8
 0	9
 
+statement ok
+SET VARIABLE ctas_sort_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_sort')
+
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_sort')
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('ctas_sort_id')
 ----
 1
 
@@ -75,16 +83,22 @@ statement ok
 CREATE TABLE ctas_both PARTITIONED BY (g) SORTED BY (v) AS
 SELECT range % 3 AS g, 29 - range AS v FROM range(30)
 
-query II
-SELECT
-  (SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both')),
-  (SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both'))
+statement ok
+SET VARIABLE ctas_both_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both')
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('ctas_both_id')
 ----
-1	1
+1
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('ctas_both_id')
+----
+1
 
 # 3 distinct partition values -> 3 hive directories
 query I
-SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_both')
+SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('ctas_both_id')
 ----
 3
 
@@ -136,15 +150,21 @@ statement ok
 CREATE TABLE ctas_swap SORTED BY (v) PARTITIONED BY (g) AS
 SELECT range % 3 AS g, 29 - range AS v FROM range(30)
 
-query II
-SELECT
-  (SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap')),
-  (SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap'))
+statement ok
+SET VARIABLE ctas_swap_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap')
+
+query I
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('ctas_swap_id')
 ----
-1	1
+1
 
 query I
-SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_swap')
+SELECT count(*) FROM ducklake_metadata.ducklake_sort_info WHERE table_id = getvariable('ctas_swap_id')
+----
+1
+
+query I
+SELECT count(DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1)) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('ctas_swap_id')
 ----
 3
 
@@ -168,14 +188,17 @@ statement ok
 CREATE TABLE ctas_transform PARTITIONED BY (year(ts)) AS
 SELECT TIMESTAMP '2024-01-01' + INTERVAL (i) YEAR AS ts, i AS v FROM range(3) t(i)
 
+statement ok
+SET VARIABLE ctas_transform_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_transform')
+
 query II
-SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_transform')
+SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = getvariable('ctas_transform_id')
 ----
 0	year
 
 # CTAS with year() transform must hive-partition on the initial write
 query I
-SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_transform') ORDER BY 1
+SELECT DISTINCT regexp_extract(path, '.*(year=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('ctas_transform_id') ORDER BY 1
 ----
 year=2024
 year=2025
@@ -196,8 +219,11 @@ SELECT a, b FROM ctas_multi
 2	tag_2
 3	tag_3
 
+statement ok
+SET VARIABLE ctas_multi_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_multi')
+
 query II
-SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_multi') ORDER BY sort_key_index
+SELECT sort_key_index, expression FROM ducklake_metadata.ducklake_sort_expression WHERE table_id = getvariable('ctas_multi_id') ORDER BY sort_key_index
 ----
 0	a
 1	b
@@ -239,9 +265,12 @@ SELECT g, v FROM ctas_cte WHERE g = 2
 2	5
 2	8
 
+statement ok
+SET VARIABLE ctas_cte_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_cte')
+
 # CTE-sourced CTAS must still partition on the initial write (regression: planner passes the spec through)
 query I
-SELECT DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ctas_cte') ORDER BY 1
+SELECT DISTINCT regexp_extract(path, '.*(g=[0-9]+)[/\\].*', 1) FROM ducklake_metadata.ducklake_data_file WHERE table_id = getvariable('ctas_cte_id') ORDER BY 1
 ----
 g=0
 g=1
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test
index 85343564a51..e9d2fa75d61 100644
--- a/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_errors.test
@@ -79,8 +79,13 @@ SELECT count(*) FROM ducklake_metadata.ducklake_sort_info
 statement ok
 CREATE TABLE ok_after_errors (a INTEGER) PARTITIONED BY (a)
 
+# resolve the table_id in a separate statement - some metadata catalogs (e.g. Quack
+# ones) cannot scan two of their tables in a single query
+statement ok
+SET VARIABLE ok_after_errors_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_after_errors')
+
 query I
-SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_after_errors')
+SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id = getvariable('ok_after_errors_id')
 ----
 1
 
@@ -89,8 +94,11 @@ SELECT count(*) FROM ducklake_metadata.ducklake_partition_info WHERE table_id =
 statement ok
 CREATE TABLE ok_diff_transforms (ts TIMESTAMP) PARTITIONED BY (year(ts), month(ts))
 
+statement ok
+SET VARIABLE ok_diff_transforms_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_diff_transforms')
+
 query II
-SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = (SELECT table_id FROM ducklake_metadata.ducklake_table WHERE table_name = 'ok_diff_transforms') ORDER BY partition_key_index
+SELECT partition_key_index, transform FROM ducklake_metadata.ducklake_partition_column WHERE table_id = getvariable('ok_diff_transforms_id') ORDER BY partition_key_index
 ----
 0	year
 1	month
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test
index 06372b84d36..f805cfcc661 100644
--- a/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_transactions.test
@@ -4,7 +4,7 @@
 
 #              rollback discards everything (entry, files, transaction-local ids); concurrent CTAS with
 #              distinct names cannot conflict; concurrent CTAS with the same name produces a commit-time
-#              conflict; 
+#              conflict;
 
 require ducklake
 
@@ -88,16 +88,21 @@ SELECT * FROM rollback_ctas
 ----
 <REGEX>:.*not exist.*
 
+# resolve the (now nonexistent -> NULL) table_id in a separate statement - some metadata catalogs
+# (e.g. Quack ones) cannot scan two of their tables in a single query
+statement ok
+SET VARIABLE rollback_ctas_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rollback_ctas')
+
 # verify the metadata catalog never saw the rolled-back partition_info / sort_info rows
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_partition_info
-WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rollback_ctas')
+WHERE table_id = getvariable('rollback_ctas_id')
 ----
 0
 
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_sort_info
-WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rollback_ctas')
+WHERE table_id = getvariable('rollback_ctas_id')
 ----
 0
 
@@ -149,10 +154,13 @@ statement error con2
 COMMIT
 ----
 
+statement ok
+SET VARIABLE conflict_same_name_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'conflict_same_name')
+
 # con1's table is the surviving one - check its inline partition spec landed
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_partition_info
-WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'conflict_same_name')
+WHERE table_id = getvariable('conflict_same_name_id')
 ----
 1
 
@@ -185,21 +193,30 @@ COMMIT
 statement ok con2
 COMMIT
 
+statement ok
+SET VARIABLE concurrent_a_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_a')
+
+statement ok
+SET VARIABLE concurrent_b_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_b')
+
 # both tables exist and have their own partition_info rows
-query II
-SELECT
-  (SELECT count(*) FROM ducklake_meta.ducklake_partition_info
-   WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_a')),
-  (SELECT count(*) FROM ducklake_meta.ducklake_partition_info
-   WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'concurrent_b'))
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+WHERE table_id = getvariable('concurrent_a_id')
 ----
-1	1
+1
+
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+WHERE table_id = getvariable('concurrent_b_id')
+----
+1
 
 # the two tables have DISTINCT committed partition_ids (proves the commit-time remap deduplicates the
 # transaction-local ids that started identical at planning time on each connection).
 query I
 SELECT count(DISTINCT partition_id) FROM ducklake_meta.ducklake_partition_info
-WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name IN ('concurrent_a', 'concurrent_b'))
+WHERE table_id IN (getvariable('concurrent_a_id'), getvariable('concurrent_b_id'))
 ----
 2
 
@@ -235,23 +252,26 @@ ALTER TABLE chain_winner SET SORTED BY (b DESC)
 statement ok
 COMMIT
 
+statement ok
+SET VARIABLE chain_winner_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner')
+
 # exactly one partition_info row (the last ALTER's) and one sort_info row (the last ALTER's)
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_partition_info
-WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner')
+WHERE table_id = getvariable('chain_winner_id')
 ----
 1
 
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_sort_info
-WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner')
+WHERE table_id = getvariable('chain_winner_id')
 ----
 1
 
 # the surviving sort row is `b DESC`, not the inline `a ASC`
 query III
 SELECT expression, sort_direction, null_order FROM ducklake_meta.ducklake_sort_expression
-WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'chain_winner')
+WHERE table_id = getvariable('chain_winner_id')
 ----
 b	DESC	NULLS_LAST
 
@@ -272,9 +292,12 @@ SELECT * FROM error_ctas
 ----
 <REGEX>:.*not exist.*
 
+statement ok
+SET VARIABLE error_ctas_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'error_ctas')
+
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_partition_info
-WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'error_ctas')
+WHERE table_id = getvariable('error_ctas_id')
 ----
 0
 
@@ -376,10 +399,17 @@ SELECT * FROM rb_b
 ----
 <REGEX>:.*not exist.*
 
+# both lookups yield NULL (the tables were rolled back), so the IN below matches no rows
+statement ok
+SET VARIABLE rb_a_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rb_a')
+
+statement ok
+SET VARIABLE rb_b_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'rb_b')
+
 # neither rolled-back table left a partition_info row
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_partition_info
-WHERE table_id IN (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name IN ('rb_a','rb_b'))
+WHERE table_id IN (getvariable('rb_a_id'), getvariable('rb_b_id'))
 ----
 0
 
@@ -436,10 +466,13 @@ INSERT INTO same_tx_part VALUES (10, 110), (10, 111), (11, 112)
 statement ok
 COMMIT
 
+statement ok
+SET VARIABLE same_tx_part_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name='same_tx_part')
+
 # every data file references the partition_id - CTAS-written and INSERT-written alike
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_data_file
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name='same_tx_part')
+WHERE table_id = getvariable('same_tx_part_id')
   AND partition_id IS NULL
 ----
 0
diff --git a/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test
index cd5cb78026d..bcd053dca22 100644
--- a/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test
+++ b/test/sql/create_table_inline_partition_sort/create_table_inline_with_inlining.test
@@ -42,30 +42,35 @@ SELECT count(*) FROM small_part
 ----
 50
 
+# resolve the table_id in a separate statement - some metadata catalogs (e.g. Quack
+# ones) cannot scan two of their tables in a single query
+statement ok
+SET VARIABLE small_part_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+
 # partition metadata recorded for the inlined CTAS
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_partition_info
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+WHERE table_id = getvariable('small_part_id')
 ----
 1
 
 query II
 SELECT partition_key_index, transform FROM ducklake_meta.ducklake_partition_column
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+WHERE table_id = getvariable('small_part_id')
 ----
 0	identity
 
 # zero data_file rows: rows are sitting in the inlined-data catalog table, not on Parquet
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_data_file
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+WHERE table_id = getvariable('small_part_id')
 ----
 0
 
 # the per-table inlined-data table exists in the metadata catalog
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_inlined_data_tables
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+WHERE table_id = getvariable('small_part_id')
 ----
 1
 
@@ -83,29 +88,32 @@ SELECT count(*) FROM small_both
 ----
 20
 
+statement ok
+SET VARIABLE small_both_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both')
+
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_partition_info
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both')
+WHERE table_id = getvariable('small_both_id')
 ----
 1
 
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_sort_info
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both')
+WHERE table_id = getvariable('small_both_id')
 ----
 1
 
 # the sort_expression row carries the planning-time-allocated direction/null_order
 query III
 SELECT expression, sort_direction, null_order FROM ducklake_meta.ducklake_sort_expression
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both')
+WHERE table_id = getvariable('small_both_id')
 ----
 v	ASC	NULLS_LAST
 
 # fully inlined: zero Parquet files
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_data_file
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_both')
+WHERE table_id = getvariable('small_both_id')
 ----
 0
 
@@ -122,13 +130,13 @@ CALL ducklake_flush_inlined_data('ducklake')
 # path hands the spec through correctly)
 query I
 SELECT count(*) > 0 FROM ducklake_meta.ducklake_data_file
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+WHERE table_id = getvariable('small_part_id')
 ----
 true
 
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_data_file
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'small_part')
+WHERE table_id = getvariable('small_part_id')
   AND partition_id IS NULL
 ----
 0
@@ -154,26 +162,32 @@ SELECT count(*) FROM overflow_part
 ----
 5000
 
+statement ok
+SET VARIABLE overflow_part_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')
+
 # partition + sort metadata persist
-query II
-SELECT
-  (SELECT count(*) FROM ducklake_meta.ducklake_partition_info
-   WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')),
-  (SELECT count(*) FROM ducklake_meta.ducklake_sort_info
-   WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part'))
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_partition_info
+WHERE table_id = getvariable('overflow_part_id')
 ----
-1	1
+1
+
+query I
+SELECT count(*) FROM ducklake_meta.ducklake_sort_info
+WHERE table_id = getvariable('overflow_part_id')
+----
+1
 
 # overflow path: at least one Parquet file was written, and each has a non-null partition_id
 query I
 SELECT count(*) > 0 FROM ducklake_meta.ducklake_data_file
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')
+WHERE table_id = getvariable('overflow_part_id')
 ----
 true
 
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_data_file
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')
+WHERE table_id = getvariable('overflow_part_id')
   AND partition_id IS NULL
 ----
 0
@@ -190,7 +204,7 @@ CALL ducklake_flush_inlined_data('ducklake')
 # overflow_part: zero rows in any inlined-data table after flush
 query I
 SELECT count(*) FROM ducklake_meta.ducklake_data_file
-WHERE table_id = (SELECT table_id FROM ducklake_meta.ducklake_table WHERE table_name = 'overflow_part')
+WHERE table_id = getvariable('overflow_part_id')
   AND partition_id IS NULL
 ----
 0

From 705169066397287272266ee1f27ceea57969d2f1 Mon Sep 17 00:00:00 2001
From: Alex-Monahan <alexm91@vt.edu>
Date: Fri, 5 Jun 2026 16:20:10 -0700
Subject: [PATCH 11/12] Fix target_file_size precedence after merge: session
 setting wins

The merge resolution in GetCopyOptions consulted the table-scoped option
(WITH (...) / metadata) before the ducklake_target_file_size session
setting, inverting the precedence pinned by
test/sql/insert/insert_session_target_file_size.test. Check the session
setting first; fall back to the table-scoped option, then
GetTargetFileSize (global option / default).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/storage/ducklake_insert.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/storage/ducklake_insert.cpp b/src/storage/ducklake_insert.cpp
index 525ba211949..0a496cecd03 100644
--- a/src/storage/ducklake_insert.cpp
+++ b/src/storage/ducklake_insert.cpp
@@ -550,9 +550,15 @@ DuckLakeCopyOptions DuckLakeInsert::GetCopyOptions(ClientContext &context, DuckL
 	}
 	string target_file_size_str;
 	idx_t target_file_size;
-	if (copy_input.GetEffectiveOption("target_file_size", target_file_size_str)) {
+	Value session_target_file_size;
+	bool has_session_target_file_size =
+	    context.TryGetCurrentSetting("ducklake_target_file_size", session_target_file_size) &&
+	    !session_target_file_size.IsNull() && !session_target_file_size.ToString().empty();
+	if (!has_session_target_file_size && copy_input.GetEffectiveOption("target_file_size", target_file_size_str)) {
+		// WITH (...) / metadata option - used unless the ducklake_target_file_size session setting overrides it
 		target_file_size = Value(target_file_size_str).GetValue<idx_t>();
 	} else {
+		// session setting, then global metadata option, then the default
 		target_file_size = catalog.GetTargetFileSize(context, schema_id, table_id);
 	}
 

From 39d70c7afb1d008f74e21f6503b21ddfd236eeba Mon Sep 17 00:00:00 2001
From: Alex-Monahan <alexm91@vt.edu>
Date: Fri, 5 Jun 2026 16:21:44 -0700
Subject: [PATCH 12/12] Format fix

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/storage/ducklake_table_entry.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/storage/ducklake_table_entry.cpp b/src/storage/ducklake_table_entry.cpp
index eff6b7a2f7f..faefb12c9d5 100644
--- a/src/storage/ducklake_table_entry.cpp
+++ b/src/storage/ducklake_table_entry.cpp
@@ -695,7 +695,8 @@ unique_ptr<CatalogEntry> DuckLakeTableEntry::AlterTable(ClientContext &context,
 	if (transaction.HasTransactionLocalInserts(GetTableId())) {
 		VerifyNoNullValues(context, transaction, *this, col);
 		table_info.constraints.push_back(make_uniq<NotNullConstraint>(col.Logical()));
-		auto new_entry = make_uniq<DuckLakeTableEntry>(*this, table_info, LocalChange::SetNull(field_id.GetFieldIndex()));
+		auto new_entry =
+		    make_uniq<DuckLakeTableEntry>(*this, table_info, LocalChange::SetNull(field_id.GetFieldIndex()));
 		return std::move(new_entry);
 	}