From 28b91501fc55fe5498b17e681a24d6dead5b8f6a Mon Sep 17 00:00:00 2001 From: Patrick Lucas Date: Thu, 11 Sep 2025 17:44:08 +0200 Subject: [PATCH 01/12] Fix expression filter pushdown Closes #464 --- src/iceberg_predicate.cpp | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/iceberg_predicate.cpp b/src/iceberg_predicate.cpp index db9031ff..bd5c3aad 100644 --- a/src/iceberg_predicate.cpp +++ b/src/iceberg_predicate.cpp @@ -1,4 +1,5 @@ #include "iceberg_predicate.hpp" +#include "duckdb/planner/expression/bound_operator_expression.hpp" #include "duckdb/planner/filter/constant_filter.hpp" #include "duckdb/planner/filter/conjunction_filter.hpp" #include "duckdb/planner/filter/null_filter.hpp" @@ -80,15 +81,41 @@ bool MatchBoundsTemplated(const TableFilter &filter, const IcebergPredicateStats return MatchBoundsIsNotNullFilter(stats, transform); } case TableFilterType::EXPRESSION_FILTER: { + //! Expressions can be arbitrarily complex, and we currently only support IS NULL/IS NOT NULL checks against the + //! column itself, i.e. where the expression is a BOUND_OPERATOR with type OPERATOR_IS_NULL/_IS_NOT_NULL with a + //! single child expression of type BOUND_REF. + //! + //! See duckdb/duckdb-iceberg#464 auto &expression_filter = filter.Cast(); auto &expr = *expression_filter.expr; + + if (expr.type != ExpressionType::OPERATOR_IS_NULL && expr.type != ExpressionType::OPERATOR_IS_NOT_NULL) { + return true; + } + + if (expr.expression_class != ExpressionClass::BOUND_OPERATOR) { + return true; + } + + auto &bound_operator_expr = expr.Cast(); + if (bound_operator_expr.children.size() != 1) { + return true; + } + + auto &child_expr = bound_operator_expr.children[0]; + if (child_expr->type != ExpressionType::BOUND_REF) { + return true; + } + if (expr.type == ExpressionType::OPERATOR_IS_NULL) { return MatchBoundsIsNullFilter(stats, transform); } + if (expr.type == ExpressionType::OPERATOR_IS_NOT_NULL) { return MatchBoundsIsNotNullFilter(stats, transform); } - //! Any other expression can not be filtered + + //! Should be unreachable return true; } default: From 98c7f4dc8e673be1ff7f12c39ddf8ec3f722ef26 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 11 Sep 2025 19:34:18 +0200 Subject: [PATCH 02/12] reduce calls to show all tables --- src/include/storage/irc_catalog.hpp | 11 ++++ src/storage/irc_table_set.cpp | 24 ++++++--- ..._duckdb_catalog_functions_and_iceberg.test | 11 ++-- test/sql/local/irc/test_show_all_tables.test | 54 +++++++++++++++++++ 4 files changed, 87 insertions(+), 13 deletions(-) create mode 100644 test/sql/local/irc/test_show_all_tables.test diff --git a/src/include/storage/irc_catalog.hpp b/src/include/storage/irc_catalog.hpp index d6f8b8be..83edf208 100644 --- a/src/include/storage/irc_catalog.hpp +++ b/src/include/storage/irc_catalog.hpp @@ -48,6 +48,17 @@ class IRCatalog : public Catalog { bool SetCachedValue(const string &url, const string &value, const rest_api_objects::LoadTableResult &result); static void SetAWSCatalogOptions(IcebergAttachOptions &attach_options, case_insensitive_set_t &set_by_attach_options); + //! Whether or not this catalog should search a specific type with the standard priority + CatalogLookupBehavior CatalogTypeLookupRule(CatalogType type) const override{ + switch (type) { + case CatalogType::TABLE_FUNCTION_ENTRY: + case CatalogType::SCALAR_FUNCTION_ENTRY: + case CatalogType::AGGREGATE_FUNCTION_ENTRY: + return CatalogLookupBehavior::NEVER_LOOKUP; + default: + return CatalogLookupBehavior::STANDARD; + } + } public: static unique_ptr Attach(optional_ptr storage_info, ClientContext &context, diff --git a/src/storage/irc_table_set.cpp b/src/storage/irc_table_set.cpp index f1a9c22c..31af7861 100644 --- a/src/storage/irc_table_set.cpp +++ b/src/storage/irc_table_set.cpp @@ -67,13 +67,25 @@ void ICTableSet::Scan(ClientContext &context, const std::function columns; + auto col = ColumnDefinition(string("__"), LogicalType::UNKNOWN); + columns.push_back(std::move(col)); + info.columns = ColumnList(std::move(columns)); + auto table_entry = make_uniq(table_info, catalog, schema, info); + + if (!table_entry->internal) { + table_entry->internal = schema.internal; } + auto result = table_entry.get(); + if (result->name.empty()) { + throw InternalException("ICTableSet::CreateEntry called with empty name"); + } + table_info.schema_versions.emplace(0, std::move(table_entry)); + + // this looks a little crazy. Maybe just crazy enough to work? + auto &optional = table_info.schema_versions[0].get()->Cast(); + callback(optional); } // erase not iceberg tables for (auto &entry : non_iceberg_tables) { diff --git a/test/sql/local/irc/test_duckdb_catalog_functions_and_iceberg.test b/test/sql/local/irc/test_duckdb_catalog_functions_and_iceberg.test index 2841b937..921d5e25 100644 --- a/test/sql/local/irc/test_duckdb_catalog_functions_and_iceberg.test +++ b/test/sql/local/irc/test_duckdb_catalog_functions_and_iceberg.test @@ -51,21 +51,18 @@ select count(*) from duckdb_logs_parsed('HTTP'); query I select count(*) from duckdb_logs_parsed('HTTP'); ---- -5 +3 statement ok use memory; -# 3 more requests are made, -# 2 from previous duckdb_logs_parsed call for 'main', 'default', -# and 1 for 'memory' -# requests no longer go up +# namespace 'memory' is looked up in the iceberg catalog query I select count(*) from duckdb_logs_parsed('HTTP'); ---- -8 +4 query I select count(*) from duckdb_logs_parsed('HTTP'); ---- -8 \ No newline at end of file +4 \ No newline at end of file diff --git a/test/sql/local/irc/test_show_all_tables.test b/test/sql/local/irc/test_show_all_tables.test new file mode 100644 index 00000000..d691ffb5 --- /dev/null +++ b/test/sql/local/irc/test_show_all_tables.test @@ -0,0 +1,54 @@ +# name: test/sql/local/irc/test_show_all_tables.test +# description: test integration with iceberg catalog read +# group: [irc] + +require-env ICEBERG_SERVER_AVAILABLE + +require avro + +require parquet + +require iceberg + +require httpfs + +# Do not ignore 'HTTP' error messages! +set ignore_error_messages + +statement ok +set enable_logging=true + +statement ok +set logging_level='debug' + +statement ok +CALL enable_logging('HTTP'); + +statement ok +pragma threads=1; + +statement ok +CREATE SECRET ( + TYPE S3, + KEY_ID 'admin', + SECRET 'password', + ENDPOINT '127.0.0.1:9000', + URL_STYLE 'path', + USE_SSL 0 +); + + +statement ok +ATTACH '' AS my_datalake ( + TYPE ICEBERG, + CLIENT_ID 'admin', + CLIENT_SECRET 'password', + ENDPOINT 'http://127.0.0.1:8181' +); + +statement ok +show all tables; + + +# statement ok +# show all tables; From b9a58bdefc3d220a3053b18cb80335317c694504 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 11 Sep 2025 19:36:33 +0200 Subject: [PATCH 03/12] ff --- src/include/storage/irc_catalog.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/storage/irc_catalog.hpp b/src/include/storage/irc_catalog.hpp index 83edf208..f84b0359 100644 --- a/src/include/storage/irc_catalog.hpp +++ b/src/include/storage/irc_catalog.hpp @@ -49,7 +49,7 @@ class IRCatalog : public Catalog { static void SetAWSCatalogOptions(IcebergAttachOptions &attach_options, case_insensitive_set_t &set_by_attach_options); //! Whether or not this catalog should search a specific type with the standard priority - CatalogLookupBehavior CatalogTypeLookupRule(CatalogType type) const override{ + CatalogLookupBehavior CatalogTypeLookupRule(CatalogType type) const override { switch (type) { case CatalogType::TABLE_FUNCTION_ENTRY: case CatalogType::SCALAR_FUNCTION_ENTRY: From d1656ed12c4cf17ec7147ef71ad41780c6d922d4 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Thu, 11 Sep 2025 23:07:59 +0200 Subject: [PATCH 04/12] fix test --- test/sql/local/irc/test_show_all_tables.test | 11 +++- test/sql/local/test_iceberg_and_ducklake.test | 57 +++++++++++++++++++ 2 files changed, 65 insertions(+), 3 deletions(-) create mode 100644 test/sql/local/test_iceberg_and_ducklake.test diff --git a/test/sql/local/irc/test_show_all_tables.test b/test/sql/local/irc/test_show_all_tables.test index d691ffb5..e55122e7 100644 --- a/test/sql/local/irc/test_show_all_tables.test +++ b/test/sql/local/irc/test_show_all_tables.test @@ -49,6 +49,11 @@ ATTACH '' AS my_datalake ( statement ok show all tables; - -# statement ok -# show all tables; +# 1 call for oath, 1 call for config +# 1 call to list namespaces +# 1 call to list tables in default +# 1 call to list tables in level1 namespace (no recursive namespace calls) +query I +select count(*) = 5 from duckdb_logs_parsed('HTTP'); +---- +1 diff --git a/test/sql/local/test_iceberg_and_ducklake.test b/test/sql/local/test_iceberg_and_ducklake.test new file mode 100644 index 00000000..53a6dc80 --- /dev/null +++ b/test/sql/local/test_iceberg_and_ducklake.test @@ -0,0 +1,57 @@ +# name: test/sql/local/test_iceberg_and_ducklake.test +# description: test integration with iceberg catalog read +# group: [local] + +require-env ICEBERG_SERVER_AVAILABLE + +require avro + +require parquet + +require iceberg + +require httpfs + +require ducklake + +# Do not ignore 'HTTP' error messages! +set ignore_error_messages + +statement ok +pragma threads=1; + +statement ok +CALL enable_logging('HTTP'); + +statement ok +set logging_level='debug'; + +statement ok +CREATE SECRET ( + TYPE S3, + KEY_ID 'admin', + SECRET 'password', + ENDPOINT '127.0.0.1:9000', + URL_STYLE 'path', + USE_SSL 0 +); + + +statement ok +ATTACH '' AS my_datalake ( + TYPE ICEBERG, + CLIENT_ID 'admin', + CLIENT_SECRET 'password', + ENDPOINT 'http://127.0.0.1:8181' +); + +statement ok +ATTACH 'ducklake:duckdb:__TEST_DIR__/ducklake.duckdb' as my_ducklake (DATA_PATH '__TEST_DIR__/data_path'); + +# select request.url, request.type, response.status from +# one request to get the auth token, one to the config endpoint on attach +duckdb_logs_parsed('HTTP'); +query I +select count(*) from duckdb_logs_parsed('HTTP'); +---- +2 \ No newline at end of file From 31a9980cf21c3bcf5e1f74e6d3e30ce4b9842ff9 Mon Sep 17 00:00:00 2001 From: Patrick Lucas Date: Fri, 12 Sep 2025 13:43:54 +0200 Subject: [PATCH 05/12] Add test for expression filter pushdown --- ...-c3e4-4e6d-a22b-d85e4a813169-00001.parquet | Bin 0 -> 705 bytes ...-3a9d-4b9b-ad87-daf78583a550.metadata.json | 1 + ...-f528-4429-84cc-377ffdd24c75.metadata.json | 1 + ...30f58e-7333-4451-983d-eaf657a21a11-m0.avro | Bin 0 -> 7026 bytes ...-8d30f58e-7333-4451-983d-eaf657a21a11.avro | Bin 0 -> 4445 bytes .../metadata/version-hint.text | 1 + .../iceberg_scans/expression_filter.test | 40 ++++++++++++++++++ 7 files changed, 43 insertions(+) create mode 100644 data/persistent/expression_filter/data/00000-0-1406cdaa-c3e4-4e6d-a22b-d85e4a813169-00001.parquet create mode 100644 data/persistent/expression_filter/metadata/00000-acdf842e-3a9d-4b9b-ad87-daf78583a550.metadata.json create mode 100644 data/persistent/expression_filter/metadata/00001-19739cda-f528-4429-84cc-377ffdd24c75.metadata.json create mode 100644 data/persistent/expression_filter/metadata/8d30f58e-7333-4451-983d-eaf657a21a11-m0.avro create mode 100644 data/persistent/expression_filter/metadata/snap-8096310958539014181-1-8d30f58e-7333-4451-983d-eaf657a21a11.avro create mode 100644 data/persistent/expression_filter/metadata/version-hint.text create mode 100644 test/sql/local/iceberg_scans/expression_filter.test diff --git a/data/persistent/expression_filter/data/00000-0-1406cdaa-c3e4-4e6d-a22b-d85e4a813169-00001.parquet b/data/persistent/expression_filter/data/00000-0-1406cdaa-c3e4-4e6d-a22b-d85e4a813169-00001.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6fc304b53c5ddf2aa90b6fd6cf9996a3b3eb4be4 GIT binary patch literal 705 zcmZ9K&ubGw6vy9gl64P3uylqUyU;L@CDCozO|xkda?z9YAVnw$qGW%}#)VDNZjyqu zh2DB7UOe~@h~5RSUc{St^{#l5-laF+WHnKTWqI@7`^@{kH?w!=flGimq(k0+{Q13P z5)HRH(E&Cu@V8iDbOu)KbNRJ${`}Kz1AUm&Cils!H(xHU<%sql5)1}|c&NQduC6M# z>TmVC`wD!0xH;HBn%z*Dp(|*~g=QhQfT)ZDqZ)pBQX%j&W)$MdWErvmVLv8DG{qE<>N)Rt0LUeti&z~a!a=hA^*s$`x1P{=OG0%hibuCd<=7DXa@VvYN4 zsL9?8+2&uz&^0*&*(lJn0=Y@wsiXt+gF-b+r{9!y*|%{$2}K~XVPhIT6(iqxsT}j! zvjf3RP)6Ol$!-g|V+rN_1T*dNJ!+}S)6hw~&87(iOsf6huL z;~_t_bELUM7Q_69iv4uD!camY!N;dy>>l{p;atqNMp@-ZWYc6auDA`?YcwmhV4kE= yrQ>ylAG&QXa5{0fxfAyOY;%GZg=zJa!^!k1BJHMCTdL@wsU)P$=bc$b9XN# zt$`NWKmYPqB}9OBkPtvYu_qfhjb z=HVt?5C4_4fV|&!0s=2!Hz_5yT;E&JR!V$}A+{`Gw@p&J8rTixHVDC?8|POJ;ibSo z!Xvp_5ithD_5vDG&PM_J<*Ki@$2QXf(oaP+gD4bLL~FH#{J^GvPzwie z_UR(i%W84Jiil+|qR+=(FQ?F-Ur7=?lNdo1QxvsZE-CR0F{|7?6<5{(3IB}6XZ>+?@j2~f_ zl?w`CIW?awR}W1s2HSVrP0ymsrPZX-1hO~?Fr5%iM4C{?;(2zYT-5amYf0BDNj9mX zzeuQ!oKXTh6*QA!7cY)sRFcpuFzD!9S#WIZ&hRPR#4VVA2^yzxi(<_)L{1$=KKv*` z3>?}&PIwU_${$MQN;W2bz`Fz0BtT`9`+;gQ8L&@NoT0B(Q3LIdHFJ z;Otb-gs9sq=}3A{39eWtt&r>WTp*MNRW+xg?jRH;qEErHo@BO=ad(*cdkfQ~1G z)%Ke}vYZg^V|Krz6UV5Rg4onD4*dYo3hGeH-E;>HLLAUEtOb$@Yr#jMi&1qN2c$s) zsz>TpC-G0l3RQqqehFt&aN{i_mL#NAM{dHYU+N^)!*uRp&P6E~1m=k&gyEu%89ork zIELb|;lNi^f=(~4XP6qa{W`Jt1E?;J$YIXFN0vwsf{qw8G(mGmD9wfP87v=mYyu;B zAr_Kf91*M!Qtd(Pg8WUCVWJ{)1TQ@#I+@ZEIZWwExizhN8Qn=Xo6=>6-s{Q0s4&39 zOs8t>$%@qMYQ4y{C7qr)4 zByxayRD?2zNs$AyE47Ps&|3~*h$@mUIT+AbyOTq)R1-<*vXi^1vTbH&K-cDF1`W2; z%sMobNW+A(i0_v!a%r1cm{3SLS(tEiJI~^g;zs0UjETH5I%b>O2g$na&`XB39C+*8XK`7L8AEz^Jasvr>IoAMHZdQiE4PWYM#fRC6$ z5g^^jyC=){bNq2m%?NticTTm&UpuV%O-P z5M^{}Ss?ADf-4pWBF<4c%Cb5@8cqco-y!_0(t1EzRWISL)IZO7hW7kl5&%d&{0W`D9lTK-D->V;fbmOUG`rUH5eC@eG^vl(OC+E)Ao<4W|`6KH$HxA4^xWDJ@ JZv!21{|B)JAw~cI literal 0 HcmV?d00001 diff --git a/data/persistent/expression_filter/metadata/snap-8096310958539014181-1-8d30f58e-7333-4451-983d-eaf657a21a11.avro b/data/persistent/expression_filter/metadata/snap-8096310958539014181-1-8d30f58e-7333-4451-983d-eaf657a21a11.avro new file mode 100644 index 0000000000000000000000000000000000000000..6d39862fb3c6d99fb54786a6d707fdfe717a4852 GIT binary patch literal 4445 zcmbVPO>f*p7;bawg`QBkR4`Z-LLxzSl1+D4>H*Or6;)8!w3JP&)vP^^ogLeYJx5s}Yg z;+v%7$6l9&GVX#Tt?U#^PoaTB`~oQ&;0R&MBK+n_Qd5eYd@~&OldTHxq5M76r9pQhrO|+!yqp*S zGq8wx7SyJgk#qU{h0DEB*BfgqhTf>vYby=iST*X#dAOQ~+0r%%BiNk_%dh4Ge<|HF z3&wfZB(Rn?GiUMj>!NwU5X2^5t*Pfo^wSH5BHRCE?~yr+5LS^J!p3R>6cJrDhtuFC zo3a>X7U3}}cnb5e(sM|PWkDf&S#cW`B&uN;?>+>9aZwOR-!TL1JU&E{)dX3dbLl`T zNDG3s=5QpEbxp92_ad_}&*)7AslFme$GMVOC|fhQ6v?+b!dLVs6?|%;l>LeXmw6L= zdTxBNw^d6dDP)rJl|45@zQV^67_yVngcKY>g+7JiE4djFydje<^qCu8=%Y9g$sp64 z%QH7YF3&i8U5)`UXSlbKy4aXTVqA)OOu@Ipv5|Eq9w@d)Om5}lFP)+dQMuBDq>e|j z7iM+tMy8pOaSli-S|3B3cpu4HxU(=p(`d(dAjt~LD*Grd+MP;L*9SYm7jg0hJd5v< zWO6?hEzaCz)n2FIW+X1p7ucLM=n0-i#wx2|#J)(-k#rQ5ndoC6$_r4}+;J)H{U@J1J#K$={bBp|(c|M+ zj=y=_+HZevOf?^(~M^oL%zyLHlY@zK-6CkM8@^{rzaIL^so@4H90 nZa)0?!lT{Y`+u+h_SKJneDTxsf4 Date: Fri, 12 Sep 2025 14:58:19 +0200 Subject: [PATCH 06/12] PR comments. more tests --- .../storage/iceberg_table_information.hpp | 2 +- src/storage/iceberg_table_information.cpp | 3 +- src/storage/irc_table_set.cpp | 15 +++-- test/sql/local/irc/iceberg_catalog_read.test | 2 +- test/sql/local/irc/test_show_all_tables.test | 57 +++++++++++++++++-- test/sql/local/test_iceberg_and_ducklake.test | 7 +-- 6 files changed, 70 insertions(+), 16 deletions(-) diff --git a/src/include/storage/iceberg_table_information.hpp b/src/include/storage/iceberg_table_information.hpp index 3c0ea2a4..01e257a0 100644 --- a/src/include/storage/iceberg_table_information.hpp +++ b/src/include/storage/iceberg_table_information.hpp @@ -45,7 +45,7 @@ struct IcebergTableInformation { IRCSchemaEntry &schema; string name; string table_id; - // bool deleted; + bool filled = false; rest_api_objects::LoadTableResult load_table_result; IcebergTableMetadata table_metadata; diff --git a/src/storage/iceberg_table_information.cpp b/src/storage/iceberg_table_information.cpp index a56fe264..c4383cae 100644 --- a/src/storage/iceberg_table_information.cpp +++ b/src/storage/iceberg_table_information.cpp @@ -218,6 +218,7 @@ optional_ptr IcebergTableInformation::CreateSchemaVersion(IcebergT optional_ptr IcebergTableInformation::GetSchemaVersion(optional_ptr at) { D_ASSERT(!schema_versions.empty()); + D_ASSERT(filled); auto snapshot_lookup = IcebergSnapshotLookup::FromAtClause(at); int32_t schema_id; @@ -232,7 +233,7 @@ optional_ptr IcebergTableInformation::GetSchemaVersion(optional_pt } IcebergTableInformation::IcebergTableInformation(IRCatalog &catalog, IRCSchemaEntry &schema, const string &name) - : catalog(catalog), schema(schema), name(name) { + : catalog(catalog), schema(schema), name(name), filled(false) { table_id = "uuid-" + schema.name + "-" + name; } diff --git a/src/storage/irc_table_set.cpp b/src/storage/irc_table_set.cpp index 31af7861..c8a9e7a5 100644 --- a/src/storage/irc_table_set.cpp +++ b/src/storage/irc_table_set.cpp @@ -30,10 +30,11 @@ ICTableSet::ICTableSet(IRCSchemaEntry &schema) : schema(schema), catalog(schema. } bool ICTableSet::FillEntry(ClientContext &context, IcebergTableInformation &table) { - if (!table.schema_versions.empty()) { - //! Already filled + if (table.filled) { return true; } + // The table has not been filled yet, clear all dummy schema versions + table.schema_versions.clear(); auto &ic_catalog = catalog.Cast(); @@ -57,6 +58,7 @@ bool ICTableSet::FillEntry(ClientContext &context, IcebergTableInformation &tabl for (auto &table_schema : schemas) { table.CreateSchemaVersion(*table_schema.second); } + table.filled = true; return true; } @@ -67,13 +69,15 @@ void ICTableSet::Scan(ClientContext &context, const std::function columns; auto col = ColumnDefinition(string("__"), LogicalType::UNKNOWN); columns.push_back(std::move(col)); info.columns = ColumnList(std::move(columns)); auto table_entry = make_uniq(table_info, catalog, schema, info); - if (!table_entry->internal) { table_entry->internal = schema.internal; } @@ -82,9 +86,8 @@ void ICTableSet::Scan(ClientContext &context, const std::functionCast(); + table_info.table_metadata.current_schema_id = 0; callback(optional); } // erase not iceberg tables @@ -128,6 +131,8 @@ bool ICTableSet::CreateNewEntry(ClientContext &context, IRCatalog &catalog, IRCS auto table_entry = make_uniq(table_info, catalog, schema, info); auto optional_entry = table_entry.get(); + // Since we are creating the schema, we set the table information to filled + table_info.filled = true; optional_entry->table_info.schema_versions[0] = std::move(table_entry); optional_entry->table_info.table_metadata.schemas[0] = diff --git a/test/sql/local/irc/iceberg_catalog_read.test b/test/sql/local/irc/iceberg_catalog_read.test index dc1d7dfe..6fcd215b 100644 --- a/test/sql/local/irc/iceberg_catalog_read.test +++ b/test/sql/local/irc/iceberg_catalog_read.test @@ -19,7 +19,7 @@ statement ok CALL enable_logging('HTTP'); statement ok -set logging_level='debug' +set logging_level='debug'; statement ok CREATE SECRET ( diff --git a/test/sql/local/irc/test_show_all_tables.test b/test/sql/local/irc/test_show_all_tables.test index e55122e7..50d08356 100644 --- a/test/sql/local/irc/test_show_all_tables.test +++ b/test/sql/local/irc/test_show_all_tables.test @@ -24,9 +24,6 @@ set logging_level='debug' statement ok CALL enable_logging('HTTP'); -statement ok -pragma threads=1; - statement ok CREATE SECRET ( TYPE S3, @@ -54,6 +51,58 @@ show all tables; # 1 call to list tables in default # 1 call to list tables in level1 namespace (no recursive namespace calls) query I -select count(*) = 5 from duckdb_logs_parsed('HTTP'); +select count(*) from duckdb_logs_parsed('HTTP'); +---- +5 + +statement ok +call truncate_duckdb_logs(); + +query II +select column_name, column_type from (describe my_datalake.default.supplier); +---- +s_suppkey BIGINT +s_name VARCHAR +s_address VARCHAR +s_nationkey INTEGER +s_phone VARCHAR +s_acctbal DECIMAL(15,2) +s_comment VARCHAR + +# one request to verify the default schema +# another request to verify table default.supplier +# another request to the table information endpoint +# FIXME: apparantly there is also a request to an avro file +query I +select count(*) from duckdb_logs_parsed('HTTP'); +---- +4 + +statement ok +begin; + +statement ok +show all tables; + +query I +select distinct(s_nationkey) from my_datalake.default.supplier order by all limit 5; ---- +0 1 +2 +3 +4 + +statement ok +commit; + +# 5 calls to list the namespaces +# 1 call the the GetTableInformationEndpoint for supploer +# (FIXME) 1 call to an avro file in the warehouse +# 1 call to the manifest file +# 1 call to the manifest list +# 2 calls to read parquet files +query I +select count(*) from duckdb_logs_parsed('HTTP'); +---- +11 \ No newline at end of file diff --git a/test/sql/local/test_iceberg_and_ducklake.test b/test/sql/local/test_iceberg_and_ducklake.test index 53a6dc80..cc51ffea 100644 --- a/test/sql/local/test_iceberg_and_ducklake.test +++ b/test/sql/local/test_iceberg_and_ducklake.test @@ -48,10 +48,9 @@ ATTACH '' AS my_datalake ( statement ok ATTACH 'ducklake:duckdb:__TEST_DIR__/ducklake.duckdb' as my_ducklake (DATA_PATH '__TEST_DIR__/data_path'); -# select request.url, request.type, response.status from -# one request to get the auth token, one to the config endpoint on attach -duckdb_logs_parsed('HTTP'); +# 2 requests to the iceberg catalog for oauth and config +# 3 requests when attaching ducklake because a ducklake attach calls from duckdb_tables() query I select count(*) from duckdb_logs_parsed('HTTP'); ---- -2 \ No newline at end of file +5 \ No newline at end of file From eb307d486fda8c6f2884e9f0ab90675ffdd0225d Mon Sep 17 00:00:00 2001 From: Tmonster Date: Fri, 12 Sep 2025 14:58:50 +0200 Subject: [PATCH 07/12] rename file --- ...how_all_tables.test => test_table_information_requests.test} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename test/sql/local/irc/{test_show_all_tables.test => test_table_information_requests.test} (96%) diff --git a/test/sql/local/irc/test_show_all_tables.test b/test/sql/local/irc/test_table_information_requests.test similarity index 96% rename from test/sql/local/irc/test_show_all_tables.test rename to test/sql/local/irc/test_table_information_requests.test index 50d08356..2067ed39 100644 --- a/test/sql/local/irc/test_show_all_tables.test +++ b/test/sql/local/irc/test_table_information_requests.test @@ -1,4 +1,4 @@ -# name: test/sql/local/irc/test_show_all_tables.test +# name: test/sql/local/irc/test_table_information_requests.test # description: test integration with iceberg catalog read # group: [irc] From e576e90614f1f1781591ed0ebf5863b05ce7cf8d Mon Sep 17 00:00:00 2001 From: Tmonster Date: Fri, 12 Sep 2025 17:04:30 +0200 Subject: [PATCH 08/12] use dummy entry for table listing --- src/include/storage/iceberg_table_information.hpp | 2 ++ src/storage/irc_table_set.cpp | 8 +++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/include/storage/iceberg_table_information.hpp b/src/include/storage/iceberg_table_information.hpp index 01e257a0..32a92165 100644 --- a/src/include/storage/iceberg_table_information.hpp +++ b/src/include/storage/iceberg_table_information.hpp @@ -50,6 +50,8 @@ struct IcebergTableInformation { rest_api_objects::LoadTableResult load_table_result; IcebergTableMetadata table_metadata; unordered_map> schema_versions; + // dummy entry to hold existence of a table, but no schema versions + unique_ptr dummy_entry; public: unique_ptr transaction_data; diff --git a/src/storage/irc_table_set.cpp b/src/storage/irc_table_set.cpp index c8a9e7a5..838f2978 100644 --- a/src/storage/irc_table_set.cpp +++ b/src/storage/irc_table_set.cpp @@ -69,6 +69,9 @@ void ICTableSet::Scan(ClientContext &context, const std::functionname.empty()) { throw InternalException("ICTableSet::CreateEntry called with empty name"); } - table_info.schema_versions.emplace(0, std::move(table_entry)); - auto &optional = table_info.schema_versions[0].get()->Cast(); - table_info.table_metadata.current_schema_id = 0; + table_info.dummy_entry = std::move(table_entry); + auto &optional = table_info.dummy_entry.get()->Cast(); callback(optional); } // erase not iceberg tables From 28eb6b4e3c2513ab79978074f77140a5d7b915e1 Mon Sep 17 00:00:00 2001 From: Tishj Date: Mon, 15 Sep 2025 10:35:50 +0200 Subject: [PATCH 09/12] slight clean up and added explanation --- src/iceberg_predicate.cpp | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/iceberg_predicate.cpp b/src/iceberg_predicate.cpp index bd5c3aad..60a563a8 100644 --- a/src/iceberg_predicate.cpp +++ b/src/iceberg_predicate.cpp @@ -93,30 +93,22 @@ bool MatchBoundsTemplated(const TableFilter &filter, const IcebergPredicateStats return true; } - if (expr.expression_class != ExpressionClass::BOUND_OPERATOR) { - return true; - } - + D_ASSERT(expr.GetExpressionClass() == ExpressionClass::BOUND_OPERATOR); auto &bound_operator_expr = expr.Cast(); - if (bound_operator_expr.children.size() != 1) { - return true; - } + D_ASSERT(bound_operator_expr.children.size() == 1); auto &child_expr = bound_operator_expr.children[0]; if (child_expr->type != ExpressionType::BOUND_REF) { + //! We can't evaluate expressions that aren't direct column references return true; } if (expr.type == ExpressionType::OPERATOR_IS_NULL) { return MatchBoundsIsNullFilter(stats, transform); - } - - if (expr.type == ExpressionType::OPERATOR_IS_NOT_NULL) { + } else { + D_ASSERT(expr.type == ExpressionType::OPERATOR_IS_NOT_NULL); return MatchBoundsIsNotNullFilter(stats, transform); } - - //! Should be unreachable - return true; } default: //! Conservative approach: we don't know what this is, just say it doesn't filter anything From dc4b0f0217361df0e78f6d755a630569c2197bf9 Mon Sep 17 00:00:00 2001 From: Tmonster Date: Mon, 15 Sep 2025 15:06:01 +0200 Subject: [PATCH 10/12] fix show all tables --- src/include/storage/iceberg_table_information.hpp | 1 - src/storage/iceberg_table_information.cpp | 3 +-- src/storage/irc_table_set.cpp | 14 ++++++-------- .../local/irc/test_table_information_requests.test | 6 ++++-- 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/src/include/storage/iceberg_table_information.hpp b/src/include/storage/iceberg_table_information.hpp index 32a92165..9ed9240e 100644 --- a/src/include/storage/iceberg_table_information.hpp +++ b/src/include/storage/iceberg_table_information.hpp @@ -45,7 +45,6 @@ struct IcebergTableInformation { IRCSchemaEntry &schema; string name; string table_id; - bool filled = false; rest_api_objects::LoadTableResult load_table_result; IcebergTableMetadata table_metadata; diff --git a/src/storage/iceberg_table_information.cpp b/src/storage/iceberg_table_information.cpp index c4383cae..a56fe264 100644 --- a/src/storage/iceberg_table_information.cpp +++ b/src/storage/iceberg_table_information.cpp @@ -218,7 +218,6 @@ optional_ptr IcebergTableInformation::CreateSchemaVersion(IcebergT optional_ptr IcebergTableInformation::GetSchemaVersion(optional_ptr at) { D_ASSERT(!schema_versions.empty()); - D_ASSERT(filled); auto snapshot_lookup = IcebergSnapshotLookup::FromAtClause(at); int32_t schema_id; @@ -233,7 +232,7 @@ optional_ptr IcebergTableInformation::GetSchemaVersion(optional_pt } IcebergTableInformation::IcebergTableInformation(IRCatalog &catalog, IRCSchemaEntry &schema, const string &name) - : catalog(catalog), schema(schema), name(name), filled(false) { + : catalog(catalog), schema(schema), name(name) { table_id = "uuid-" + schema.name + "-" + name; } diff --git a/src/storage/irc_table_set.cpp b/src/storage/irc_table_set.cpp index 838f2978..07af1855 100644 --- a/src/storage/irc_table_set.cpp +++ b/src/storage/irc_table_set.cpp @@ -30,11 +30,9 @@ ICTableSet::ICTableSet(IRCSchemaEntry &schema) : schema(schema), catalog(schema. } bool ICTableSet::FillEntry(ClientContext &context, IcebergTableInformation &table) { - if (table.filled) { + if (!table.schema_versions.empty()) { return true; } - // The table has not been filled yet, clear all dummy schema versions - table.schema_versions.clear(); auto &ic_catalog = catalog.Cast(); @@ -58,7 +56,6 @@ bool ICTableSet::FillEntry(ClientContext &context, IcebergTableInformation &tabl for (auto &table_schema : schemas) { table.CreateSchemaVersion(*table_schema.second); } - table.filled = true; return true; } @@ -70,11 +67,14 @@ void ICTableSet::Scan(ClientContext &context, const std::functionCast(); + callback(optional); continue; } - // create a table entry with fake schema data - // filled stays false + // create a table entry with fake schema data to avoid calling the LoadTableInformation endpoint for every + // table while listing schemas CreateTableInfo info(schema, table_info.name); vector columns; auto col = ColumnDefinition(string("__"), LogicalType::UNKNOWN); @@ -133,8 +133,6 @@ bool ICTableSet::CreateNewEntry(ClientContext &context, IRCatalog &catalog, IRCS auto table_entry = make_uniq(table_info, catalog, schema, info); auto optional_entry = table_entry.get(); - // Since we are creating the schema, we set the table information to filled - table_info.filled = true; optional_entry->table_info.schema_versions[0] = std::move(table_entry); optional_entry->table_info.table_metadata.schemas[0] = diff --git a/test/sql/local/irc/test_table_information_requests.test b/test/sql/local/irc/test_table_information_requests.test index 2067ed39..19f8a270 100644 --- a/test/sql/local/irc/test_table_information_requests.test +++ b/test/sql/local/irc/test_table_information_requests.test @@ -43,8 +43,10 @@ ATTACH '' AS my_datalake ( ENDPOINT 'http://127.0.0.1:8181' ); -statement ok -show all tables; +query I +select count(*) > 10 from (show all tables); +---- +1 # 1 call for oath, 1 call for config # 1 call to list namespaces From ed325e6deecec508286047d38334d7af0407ed05 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Mon, 15 Sep 2025 17:21:00 +0200 Subject: [PATCH 11/12] Implement also HEAD request --- src/aws.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/aws.cpp b/src/aws.cpp index 413e16a7..f40e8a87 100644 --- a/src/aws.cpp +++ b/src/aws.cpp @@ -257,6 +257,10 @@ unique_ptr AWSInput::ExecuteRequest(ClientContext &context, Aws::H */ } + if (method == Aws::Http::HttpMethod::HTTP_HEAD) { + HeadRequestInfo head_request(request_url, res, *params); + return http_util.Request(head_request); + } if (method == Aws::Http::HttpMethod::HTTP_GET) { GetRequestInfo get_request(request_url, res, *params, nullptr, nullptr); return http_util.Request(get_request); From 4a02d933a9c5b9a40bbf59f825937a5650167910 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Tue, 16 Sep 2025 04:05:08 +0200 Subject: [PATCH 12/12] Implement DELETE and PUT with body --- src/aws.cpp | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/src/aws.cpp b/src/aws.cpp index f40e8a87..8f796ea6 100644 --- a/src/aws.cpp +++ b/src/aws.cpp @@ -122,6 +122,18 @@ std::shared_ptr AWSInput::CreateSignedRequest(Aws::Http: // return request; } +static string GetPayloadHash(const char *buffer, idx_t buffer_len) { + if (buffer_len > 0) { + hash_bytes payload_hash_bytes; + hash_str payload_hash_str; + sha256(buffer, buffer_len, payload_hash_bytes); + hex256(payload_hash_bytes, payload_hash_str); + return string((char *)payload_hash_str, sizeof(payload_hash_str)); + } else { + return ""; + } +} + unique_ptr AWSInput::ExecuteRequest(ClientContext &context, Aws::Http::HttpMethod method, const string body, string content_type) { @@ -140,6 +152,11 @@ unique_ptr AWSInput::ExecuteRequest(ClientContext &context, Aws::H // If access key is not set, we don't set the headers at all to allow accessing public files through s3 urls string payload_hash = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"; // Empty payload hash + + if (!body.empty()) { + payload_hash = GetPayloadHash(body.c_str(), body.size()); + } + // key_id, secret, session_token // we can pass date/time but this is mostly useful in testing. normally we just get the current datetime // here. @@ -161,6 +178,9 @@ unique_ptr AWSInput::ExecuteRequest(ClientContext &context, Aws::H hash_str canonical_request_hash_str; if (content_type.length() > 0) { signed_headers += "content-type;"; +#ifdef EMSCRIPTEN + res["content-type"] = content_type; +#endif } signed_headers += "host;x-amz-content-sha256;x-amz-date"; if (session_token.length() > 0) { @@ -244,23 +264,14 @@ unique_ptr AWSInput::ExecuteRequest(ClientContext &context, Aws::H params = http_util.InitializeParameters(context, request_url); - if (!body.empty()) { - throw NotImplementedException("CreateSignedRequest with non-empty body is not supported at this time"); - /* - auto bodyStream = Aws::MakeShared(""); - *bodyStream << body; - request->AddContentBody(bodyStream); - request->SetContentLength(std::to_string(body.size())); - if (!content_type.empty()) { - request->SetHeaderValue("Content-Type", content_type); - } - */ - } - if (method == Aws::Http::HttpMethod::HTTP_HEAD) { HeadRequestInfo head_request(request_url, res, *params); return http_util.Request(head_request); } + if (method == Aws::Http::HttpMethod::HTTP_DELETE) { + DeleteRequestInfo delete_request(request_url, res, *params); + return http_util.Request(delete_request); + } if (method == Aws::Http::HttpMethod::HTTP_GET) { GetRequestInfo get_request(request_url, res, *params, nullptr, nullptr); return http_util.Request(get_request);