diff --git a/src/include/storage/iceberg_table_information.hpp b/src/include/storage/iceberg_table_information.hpp index 3c0ea2a4..9ed9240e 100644 --- a/src/include/storage/iceberg_table_information.hpp +++ b/src/include/storage/iceberg_table_information.hpp @@ -45,11 +45,12 @@ struct IcebergTableInformation { IRCSchemaEntry &schema; string name; string table_id; - // bool deleted; rest_api_objects::LoadTableResult load_table_result; IcebergTableMetadata table_metadata; unordered_map> schema_versions; + // dummy entry to hold existence of a table, but no schema versions + unique_ptr dummy_entry; public: unique_ptr transaction_data; diff --git a/src/include/storage/irc_catalog.hpp b/src/include/storage/irc_catalog.hpp index d6f8b8be..f84b0359 100644 --- a/src/include/storage/irc_catalog.hpp +++ b/src/include/storage/irc_catalog.hpp @@ -48,6 +48,17 @@ class IRCatalog : public Catalog { bool SetCachedValue(const string &url, const string &value, const rest_api_objects::LoadTableResult &result); static void SetAWSCatalogOptions(IcebergAttachOptions &attach_options, case_insensitive_set_t &set_by_attach_options); + //! Whether or not this catalog should search a specific type with the standard priority + CatalogLookupBehavior CatalogTypeLookupRule(CatalogType type) const override { + switch (type) { + case CatalogType::TABLE_FUNCTION_ENTRY: + case CatalogType::SCALAR_FUNCTION_ENTRY: + case CatalogType::AGGREGATE_FUNCTION_ENTRY: + return CatalogLookupBehavior::NEVER_LOOKUP; + default: + return CatalogLookupBehavior::STANDARD; + } + } public: static unique_ptr Attach(optional_ptr storage_info, ClientContext &context, diff --git a/src/storage/irc_table_set.cpp b/src/storage/irc_table_set.cpp index f1a9c22c..07af1855 100644 --- a/src/storage/irc_table_set.cpp +++ b/src/storage/irc_table_set.cpp @@ -31,7 +31,6 @@ ICTableSet::ICTableSet(IRCSchemaEntry &schema) : schema(schema), catalog(schema. 
bool ICTableSet::FillEntry(ClientContext &context, IcebergTableInformation &table) { if (!table.schema_versions.empty()) { - //! Already filled return true; } @@ -67,13 +66,31 @@ void ICTableSet::Scan(ClientContext &context, const std::functionCast(); + callback(optional); + continue; } + + // create a table entry with fake schema data to avoid calling the LoadTableInformation endpoint for every + // table while listing schemas + CreateTableInfo info(schema, table_info.name); + vector columns; + auto col = ColumnDefinition(string("__"), LogicalType::UNKNOWN); + columns.push_back(std::move(col)); + info.columns = ColumnList(std::move(columns)); + auto table_entry = make_uniq(table_info, catalog, schema, info); + if (!table_entry->internal) { + table_entry->internal = schema.internal; + } + auto result = table_entry.get(); + if (result->name.empty()) { + throw InternalException("ICTableSet::CreateEntry called with empty name"); + } + table_info.dummy_entry = std::move(table_entry); + auto &optional = table_info.dummy_entry.get()->Cast(); + callback(optional); } // erase not iceberg tables for (auto &entry : non_iceberg_tables) { diff --git a/test/sql/local/irc/iceberg_catalog_read.test b/test/sql/local/irc/iceberg_catalog_read.test index dc1d7dfe..6fcd215b 100644 --- a/test/sql/local/irc/iceberg_catalog_read.test +++ b/test/sql/local/irc/iceberg_catalog_read.test @@ -19,7 +19,7 @@ statement ok CALL enable_logging('HTTP'); statement ok -set logging_level='debug' +set logging_level='debug'; statement ok CREATE SECRET ( diff --git a/test/sql/local/irc/test_duckdb_catalog_functions_and_iceberg.test b/test/sql/local/irc/test_duckdb_catalog_functions_and_iceberg.test index 2841b937..921d5e25 100644 --- a/test/sql/local/irc/test_duckdb_catalog_functions_and_iceberg.test +++ b/test/sql/local/irc/test_duckdb_catalog_functions_and_iceberg.test @@ -51,21 +51,18 @@ select count(*) from duckdb_logs_parsed('HTTP'); query I select count(*) from duckdb_logs_parsed('HTTP'); ---- -5 
+3 statement ok use memory; -# 3 more requests are made, -# 2 from previous duckdb_logs_parsed call for 'main', 'default', -# and 1 for 'memory' -# requests no longer go up +# namespace 'memory' is looked up in the iceberg catalog query I select count(*) from duckdb_logs_parsed('HTTP'); ---- -8 +4 query I select count(*) from duckdb_logs_parsed('HTTP'); ---- -8 \ No newline at end of file +4 \ No newline at end of file diff --git a/test/sql/local/irc/test_table_information_requests.test b/test/sql/local/irc/test_table_information_requests.test new file mode 100644 index 00000000..19f8a270 --- /dev/null +++ b/test/sql/local/irc/test_table_information_requests.test @@ -0,0 +1,110 @@ +# name: test/sql/local/irc/test_table_information_requests.test +# description: test integration with iceberg catalog read +# group: [irc] + +require-env ICEBERG_SERVER_AVAILABLE + +require avro + +require parquet + +require iceberg + +require httpfs + +# Do not ignore 'HTTP' error messages! +set ignore_error_messages + +statement ok +set enable_logging=true + +statement ok +set logging_level='debug' + +statement ok +CALL enable_logging('HTTP'); + +statement ok +CREATE SECRET ( + TYPE S3, + KEY_ID 'admin', + SECRET 'password', + ENDPOINT '127.0.0.1:9000', + URL_STYLE 'path', + USE_SSL 0 +); + + +statement ok +ATTACH '' AS my_datalake ( + TYPE ICEBERG, + CLIENT_ID 'admin', + CLIENT_SECRET 'password', + ENDPOINT 'http://127.0.0.1:8181' +); + +query I +select count(*) > 10 from (show all tables); +---- +1 + +# 1 call for oauth, 1 call for config +# 1 call to list namespaces +# 1 call to list tables in default +# 1 call to list tables in level1 namespace (no recursive namespace calls) +query I +select count(*) from duckdb_logs_parsed('HTTP'); +---- +5 + +statement ok +call truncate_duckdb_logs(); + +query II +select column_name, column_type from (describe my_datalake.default.supplier); +---- +s_suppkey BIGINT +s_name VARCHAR +s_address VARCHAR +s_nationkey INTEGER +s_phone VARCHAR +s_acctbal 
DECIMAL(15,2) +s_comment VARCHAR + +# one request to verify the default schema +# another request to verify table default.supplier +# another request to the table information endpoint +# FIXME: apparently there is also a request to an avro file +query I +select count(*) from duckdb_logs_parsed('HTTP'); +---- +4 + +statement ok +begin; + +statement ok +show all tables; + +query I +select distinct(s_nationkey) from my_datalake.default.supplier order by all limit 5; +---- +0 +1 +2 +3 +4 + +statement ok +commit; + +# 5 calls to list the namespaces +# 1 call to the GetTableInformationEndpoint for supplier +# (FIXME) 1 call to an avro file in the warehouse +# 1 call to the manifest file +# 1 call to the manifest list +# 2 calls to read parquet files +query I +select count(*) from duckdb_logs_parsed('HTTP'); +---- +11 \ No newline at end of file diff --git a/test/sql/local/test_iceberg_and_ducklake.test b/test/sql/local/test_iceberg_and_ducklake.test new file mode 100644 index 00000000..cc51ffea --- /dev/null +++ b/test/sql/local/test_iceberg_and_ducklake.test @@ -0,0 +1,56 @@ +# name: test/sql/local/test_iceberg_and_ducklake.test +# description: test integration with iceberg catalog read +# group: [local] + +require-env ICEBERG_SERVER_AVAILABLE + +require avro + +require parquet + +require iceberg + +require httpfs + +require ducklake + +# Do not ignore 'HTTP' error messages! 
+set ignore_error_messages + +statement ok +pragma threads=1; + +statement ok +CALL enable_logging('HTTP'); + +statement ok +set logging_level='debug'; + +statement ok +CREATE SECRET ( + TYPE S3, + KEY_ID 'admin', + SECRET 'password', + ENDPOINT '127.0.0.1:9000', + URL_STYLE 'path', + USE_SSL 0 +); + + +statement ok +ATTACH '' AS my_datalake ( + TYPE ICEBERG, + CLIENT_ID 'admin', + CLIENT_SECRET 'password', + ENDPOINT 'http://127.0.0.1:8181' +); + +statement ok +ATTACH 'ducklake:duckdb:__TEST_DIR__/ducklake.duckdb' as my_ducklake (DATA_PATH '__TEST_DIR__/data_path'); + +# 2 requests to the iceberg catalog for oauth and config +# 3 requests when attaching ducklake because a ducklake attach calls from duckdb_tables() +query I +select count(*) from duckdb_logs_parsed('HTTP'); +---- +5 \ No newline at end of file