Skip to content

Commit 0f4177b

Browse files
committed
Merge remote-tracking branch 'origin/main' into v1.4-andium-wasm
2 parents ed325e6 + 06ca288 commit 0f4177b

15 files changed

+274
-20
lines changed
705 Bytes
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"format-version":2,"table-uuid":"d6293bed-4757-4504-9342-f69a447b7759","location":"data/persistent/expression_filter","last-sequence-number":0,"last-updated-ms":1757676428493,"last-column-id":2,"current-schema-id":0,"schemas":[{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":true,"type":"long"},{"id":2,"name":"value","required":false,"type":"string"}]}],"default-spec-id":0,"partition-specs":[{"spec-id":0,"fields":[]}],"last-partition-id":999,"default-sort-order-id":0,"sort-orders":[{"order-id":0,"fields":[]}],"properties":{"write.parquet.compression-codec":"zstd"},"current-snapshot-id":-1,"refs":{},"snapshots":[],"statistics":[],"partition-statistics":[],"snapshot-log":[],"metadata-log":[]}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"format-version":2,"table-uuid":"d6293bed-4757-4504-9342-f69a447b7759","location":"data/persistent/expression_filter","last-sequence-number":1,"last-updated-ms":1757676429141,"last-column-id":2,"current-schema-id":0,"schemas":[{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":true,"type":"long"},{"id":2,"name":"value","required":false,"type":"string"}]}],"default-spec-id":0,"partition-specs":[{"spec-id":0,"fields":[]}],"last-partition-id":999,"default-sort-order-id":0,"sort-orders":[{"order-id":0,"fields":[]}],"properties":{"write.parquet.compression-codec":"zstd"},"current-snapshot-id":8096310958539014181,"refs":{"main":{"snapshot-id":8096310958539014181,"type":"branch"}},"snapshots":[{"sequence-number":1,"snapshot-id":8096310958539014181,"timestamp-ms":1757676429141,"summary":{"operation":"append","added-data-files":"1","added-records":"3","added-files-size":"705","changed-partition-count":"1","total-records":"3","total-files-size":"705","total-data-files":"1","total-delete-files":"0","total-position-deletes":"0","total-equality-deletes":"0","iceberg-version":"Apache Iceberg 1.9.2 (commit 071d5606bc6199a0be9b3f274ec7fbf111d88821)"},"manifest-list":"data/persistent/expression_filter/metadata/snap-8096310958539014181-1-8d30f58e-7333-4451-983d-eaf657a21a11.avro","schema-id":0}],"statistics":[],"partition-statistics":[],"snapshot-log":[{"timestamp-ms":1757676429141,"snapshot-id":8096310958539014181}],"metadata-log":[{"timestamp-ms":1757676428493,"metadata-file":"data/persistent/expression_filter/metadata/00000-acdf842e-3a9d-4b9b-ad87-daf78583a550.metadata.json"}]}
6.86 KB
Binary file not shown.
4.34 KB
Binary file not shown.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
00001-19739cda-f528-4429-84cc-377ffdd24c75

src/iceberg_predicate.cpp

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "iceberg_predicate.hpp"
2+
#include "duckdb/planner/expression/bound_operator_expression.hpp"
23
#include "duckdb/planner/filter/constant_filter.hpp"
34
#include "duckdb/planner/filter/conjunction_filter.hpp"
45
#include "duckdb/planner/filter/null_filter.hpp"
@@ -80,16 +81,34 @@ bool MatchBoundsTemplated(const TableFilter &filter, const IcebergPredicateStats
8081
return MatchBoundsIsNotNullFilter<TRANSFORM>(stats, transform);
8182
}
8283
case TableFilterType::EXPRESSION_FILTER: {
84+
//! Expressions can be arbitrarily complex, and we currently only support IS NULL/IS NOT NULL checks against the
85+
//! column itself, i.e. where the expression is a BOUND_OPERATOR with type OPERATOR_IS_NULL/_IS_NOT_NULL with a
86+
//! single child expression of type BOUND_REF.
87+
//!
88+
//! See duckdb/duckdb-iceberg#464
8389
auto &expression_filter = filter.Cast<ExpressionFilter>();
8490
auto &expr = *expression_filter.expr;
91+
92+
if (expr.type != ExpressionType::OPERATOR_IS_NULL && expr.type != ExpressionType::OPERATOR_IS_NOT_NULL) {
93+
return true;
94+
}
95+
96+
D_ASSERT(expr.GetExpressionClass() == ExpressionClass::BOUND_OPERATOR);
97+
auto &bound_operator_expr = expr.Cast<BoundOperatorExpression>();
98+
99+
D_ASSERT(bound_operator_expr.children.size() == 1);
100+
auto &child_expr = bound_operator_expr.children[0];
101+
if (child_expr->type != ExpressionType::BOUND_REF) {
102+
//! We can't evaluate expressions that aren't direct column references
103+
return true;
104+
}
105+
85106
if (expr.type == ExpressionType::OPERATOR_IS_NULL) {
86107
return MatchBoundsIsNullFilter<TRANSFORM>(stats, transform);
87-
}
88-
if (expr.type == ExpressionType::OPERATOR_IS_NOT_NULL) {
108+
} else {
109+
D_ASSERT(expr.type == ExpressionType::OPERATOR_IS_NOT_NULL);
89110
return MatchBoundsIsNotNullFilter<TRANSFORM>(stats, transform);
90111
}
91-
//! Any other expression can not be filtered
92-
return true;
93112
}
94113
default:
95114
//! Conservative approach: we don't know what this is, just say it doesn't filter anything

src/include/storage/iceberg_table_information.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,12 @@ struct IcebergTableInformation {
4545
IRCSchemaEntry &schema;
4646
string name;
4747
string table_id;
48-
// bool deleted;
4948

5049
rest_api_objects::LoadTableResult load_table_result;
5150
IcebergTableMetadata table_metadata;
5251
unordered_map<int32_t, unique_ptr<ICTableEntry>> schema_versions;
52+
// Placeholder entry that records the table's existence while no schema versions have been loaded
53+
unique_ptr<ICTableEntry> dummy_entry;
5354

5455
public:
5556
unique_ptr<IcebergTransactionData> transaction_data;

src/include/storage/irc_catalog.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,17 @@ class IRCatalog : public Catalog {
4848
bool SetCachedValue(const string &url, const string &value, const rest_api_objects::LoadTableResult &result);
4949
static void SetAWSCatalogOptions(IcebergAttachOptions &attach_options,
5050
case_insensitive_set_t &set_by_attach_options);
51+
//! Whether or not this catalog should search a specific type with the standard priority.
//! Function entries (table, scalar and aggregate) return NEVER_LOOKUP; every other catalog type returns STANDARD.
//! NOTE(review): presumably this catalog never holds function entries, so lookups for them are skipped - confirm.
52+
CatalogLookupBehavior CatalogTypeLookupRule(CatalogType type) const override {
53+
switch (type) {
54+
case CatalogType::TABLE_FUNCTION_ENTRY:
55+
case CatalogType::SCALAR_FUNCTION_ENTRY:
56+
case CatalogType::AGGREGATE_FUNCTION_ENTRY:
57+
return CatalogLookupBehavior::NEVER_LOOKUP;
58+
default:
59+
return CatalogLookupBehavior::STANDARD;
60+
}
61+
}
5162

5263
public:
5364
static unique_ptr<Catalog> Attach(optional_ptr<StorageExtensionInfo> storage_info, ClientContext &context,

src/storage/irc_table_set.cpp

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@ ICTableSet::ICTableSet(IRCSchemaEntry &schema) : schema(schema), catalog(schema.
3131

3232
bool ICTableSet::FillEntry(ClientContext &context, IcebergTableInformation &table) {
3333
if (!table.schema_versions.empty()) {
34-
//! Already filled
3534
return true;
3635
}
3736

@@ -67,13 +66,31 @@ void ICTableSet::Scan(ClientContext &context, const std::function<void(CatalogEn
6766
auto table_namespace = IRCAPI::GetEncodedSchemaName(schema.namespace_items);
6867
for (auto &entry : entries) {
6968
auto &table_info = entry.second;
70-
if (FillEntry(context, table_info)) {
71-
auto schema_id = table_info.table_metadata.current_schema_id;
72-
callback(*table_info.schema_versions[schema_id]);
73-
} else {
74-
DUCKDB_LOG(context, IcebergLogType, "Table %s.%s not an Iceberg Table", table_namespace, entry.first);
75-
non_iceberg_tables.insert(entry.first);
69+
if (table_info.dummy_entry) {
70+
// FIXME: why do we need to return the same entry again?
71+
auto &optional = table_info.dummy_entry.get()->Cast<CatalogEntry>();
72+
callback(optional);
73+
continue;
7674
}
75+
76+
// create a table entry with fake schema data to avoid calling the LoadTableInformation endpoint for every
77+
// table while listing schemas
78+
CreateTableInfo info(schema, table_info.name);
79+
vector<ColumnDefinition> columns;
80+
auto col = ColumnDefinition(string("__"), LogicalType::UNKNOWN);
81+
columns.push_back(std::move(col));
82+
info.columns = ColumnList(std::move(columns));
83+
auto table_entry = make_uniq<ICTableEntry>(table_info, catalog, schema, info);
84+
if (!table_entry->internal) {
85+
table_entry->internal = schema.internal;
86+
}
87+
auto result = table_entry.get();
88+
if (result->name.empty()) {
89+
throw InternalException("ICTableSet::CreateEntry called with empty name");
90+
}
91+
table_info.dummy_entry = std::move(table_entry);
92+
auto &optional = table_info.dummy_entry.get()->Cast<CatalogEntry>();
93+
callback(optional);
7794
}
7895
// erase not iceberg tables
7996
for (auto &entry : non_iceberg_tables) {

0 commit comments

Comments
 (0)