Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"format-version":2,"table-uuid":"d6293bed-4757-4504-9342-f69a447b7759","location":"data/persistent/expression_filter","last-sequence-number":0,"last-updated-ms":1757676428493,"last-column-id":2,"current-schema-id":0,"schemas":[{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":true,"type":"long"},{"id":2,"name":"value","required":false,"type":"string"}]}],"default-spec-id":0,"partition-specs":[{"spec-id":0,"fields":[]}],"last-partition-id":999,"default-sort-order-id":0,"sort-orders":[{"order-id":0,"fields":[]}],"properties":{"write.parquet.compression-codec":"zstd"},"current-snapshot-id":-1,"refs":{},"snapshots":[],"statistics":[],"partition-statistics":[],"snapshot-log":[],"metadata-log":[]}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"format-version":2,"table-uuid":"d6293bed-4757-4504-9342-f69a447b7759","location":"data/persistent/expression_filter","last-sequence-number":1,"last-updated-ms":1757676429141,"last-column-id":2,"current-schema-id":0,"schemas":[{"type":"struct","schema-id":0,"fields":[{"id":1,"name":"id","required":true,"type":"long"},{"id":2,"name":"value","required":false,"type":"string"}]}],"default-spec-id":0,"partition-specs":[{"spec-id":0,"fields":[]}],"last-partition-id":999,"default-sort-order-id":0,"sort-orders":[{"order-id":0,"fields":[]}],"properties":{"write.parquet.compression-codec":"zstd"},"current-snapshot-id":8096310958539014181,"refs":{"main":{"snapshot-id":8096310958539014181,"type":"branch"}},"snapshots":[{"sequence-number":1,"snapshot-id":8096310958539014181,"timestamp-ms":1757676429141,"summary":{"operation":"append","added-data-files":"1","added-records":"3","added-files-size":"705","changed-partition-count":"1","total-records":"3","total-files-size":"705","total-data-files":"1","total-delete-files":"0","total-position-deletes":"0","total-equality-deletes":"0","iceberg-version":"Apache Iceberg 1.9.2 (commit 071d5606bc6199a0be9b3f274ec7fbf111d88821)"},"manifest-list":"data/persistent/expression_filter/metadata/snap-8096310958539014181-1-8d30f58e-7333-4451-983d-eaf657a21a11.avro","schema-id":0}],"statistics":[],"partition-statistics":[],"snapshot-log":[{"timestamp-ms":1757676429141,"snapshot-id":8096310958539014181}],"metadata-log":[{"timestamp-ms":1757676428493,"metadata-file":"data/persistent/expression_filter/metadata/00000-acdf842e-3a9d-4b9b-ad87-daf78583a550.metadata.json"}]}
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
00001-19739cda-f528-4429-84cc-377ffdd24c75
27 changes: 23 additions & 4 deletions src/iceberg_predicate.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "iceberg_predicate.hpp"
#include "duckdb/planner/expression/bound_operator_expression.hpp"
#include "duckdb/planner/filter/constant_filter.hpp"
#include "duckdb/planner/filter/conjunction_filter.hpp"
#include "duckdb/planner/filter/null_filter.hpp"
Expand Down Expand Up @@ -80,16 +81,34 @@ bool MatchBoundsTemplated(const TableFilter &filter, const IcebergPredicateStats
return MatchBoundsIsNotNullFilter<TRANSFORM>(stats, transform);
}
case TableFilterType::EXPRESSION_FILTER: {
//! Expressions can be arbitrarily complex, and we currently only support IS NULL/IS NOT NULL checks against the
//! column itself, i.e. where the expression is a BOUND_OPERATOR of type OPERATOR_IS_NULL or OPERATOR_IS_NOT_NULL
//! whose single child expression is of type BOUND_REF.
//!
//! See duckdb/duckdb-iceberg#464
auto &expression_filter = filter.Cast<ExpressionFilter>();
auto &expr = *expression_filter.expr;

if (expr.type != ExpressionType::OPERATOR_IS_NULL && expr.type != ExpressionType::OPERATOR_IS_NOT_NULL) {
return true;
}

D_ASSERT(expr.GetExpressionClass() == ExpressionClass::BOUND_OPERATOR);
auto &bound_operator_expr = expr.Cast<BoundOperatorExpression>();

D_ASSERT(bound_operator_expr.children.size() == 1);
auto &child_expr = bound_operator_expr.children[0];
if (child_expr->type != ExpressionType::BOUND_REF) {
//! We can't evaluate expressions that aren't direct column references
return true;
}

if (expr.type == ExpressionType::OPERATOR_IS_NULL) {
return MatchBoundsIsNullFilter<TRANSFORM>(stats, transform);
}
if (expr.type == ExpressionType::OPERATOR_IS_NOT_NULL) {
} else {
D_ASSERT(expr.type == ExpressionType::OPERATOR_IS_NOT_NULL);
return MatchBoundsIsNotNullFilter<TRANSFORM>(stats, transform);
}
//! Any other expression cannot be filtered
return true;
}
default:
//! Conservative approach: we don't know what this filter is, so report that it doesn't filter anything
Expand Down
40 changes: 40 additions & 0 deletions test/sql/local/iceberg_scans/expression_filter.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# name: test/sql/local/iceberg_scans/expression_filter.test
# group: [iceberg_scans]

require-env DUCKDB_ICEBERG_HAVE_GENERATED_DATA

require avro

require parquet

require iceberg

statement ok
attach ':memory:' as my_datalake;

statement ok
create schema my_datalake.default;

statement ok
create view my_datalake.default.expression_filter as select * from ICEBERG_SCAN('__WORKING_DIRECTORY__/data/persistent/expression_filter');

# baseline: select all rows
query II
SELECT * FROM my_datalake.default.expression_filter ORDER BY id ASC;
----
1 foo
2 bar
3 baz

# An IS NULL filter on a CASE expression must not exclude any data files (before the fix for #464, such a filter wrongly caused data files to be excluded)
query II
SELECT id, CASE WHEN value = 'foo' THEN 'not null' ELSE NULL END AS role FROM my_datalake.default.expression_filter WHERE role IS NULL ORDER BY id ASC;
----
2 NULL
3 NULL

# Complement of the previous query: IS NOT NULL on the same CASE expression
query II
SELECT id, CASE WHEN value = 'foo' THEN 'not null' ELSE NULL END AS role FROM my_datalake.default.expression_filter WHERE role IS NOT NULL ORDER BY id ASC;
----
1 not null
Loading