@@ -129,18 +129,26 @@ static Value GetParquetSchemaParam(vector<IcebergColumnDefinition> &schema) {
129
129
130
130
//! Build the Parquet Scan expression for the files we need to scan
131
131
static unique_ptr<TableRef> MakeScanExpression (vector<Value> &data_file_values, vector<Value> &delete_file_values,
132
- vector<IcebergColumnDefinition> &schema, bool allow_moved_paths, string metadata_compression_codec, bool skip_schema_inference) {
132
+ vector<IcebergColumnDefinition> &schema, bool allow_moved_paths,
133
+ string metadata_compression_codec, bool skip_schema_inference,
134
+ int64_t data_cardinality, int64_t delete_cardinality) {
135
+
136
+ auto cardinality = make_uniq<ComparisonExpression>(ExpressionType::COMPARE_EQUAL, make_uniq<ColumnRefExpression>(" explicit_cardinality" ),
137
+ make_uniq<ConstantExpression>(Value (data_cardinality)));
138
+
133
139
// No deletes, just return a TableFunctionRef for a parquet scan of the data files
134
140
if (delete_file_values.empty ()) {
135
141
auto table_function_ref_data = make_uniq<TableFunctionRef>();
136
142
table_function_ref_data->alias = " iceberg_scan_data" ;
137
143
vector<unique_ptr<ParsedExpression>> left_children;
138
144
left_children.push_back (make_uniq<ConstantExpression>(Value::LIST (data_file_values)));
145
+ left_children.push_back (std::move (cardinality));
139
146
if (!skip_schema_inference) {
140
147
left_children.push_back (
141
148
make_uniq<ComparisonExpression>(ExpressionType::COMPARE_EQUAL, make_uniq<ColumnRefExpression>(" schema" ),
142
149
make_uniq<ConstantExpression>(GetParquetSchemaParam (schema))));
143
150
}
151
+
144
152
table_function_ref_data->function = make_uniq<FunctionExpression>(" parquet_scan" , std::move (left_children));
145
153
return std::move (table_function_ref_data);
146
154
}
@@ -165,6 +173,7 @@ static unique_ptr<TableRef> MakeScanExpression(vector<Value> &data_file_values,
165
173
table_function_ref_data->alias = " iceberg_scan_data" ;
166
174
vector<unique_ptr<ParsedExpression>> left_children;
167
175
left_children.push_back (make_uniq<ConstantExpression>(Value::LIST (data_file_values)));
176
+ left_children.push_back (std::move (cardinality));
168
177
left_children.push_back (make_uniq<ComparisonExpression>(ExpressionType::COMPARE_EQUAL,
169
178
make_uniq<ColumnRefExpression>(" filename" ),
170
179
make_uniq<ConstantExpression>(Value (1 ))));
@@ -184,6 +193,8 @@ static unique_ptr<TableRef> MakeScanExpression(vector<Value> &data_file_values,
184
193
table_function_ref_deletes->alias = " iceberg_scan_deletes" ;
185
194
vector<unique_ptr<ParsedExpression>> right_children;
186
195
right_children.push_back (make_uniq<ConstantExpression>(Value::LIST (delete_file_values)));
196
+ right_children.push_back (make_uniq<ComparisonExpression>(ExpressionType::COMPARE_EQUAL, make_uniq<ColumnRefExpression>(" explicit_cardinality" ),
197
+ make_uniq<ConstantExpression>(Value (delete_cardinality))));
187
198
table_function_ref_deletes->function = make_uniq<FunctionExpression>(" parquet_scan" , std::move (right_children));
188
199
join_node->right = std::move (table_function_ref_deletes);
189
200
@@ -269,7 +280,19 @@ static unique_ptr<TableRef> IcebergScanBindReplace(ClientContext &context, Table
269
280
if (mode == " list_files" ) {
270
281
return MakeListFilesExpression (data_file_values, delete_file_values);
271
282
} else if (mode == " default" ) {
272
- return MakeScanExpression (data_file_values, delete_file_values, snapshot_to_scan.schema , allow_moved_paths, metadata_compression_codec, skip_schema_inference);
283
+ int64_t data_cardinality = 0 , delete_cardinality = 0 ;
284
+ for (auto &manifest : iceberg_table.entries ) {
285
+ for (auto &entry : manifest.manifest_entries ) {
286
+ if (entry.status != IcebergManifestEntryStatusType::DELETED) {
287
+ if (entry.content == IcebergManifestEntryContentType::DATA) {
288
+ data_cardinality += entry.record_count ;
289
+ } else { // DELETES
290
+ delete_cardinality += entry.record_count ;
291
+ }
292
+ }
293
+ }
294
+ }
295
+ return MakeScanExpression (data_file_values, delete_file_values, snapshot_to_scan.schema , allow_moved_paths, metadata_compression_codec, skip_schema_inference, data_cardinality, delete_cardinality);
273
296
} else {
274
297
throw NotImplementedException (" Unknown mode type for ICEBERG_SCAN bind : '" + mode + " '" );
275
298
}
0 commit comments