@@ -286,12 +286,18 @@ ReaderInitializeType IcebergMultiFileReader::InitializeReader(MultiFileReaderDat
286
286
287
287
// ! Add the columns needed by the equality deletes if not present
288
288
auto new_global_column_ids = global_column_ids;
289
+ auto &equality_to_result_id = multi_file_list.equality_id_to_result_id ;
290
+ new_global_column_ids.resize (global_column_ids.size () + equality_to_result_id.size ());
289
291
for (auto field_id : equality_delete_ids) {
292
+ auto it = equality_to_result_id.find (field_id);
293
+ if (it == equality_to_result_id.end ()) {
294
+ // ! Already selected, no need to add
295
+ continue ;
296
+ }
290
297
auto global_column_id = id_to_global_column[field_id];
291
298
ColumnIndex equality_index (global_column_id);
292
- if (std::find (global_column_ids.begin (), global_column_ids.end (), equality_index) == global_column_ids.end ()) {
293
- new_global_column_ids.push_back (equality_index);
294
- }
299
+ // ! FIXME: is this correct?
300
+ new_global_column_ids[it->second ] = equality_index;
295
301
}
296
302
297
303
return CreateMapping (context, reader_data, global_columns, new_global_column_ids, table_filters, gstate.file_list ,
@@ -339,8 +345,7 @@ void IcebergMultiFileReader::FinalizeBind(MultiFileReaderData &reader_data, cons
339
345
void IcebergMultiFileReader::ApplyEqualityDeletes (ClientContext &context, DataChunk &output_chunk,
340
346
const IcebergMultiFileList &multi_file_list,
341
347
const IcebergManifestEntry &data_file,
342
- const vector<MultiFileColumnDefinition> &local_columns,
343
- const unordered_map<idx_t , idx_t > &field_id_to_result_id) {
348
+ const vector<MultiFileColumnDefinition> &local_columns) {
344
349
auto delete_rows = multi_file_list.GetEqualityDeletesForFile (data_file);
345
350
346
351
if (delete_rows.empty ()) {
@@ -384,25 +389,7 @@ void IcebergMultiFileReader::ApplyEqualityDeletes(ClientContext &context, DataCh
384
389
}
385
390
continue ;
386
391
}
387
- if (field_id_to_result_id.empty ()) {
388
- equalities.push_back (expression->Copy ());
389
- continue ;
390
- }
391
- idx_t index = field_id_to_result_id.at (field_id);
392
- if (expression->type == ExpressionType::COMPARE_NOTEQUAL) {
393
- auto &expr = expression->Cast <BoundComparisonExpression>();
394
- auto bound_ref = make_uniq<BoundReferenceExpression>(expr.left ->return_type , index);
395
- unique_ptr<Expression> equality_filter = make_uniq<BoundComparisonExpression>(
396
- ExpressionType::COMPARE_NOTEQUAL, std::move (bound_ref), expr.right ->Copy ());
397
- equalities.push_back (std::move (equality_filter));
398
- } else if (expression->type == ExpressionType::OPERATOR_IS_NOT_NULL) {
399
- auto &expr = expression->Cast <BoundOperatorExpression>();
400
- auto bound_ref = make_uniq<BoundReferenceExpression>(expr.children [0 ]->return_type , index);
401
- auto is_not_null =
402
- make_uniq<BoundOperatorExpression>(ExpressionType::OPERATOR_IS_NOT_NULL, LogicalType::BOOLEAN);
403
- is_not_null->children .push_back (std::move (bound_ref));
404
- equalities.push_back (std::move (is_not_null));
405
- }
392
+ equalities.push_back (expression->Copy ());
406
393
}
407
394
408
395
unique_ptr<Expression> filter;
@@ -440,43 +427,29 @@ void IcebergMultiFileReader::FinalizeChunk(ClientContext &context, const MultiFi
440
427
DataChunk &input_chunk, DataChunk &output_chunk,
441
428
ExpressionExecutor &executor,
442
429
optional_ptr<MultiFileReaderGlobalState> global_state) {
430
+ D_ASSERT (global_state);
431
+ // Get the metadata for this file
432
+ const auto &multi_file_list = global_state->file_list ->Cast <IcebergMultiFileList>();
443
433
444
434
// ! Add the extra equality delete fields to output chunk.
445
- int32_t diff = 0 ;
446
- if (executor.expressions .size () != output_chunk.ColumnCount ()) {
447
- diff = executor.expressions .size () - output_chunk.ColumnCount ();
448
- for (int32_t i = diff; i > 0 ; i--) {
449
- int32_t index = input_chunk.ColumnCount () - i;
450
- output_chunk.data .emplace_back (input_chunk.data [index]);
435
+ idx_t diff = executor.expressions .size () - output_chunk.ColumnCount ();
436
+ (void )diff;
437
+ D_ASSERT (diff == multi_file_list.equality_id_to_result_id .size ());
438
+ if (diff > 0 ) {
439
+ int32_t start = input_chunk.ColumnCount () - diff;
440
+ for (int32_t i = 0 ; i < diff; i++) {
441
+ output_chunk.data .emplace_back (input_chunk.data [start + i]);
451
442
}
452
443
}
453
444
454
445
// ! Base class finalization first
455
446
MultiFileReader::FinalizeChunk (context, bind_data, reader, reader_data, input_chunk, output_chunk, executor,
456
447
global_state);
457
448
458
- // ! Map from index into local_columns -> field_id
459
449
auto &local_columns = reader.columns ;
460
- unordered_map<idx_t , idx_t > column_index_to_field_id;
461
- for (idx_t i = 0 ; i < local_columns.size (); i++) {
462
- auto &col = local_columns[i];
463
- column_index_to_field_id[i] = col.identifier .GetValue <int32_t >();
464
- }
465
-
466
- // ! Map from field_id -> index in 'output_chunk'
467
- unordered_map<idx_t , idx_t > field_id_to_result_id;
468
- auto &column_indexes = reader.column_indexes ;
469
- auto result_id = executor.expressions .size () - column_indexes.size ();
470
- for (auto &column_index : column_indexes) {
471
- field_id_to_result_id[column_index_to_field_id[column_index.GetPrimaryIndex ()]] = result_id++;
472
- }
473
-
474
- D_ASSERT (global_state);
475
- // Get the metadata for this file
476
- const auto &multi_file_list = dynamic_cast <const IcebergMultiFileList &>(*global_state->file_list );
477
450
auto file_id = reader.file_list_idx .GetIndex ();
478
451
auto &data_file = multi_file_list.data_files [file_id];
479
- ApplyEqualityDeletes (context, output_chunk, multi_file_list, data_file, local_columns, field_id_to_result_id );
452
+ ApplyEqualityDeletes (context, output_chunk, multi_file_list, data_file, local_columns);
480
453
481
454
// ! Remove the extra columns we added to perform the equality delete filtering
482
455
for (idx_t i = 0 ; i < diff; i++) {
0 commit comments