@@ -742,9 +742,9 @@ class ScanMetadataBuilder {
742742 std::optional<std::string> max_data_path;
743743
744744 // To remove a dangling positional delete file, we need to make sure that there are no data files in the range
745- // [min_referenced_file, max_referenced_file]. Delete files in layer X are applied to data files in layers greater
745+ // [min_referenced_file, max_referenced_file]. Delete files in layer X are applied to data files in layers less
746746 // than or equal to X. To find all dangling deletes in one pass, we start from the min layer
747- for (auto it = layers.rbegin (); it != layers.rend (); ++it) {
747+ for (auto it = layers.begin (); it != layers.end (); ++it) {
748748 auto & [seqno, layer] = *it;
749749
750750 for (const auto & data_entry : layer.data_entries_ ) {
@@ -763,8 +763,12 @@ class ScanMetadataBuilder {
763763
764764 int64_t dangling_positional_delete_files = 0 ;
765765 for (const auto & pos_delete : layer.positional_delete_entries_ ) {
766- bool has_stats =
767- pos_delete.min_max_referenced_path_ .has_value () && min_data_path.has_value () && max_data_path.has_value ();
766+ bool has_data = min_data_path.has_value () && max_data_path.has_value ();
767+ if (!has_data) {
768+ ++dangling_positional_delete_files;
769+ continue ;
770+ }
771+ bool has_stats = pos_delete.min_max_referenced_path_ .has_value ();
768772 if (has_stats) {
769773 const auto & [min_referenced_path, max_referenced_path] = *pos_delete.min_max_referenced_path_ ;
770774 if (*min_data_path > max_referenced_path || *max_data_path < min_referenced_path) {
@@ -794,7 +798,6 @@ class ScanMetadataBuilder {
794798 }
795799 }
796800
797- std::reverse (partition.begin (), partition.end ());
798801 if (min_data_path.has_value ()) {
799802 result.partitions .emplace_back (std::move (partition));
800803 }
0 commit comments