Skip to content

Commit 5e6efac

Browse files
authored
fix: incorrect search of dangling deletes (#119)
1 parent 3ce126b commit 5e6efac

File tree

1 file changed

+8
-5
lines changed

1 file changed

+8
-5
lines changed

iceberg/tea_scan.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -742,9 +742,9 @@ class ScanMetadataBuilder {
742742
std::optional<std::string> max_data_path;
743743

744744
// To remove dangling positional delete files, we need to make sure that there are no data files in the range
745-
// [min_referenced_file, max_referenced_file]. Delete files in layer X are applied to data files in layers greater
745+
// [min_referenced_file, max_referenced_file]. Delete files in layer X are applied to data files in layers less
746746
// than or equal to X. To find all dangling deletes in one pass, we start from the min layer
747-
for (auto it = layers.rbegin(); it != layers.rend(); ++it) {
747+
for (auto it = layers.begin(); it != layers.end(); ++it) {
748748
auto& [seqno, layer] = *it;
749749

750750
for (const auto& data_entry : layer.data_entries_) {
@@ -763,8 +763,12 @@ class ScanMetadataBuilder {
763763

764764
int64_t dangling_positional_delete_files = 0;
765765
for (const auto& pos_delete : layer.positional_delete_entries_) {
766-
bool has_stats =
767-
pos_delete.min_max_referenced_path_.has_value() && min_data_path.has_value() && max_data_path.has_value();
766+
bool has_data = min_data_path.has_value() && max_data_path.has_value();
767+
if (!has_data) {
768+
++dangling_positional_delete_files;
769+
continue;
770+
}
771+
bool has_stats = pos_delete.min_max_referenced_path_.has_value();
768772
if (has_stats) {
769773
const auto& [min_referenced_path, max_referenced_path] = *pos_delete.min_max_referenced_path_;
770774
if (*min_data_path > max_referenced_path || *max_data_path < min_referenced_path) {
@@ -794,7 +798,6 @@ class ScanMetadataBuilder {
794798
}
795799
}
796800

797-
std::reverse(partition.begin(), partition.end());
798801
if (min_data_path.has_value()) {
799802
result.partitions.emplace_back(std::move(partition));
800803
}

0 commit comments

Comments
 (0)