Skip to content

Commit 06c1966

Browse files
committed
Skip dangling deletes
1 parent 929623d commit 06c1966

File tree

1 file changed

+30
-4
lines changed

1 file changed

+30
-4
lines changed

iceberg/tea_scan.cpp

Lines changed: 30 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -730,19 +730,45 @@ class ScanMetadataBuilder {
730730

731731
for (auto& [partition_key, partition_map] : partitions) {
732732
ScanMetadata::Partition partition;
733-
for (auto& [seqnum, layer] : partition_map) {
733+
734+
std::optional<std::string> min_data_path;
735+
std::optional<std::string> max_data_path;
736+
737+
for (auto it = partition_map.rbegin(); it != partition_map.rend(); ++it) {
738+
auto& [seqno, layer] = *it;
739+
740+
for (const auto& data_entry : layer.data_entries_) {
741+
if (!min_data_path.has_value() || *min_data_path > data_entry.path) {
742+
min_data_path = data_entry.path;
743+
}
744+
if (!max_data_path.has_value() || *max_data_path < data_entry.path) {
745+
max_data_path = data_entry.path;
746+
}
747+
}
748+
734749
ScanMetadata::Layer result_layer;
735750
result_layer.data_entries_ = std::move(layer.data_entries_);
736751
result_layer.equality_delete_entries_ = std::move(result_layer.equality_delete_entries_);
737752

738-
result_layer.positional_delete_entries_.reserve(layer.positional_delete_entries_.size());
739-
for (auto& pos_delete : layer.positional_delete_entries_) {
753+
for (const auto& pos_delete : layer.positional_delete_entries_) {
754+
bool has_stats =
755+
pos_delete.min_max_referenced_path_.has_value() && min_data_path.has_value() && max_data_path.has_value();
756+
if (has_stats) {
757+
const auto& [min_referenced_path, max_referenced_path] = *pos_delete.min_max_referenced_path_;
758+
if (*min_data_path > max_referenced_path || *max_data_path < min_referenced_path) {
759+
continue;
760+
}
761+
}
740762
result_layer.positional_delete_entries_.emplace_back(std::move(pos_delete.positional_delete_.path));
741763
}
742764

743765
partition.emplace_back(std::move(result_layer));
744766
}
745-
result.partitions.emplace_back(std::move(partition));
767+
768+
std::reverse(partition.begin(), partition.end());
769+
if (min_data_path.has_value()) {
770+
result.partitions.emplace_back(std::move(partition));
771+
}
746772
}
747773

748774
return result;

0 commit comments

Comments
 (0)