@@ -731,7 +731,16 @@ class ScanMetadataBuilder {
731731 for (auto & [partition_key, partition_map] : partitions) {
732732 ScanMetadata::Partition partition;
733733 for (auto & [seqnum, layer] : partition_map) {
734- partition.emplace_back (std::move (layer));
734+ ScanMetadata::Layer result_layer;
735+ result_layer.data_entries_ = std::move (layer.data_entries_ );
736+ result_layer.equality_delete_entries_ = std::move (layer.equality_delete_entries_ ); // take them from the builder-side `layer`
737+ // Only the delete-file path is carried over; min_max_referenced_path_ is not copied into the result layer.
738+ result_layer.positional_delete_entries_ .reserve (layer.positional_delete_entries_ .size ());
739+ for (auto & pos_delete : layer.positional_delete_entries_ ) {
740+ result_layer.positional_delete_entries_ .emplace_back (std::move (pos_delete.positional_delete_ .path ));
741+ }
742+
743+ partition.emplace_back (std::move (result_layer));
735744 }
736745 result.partitions .emplace_back (std::move (partition));
737746 }
@@ -779,7 +788,19 @@ class ScanMetadataBuilder {
779788 // - There is no deletion vector that must be applied to the data file (when added, such a vector must
780789 // contain
781790 // all deletes from existing position delete files)
782- AddPositionDeletes (serialized_partition_key, sequence_number, entry.data_file .file_path );
791+ std::optional<std::pair<std::string, std::string>> min_max_referenced_path;
792+ constexpr uint32_t kFilePathId = 2147483546 ; // presumably the reserved Iceberg field id of the position-delete "file_path" column — confirm against the table spec
793+ if (entry.data_file .lower_bounds .contains (kFilePathId ) && entry.data_file .upper_bounds .contains (kFilePathId )) { // the pair is filled only when both bounds exist
794+ const std::vector<uint8_t >& min_bytes = entry.data_file .lower_bounds .at (kFilePathId );
795+ const std::vector<uint8_t >& max_bytes = entry.data_file .upper_bounds .at (kFilePathId );
796+
797+ std::string min_path (min_bytes.begin (), min_bytes.end ()); // bound bytes are copied as raw characters, no decoding step
798+ std::string max_path (max_bytes.begin (), max_bytes.end ());
799+
800+ min_max_referenced_path.emplace (std::move (min_path), std::move (max_path));
801+ }
802+ AddPositionDeletes (serialized_partition_key, sequence_number, entry.data_file .file_path ,
803+ min_max_referenced_path); // stays std::nullopt when either bound is missing
783804 break ;
784805 }
785806
@@ -807,8 +828,10 @@ class ScanMetadataBuilder {
807828 }
808829
809830 virtual void AddPositionDeletes (const std::string& serialized_partition_key, SequenceNumber sequence_number,
810- const std::string& path) {
811- partitions[serialized_partition_key][sequence_number].positional_delete_entries_ .emplace_back (path);
831+ const std::string& path,
832+ const std::optional<std::pair<std::string, std::string>>& min_max_referenced_path) { // optional {min, max} pair stored alongside the path
833+ partitions[serialized_partition_key][sequence_number].positional_delete_entries_ .emplace_back (
834+ path, min_max_referenced_path); // map operator[] creates the partition / sequence-number slots on first use
812835 }
813836
814837 virtual void AddGlobalEqualityDeletes (SequenceNumber sequence_number, const std::string& path,
@@ -850,7 +873,26 @@ class ScanMetadataBuilder {
850873 const TableMetadataV2& table_metadata_;
851874 std::shared_ptr<const iceberg::Schema> schema_;
852875
853- std::map<std::string, std::map<SequenceNumber, ScanMetadata::Layer>> partitions;
876+ struct PositionalDeleteWithExtraInfo { // positional delete entry as kept inside the builder: the delete info plus an optional path range
877+ PositionalDeleteInfo positional_delete_;
878+ std::optional<std::pair<std::string, std::string>> min_max_referenced_path_; // {min, max} pair; std::nullopt when the caller supplied none
879+
880+ PositionalDeleteWithExtraInfo (std::string path,
881+ std::optional<std::pair<std::string, std::string>> min_max_referenced_path)
882+ : positional_delete_(std::move(path)), min_max_referenced_path_(std::move(min_max_referenced_path)) {} // by-value parameters are moved into the members
883+ };
884+
885+ struct LayerWithExtraInfo { // per-(partition, sequence number) value type of the builder's `partitions` map
886+ std::vector<DataEntry> data_entries_;
887+ std::vector<PositionalDeleteWithExtraInfo> positional_delete_entries_;
888+ std::vector<EqualityDeleteInfo> equality_delete_entries_;
889+
890+ bool operator ==(const LayerWithExtraInfo& layer) const = default ; // NOTE(review): PositionalDeleteWithExtraInfo declares no operator== in the visible code — confirm this comparison can be instantiated
891+
892+ bool Empty () const ; // declared only; the definition is not visible in this hunk
893+ };
894+
895+ std::map<std::string, std::map<SequenceNumber, LayerWithExtraInfo>> partitions;
854896 // if there are k partitions and t global equality delete entries, k * t entries will be created
855897 // TODO(gmusya): improve
856898 std::map<SequenceNumber, std::vector<EqualityDeleteInfo>> global_equality_deletes;
@@ -868,9 +910,10 @@ class ScanMetadataBuilderMT : public ScanMetadataBuilder {
868910 }
869911
870912 void AddPositionDeletes (const std::string& serialized_partition_key, SequenceNumber sequence_number,
871- const std::string& path) override {
913+ const std::string& path,
914+ const std::optional<std::pair<std::string, std::string>>& min_max_referenced_path) override { // same contract as the base method, run under mutex_
872915 std::lock_guard<std::mutex> guard (mutex_); // held for the whole base-class call below
873- ScanMetadataBuilder::AddPositionDeletes (serialized_partition_key, sequence_number, path);
916+ ScanMetadataBuilder::AddPositionDeletes (serialized_partition_key, sequence_number, path, min_max_referenced_path );
874917 }
875918
876919 void AddGlobalEqualityDeletes (SequenceNumber sequence_number, const std::string& path,
0 commit comments