From 2e763944db14e77c9d4336ce71cb7e08b0b068e7 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 31 Oct 2022 20:37:51 +0100 Subject: [PATCH 001/125] sketching the new `mapper` class --- eodal/core/raster.py | 4 +- eodal/mapper/__init__.py | 0 eodal/mapper/features.py | 56 ++++++++++++++++++++ eodal/mapper/filters.py | 73 ++++++++++++++++++++++++++ eodal/metadata/utils.py | 2 +- eodal/operational/mapping/mapper.py | 16 +++--- eodal/operational/mapping/sentinel1.py | 2 +- eodal/operational/mapping/sentinel2.py | 4 +- 8 files changed, 143 insertions(+), 14 deletions(-) create mode 100644 eodal/mapper/__init__.py create mode 100644 eodal/mapper/features.py create mode 100644 eodal/mapper/filters.py diff --git a/eodal/core/raster.py b/eodal/core/raster.py index 82236cd8..b841ff8b 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -354,14 +354,14 @@ def collection(self, value): @check_band_names def get_band_alias(self, band_name: str) -> Union[Dict[str, str], None]: """ - Retuns the band_name-alias mapping of a given band + Retuns the band_name-alias mapper of a given band in collection if the band has an alias, None instead :param band_name: name of the band for which to return the alias or its name if the alias is provided :returns: - mapping of band_name:band_alias (band name is always the + mapper of band_name:band_alias (band name is always the key and band_alias is the value) """ if self[band_name].has_alias: diff --git a/eodal/mapper/__init__.py b/eodal/mapper/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/eodal/mapper/features.py b/eodal/mapper/features.py new file mode 100644 index 00000000..464f3661 --- /dev/null +++ b/eodal/mapper/features.py @@ -0,0 +1,56 @@ +''' +Module defining geographic features for mapping. 
+ +Copyright (C) 2022 Lukas Valentin Graf + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +''' + +class Feature: + """ + Generic class for geographic features + + :attrib name: + name of the feature (used for identification) + :attrib geometry: + `shapely` geometry of the feature in a spatial reference system + :attrib crs: + spatial coordinate reference system of the feature + """ + def __init__(self, name: str, geometry, crs): + """ + Class constructor + """ + self._name = name + self._geometry = geometry + self._crs = crs + + def __repr__(self) -> str: + return f'Feature name: {self.name}\nFeature Geometry: ' + \ + f'{self.geometry} (CRS: {self.crs})' + + @property + def name(self) -> str: + """the feature name""" + return self._name + + @property + def geometry(self): + """the feature geometry""" + return self._geometry + + @property + def crs(self): + """the feature coordinate reference system""" + return self._crs \ No newline at end of file diff --git a/eodal/mapper/filters.py b/eodal/mapper/filters.py new file mode 100644 index 00000000..6dc72f15 --- /dev/null +++ b/eodal/mapper/filters.py @@ -0,0 +1,73 @@ +''' +Predefined filters for EO data selection by their metadata. 
+ +Copyright (C) 2022 Lukas Valentin Graf + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +''' + + +class Filter: + """ + The generic filter class. + + :attrib entity: + metadata entity to use for filtering + :attrib condition: + condition that must be met to keep a metadata item in the + selection + """ + def __init__(self, entity: str, condition: str): + """ + Constructor method + + :param entity: + metadata entity to use for filtering + :param condition: + condition that must be met to keep a metadata item in the + selection + """ + self._entity = entity + self._condition = condition + + def __repr__(self) -> str: + return f'Filter by {self.entity} {self.condition}' + + def __add__(self, other): + # IDEA: filters can be combined using logical AND and OR + pass + + @property + def entity(self) -> str: + return self._entity + + @property + def condition(self) -> str: + return self._condition + +class GeoFilter(Filter): + pass + +class TimeFilter(Filter): + pass + +class QualityFilter(Filter): + pass + + +if __name__ == '__main__': + + cc_filter = Filter(entity='cloudy_pixel_percentage', condition='<30') + cc_filter + \ No newline at end of file diff --git a/eodal/metadata/utils.py b/eodal/metadata/utils.py index 2d8b9e62..fb459fa2 100644 --- a/eodal/metadata/utils.py +++ b/eodal/metadata/utils.py @@ -45,7 +45,7 @@ def _check_linux_cifs(ip: Union[str, Path]) -> Path: lines = response.split("\n") for line in lines: if str(ip) in 
line: - # data is on mounted share -> get local file system mapping + # data is on mounted share -> get local file system mapper local_path = line.split(" ")[1] # check if current user has access to read local path, otherwise keep # searching (might happen if another user manually mounts the NAS) diff --git a/eodal/operational/mapping/mapper.py b/eodal/operational/mapping/mapper.py index 8b4d0a6e..1d7e7f66 100644 --- a/eodal/operational/mapping/mapper.py +++ b/eodal/operational/mapping/mapper.py @@ -1,5 +1,5 @@ """ -Generic mapping module +Generic mapper module Copyright (C) 2022 Lukas Valentin Graf @@ -58,7 +58,7 @@ class Feature(object): :attrib epsg: epsg code of the feature's geometry :attrib properties: - any key-value dictionary like mapping of feature properties + any key-value dictionary like mapper of feature properties (e.g., its name or other attributes spoken in terms of an ESRI shapefile's table of attributes) """ @@ -80,7 +80,7 @@ def __init__( :param epsg: epsg code of the feature's geometry :param properties: - any key-value dictionary like mapping of feature properties + any key-value dictionary like mapper of feature properties (e.g., its name or other attributes spoken in terms of an ESRI shapefile's table of attributes) """ @@ -127,7 +127,7 @@ class MapperConfigs(object): :attrib band_names: names of raster bands to process from each dataset found during the - mapping process + mapper process :attrib resampling_method: resampling might become necessary when the spatial resolution changes. Nearest neighbor by default. @@ -154,7 +154,7 @@ def __init__( :param band_names: names of raster bands to process from each dataset found during the - mapping process + mapper process :param resampling_method: resampling might become necessary when the spatial resolution changes. Nearest neighbor by default. @@ -199,7 +199,7 @@ class Mapper(object): feature (AOI) uniquely identifiable. 
If None (default) the features are labelled by a unique-identifier created on the fly. :attrib mapping_configs: - Mapping configurations specified by `~eodal.operational.mapping.MapperConfigs`. + Mapping configurations specified by `~eodal.operational.mapper.MapperConfigs`. Uses default configurations if not provided. :attrib observations: data structure for storing DB query results per AOI. @@ -232,7 +232,7 @@ def __init__( feature (AOI) uniquely identifiable. If None (default) the features are labelled by a unique-identifier created on the fly. :param mapping_configs: - Mapping configurations specified by `~eodal.operational.mapping.MapperConfigs`. + Mapping configurations specified by `~eodal.operational.mapper.MapperConfigs`. Uses default configurations if not provided. """ object.__setattr__(self, "date_start", date_start) @@ -370,7 +370,7 @@ def _get_scenes(self, sensor: str) -> None: def _prepare_features(self) -> pd.DataFrame: """ - Prepares the feature collection for mapping + Prepares the feature collection for mapper :returns: `DataFrame` with prepared features diff --git a/eodal/operational/mapping/sentinel1.py b/eodal/operational/mapping/sentinel1.py index 514d650d..ce228529 100644 --- a/eodal/operational/mapping/sentinel1.py +++ b/eodal/operational/mapping/sentinel1.py @@ -18,7 +18,7 @@ class Sentinel1Mapper(Mapper): """ - Spatial mapping class for Sentinel-1 data. + Spatial mapper class for Sentinel-1 data. """ def __init__( self, diff --git a/eodal/operational/mapping/sentinel2.py b/eodal/operational/mapping/sentinel2.py index 5307203c..042a69d3 100644 --- a/eodal/operational/mapping/sentinel2.py +++ b/eodal/operational/mapping/sentinel2.py @@ -45,7 +45,7 @@ class Sentinel2Mapper(Mapper): """ - Spatial mapping class for Sentinel-2 data. + Spatial mapper class for Sentinel-2 data. 
:attrib processing_level: Sentinel-2 data processing level (L1C or L2A) @@ -106,7 +106,7 @@ def get_scenes(self) -> None: NOTE: By passing a list of Sentinel-2 tiles you can explicitly control which Sentinel-2 tiles are considered. This might be useful for - mapping tasks where your feature collection lies completely within + mapper tasks where your feature collection lies completely within a single Sentinel-2 tile but also overlaps with neighboring tiles. The scene selection and processing workflow contains several steps: From 4f592509882989115689cbd4166879e24cb1e714 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 3 Nov 2022 20:20:38 +0100 Subject: [PATCH 002/125] introducing vector features as attribute and remove obselete `alias` --- eodal/core/band.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/eodal/core/band.py b/eodal/core/band.py index 5295a849..aee3993a 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -362,6 +362,9 @@ class Band(object): `rasterio` compatible representation of essential image metadata :attrib transform: `Affine` transform representation of the image geo-localisation + :attrib vector_features: + `geopandas.GeoDataFrame` with vector features used for reading the image + (clipping or masking). Can be None if no features were used for reading. """ def __init__( @@ -377,7 +380,7 @@ def __init__( nodata: Optional[Union[int, float]] = None, is_tiled: Optional[Union[int, bool]] = 0, area_or_point: Optional[str] = "Area", - alias: Optional[str] = "" + vector_features: Optional[gpd.GeoDataFrame] = None ): """ Constructor to instantiate a new band object. @@ -424,8 +427,10 @@ def __init__( `Point`. When `Area` pixel coordinates refer to the upper left corner of the pixel, whereas `Point` indicates that pixel coordinates are from the center of the pixel. - :param alias: - band alias name (optional). 
+ :param vector_features: + `geopandas.GeoDataFrame` with vector features used for reading the image + (clipping or masking). Can be None if no features were used for reading + (optional). """ # make sure the passed values are 2-dimensional @@ -441,6 +446,13 @@ def __init__( elif values.dtype in ["uint8", "uint16", "uint32", "uint64"]: nodata = 0 + # make sure vector features is a valid GeoDataFrame + if vector_features is not None: + if vector_features.crs is None: + raise ValueError( + f'Cannot handle vector features without spatial coordinate reference system' + ) + object.__setattr__(self, "band_name", band_name) object.__setattr__(self, "values", values) object.__setattr__(self, "geo_info", geo_info) @@ -452,6 +464,7 @@ def __init__( object.__setattr__(self, "nodata", nodata) object.__setattr__(self, "is_tiled", is_tiled) object.__setattr__(self, "area_or_point", area_or_point) + object.__setattr__(self, "vector_features", vector_features) def __setattr__(self, *args, **kwargs): raise TypeError("Band object attributes are immutable") @@ -519,6 +532,11 @@ def crs(self) -> CRS: """Coordinate Reference System of the band""" return CRS.from_epsg(self.geo_info.epsg) + @property + def vector_features(self) -> None | gpd.GeoDataFrame: + """vector features used for reading or reducing band data""" + return self.vector_features + @property def has_alias(self) -> bool: """Checks if a color name can be used for aliasing""" From dc5a27a3cd2cdbec19d9b1d12b2e5c48b3934c97 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 3 Nov 2022 20:26:16 +0100 Subject: [PATCH 003/125] adding attribute `vector_features` (untested) --- eodal/core/band.py | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/eodal/core/band.py b/eodal/core/band.py index aee3993a..c2dc721e 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -447,12 +447,8 @@ def __init__( nodata = 0 # make sure vector features is a valid GeoDataFrame - if vector_features is not 
None: - if vector_features.crs is None: - raise ValueError( - f'Cannot handle vector features without spatial coordinate reference system' - ) - + self._check_vector_features(vector_features) + object.__setattr__(self, "band_name", band_name) object.__setattr__(self, "values", values) object.__setattr__(self, "geo_info", geo_info) @@ -537,6 +533,12 @@ def vector_features(self) -> None | gpd.GeoDataFrame: """vector features used for reading or reducing band data""" return self.vector_features + @vector_features.setter + def vector_features(self, features: Optional[gpd.GeoDataFrame]): + """set vector features for reducing band data""" + self._check_vector_features(vector_features=features) + object.__setattr__(self, "vector_features", features) + @property def has_alias(self) -> bool: """Checks if a color name can be used for aliasing""" @@ -587,6 +589,17 @@ def transform(self) -> Affine: """Affine transformation of the band""" return self.geo_info.as_affine() + @staticmethod + def _check_vector_features(vector_features: None | gpd.GeoDataFrame) -> None: + """ + Asserts that passed GeoDataFrame has a CRS + """ + if vector_features is not None: + if vector_features.crs is None: + raise ValueError( + f'Cannot handle vector features without spatial coordinate reference system' + ) + @staticmethod def _get_pixel_geometries( vector_features: Union[Path, gpd.GeoDataFrame], From e670670dfe0d3d07fc9e53bf5cecefe569f8f83a Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 8 Nov 2022 11:45:52 +0100 Subject: [PATCH 004/125] fixed invalid recursion due to overwriting of names --- eodal/core/band.py | 6 +++--- .../{local_archive_query.py => satellite_archive_query.py} | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename examples/{local_archive_query.py => satellite_archive_query.py} (100%) diff --git a/eodal/core/band.py b/eodal/core/band.py index c2dc721e..4379cf14 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -529,12 +529,12 @@ def crs(self) -> CRS: return 
CRS.from_epsg(self.geo_info.epsg) @property - def vector_features(self) -> None | gpd.GeoDataFrame: + def features(self) -> None | gpd.GeoDataFrame: """vector features used for reading or reducing band data""" return self.vector_features - @vector_features.setter - def vector_features(self, features: Optional[gpd.GeoDataFrame]): + @features.setter + def features(self, features: Optional[gpd.GeoDataFrame]): """set vector features for reducing band data""" self._check_vector_features(vector_features=features) object.__setattr__(self, "vector_features", features) diff --git a/examples/local_archive_query.py b/examples/satellite_archive_query.py similarity index 100% rename from examples/local_archive_query.py rename to examples/satellite_archive_query.py From 1a55c9998a5eda53538f6ac8e6bd7d4cecc1b6d5 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 11 Nov 2022 15:03:12 +0100 Subject: [PATCH 005/125] improvements in syntax and return types --- eodal/operational/cli/sentinel2.py | 12 ++++-- examples/satellite_archive_query.py | 59 +++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 12 deletions(-) diff --git a/eodal/operational/cli/sentinel2.py b/eodal/operational/cli/sentinel2.py index ccf82f7f..eb598f27 100644 --- a/eodal/operational/cli/sentinel2.py +++ b/eodal/operational/cli/sentinel2.py @@ -24,6 +24,7 @@ import matplotlib.pyplot as plt import numpy as np +import pandas as pd import shutil from datetime import date @@ -47,7 +48,6 @@ logger = get_settings().logger - def cli_s2_pipeline_fun( processed_data_archive: Path, date_start: date, @@ -392,7 +392,7 @@ def cli_s2_scene_selection( processing_level: ProcessingLevels, out_dir: Path, cloud_cover_threshold: Optional[Union[int, float]] = 100, -) -> None: +) -> pd.DataFrame: """ Function to query the Sentinel-2 metadata using a set of search criteria, including filtering by date range, cloud cover and Sentinel-2 tile. 
@@ -415,6 +415,8 @@ def cli_s2_scene_selection( :param cloud_cover_threshold: optional cloud cover threshold to filter out to cloudy scenes as integer between 0 and 100%. + :returns: + metadata of scenes """ # query metadata from database @@ -439,7 +441,7 @@ def cli_s2_scene_selection( metadata.to_csv(out_dir.joinpath(f"{query_time}_query.csv"), index=False) # Plot available scenes for query - fig = plt.figure(figsize=(8, 6), dpi=300) + fig = plt.figure(figsize=(15, 10), dpi=300) ax = fig.add_subplot(111) ax.plot( metadata["sensing_date"], @@ -456,4 +458,6 @@ def cli_s2_scene_selection( + f"Average cloud cover: {np.round(cc_avg, 2)}%" ) plt.savefig(out_dir.joinpath(f"{query_time}_query_CCplot.png"), bbox_inches="tight") - plt.close() + plt.close(fig) + + return metadata diff --git a/examples/satellite_archive_query.py b/examples/satellite_archive_query.py index 397b8092..e649b593 100644 --- a/examples/satellite_archive_query.py +++ b/examples/satellite_archive_query.py @@ -25,26 +25,48 @@ along with this program. If not, see . """ -import os -from datetime import datetime +import matplotlib +import matplotlib.pyplot as plt +import pandas as pd + +from datetime import date, datetime from pathlib import Path from eodal.operational.cli import cli_s2_scene_selection from eodal.utils.constants import ProcessingLevels +# settings for plotting +plt.style.use('seaborn-darkgrid') +matplotlib.rc('ytick', labelsize=16) +matplotlib.rc('xtick', labelsize=16) +matplotlib.rc('font', size=16) + # user inputs tile = 'T32TLT' processing_level = ProcessingLevels.L2A -out_dir = Path('/mnt/ides/Lukas/03_Debug') -date_start = '2021-10-01' -date_end = '2022-05-18' -cc_threshold = 80. +out_dir = Path(f'../data') +date_start = '2019-01-01' +date_end = '2020-12-31' +cc_threshold = 100. 
# date range date_start = datetime.strptime(date_start, '%Y-%m-%d') date_end = datetime.strptime(date_end, '%Y-%m-%d') -# execute scene selection -cli_s2_scene_selection( +# execute scene selection (plots cloud cover over time) +metadata = cli_s2_scene_selection( + tile=tile, + processing_level=processing_level, + cloud_cover_threshold=cc_threshold, + date_start=date_start, + date_end=date_end, + out_dir=Path(out_dir) +) + +# plot cloud cover by month +date_start = date(2017,1,1) +date_end = date(2021,12,31) + +metadata = cli_s2_scene_selection( tile=tile, processing_level=processing_level, cloud_cover_threshold=cc_threshold, @@ -52,3 +74,24 @@ date_end=date_end, out_dir=Path(out_dir) ) + +# group by month and plot the average cloud cover +metadata_monthly = metadata['cloudy_pixel_percentage'].groupby( + by=pd.to_datetime(metadata.sensing_date).dt.month).agg('median') +metadata_monthly = metadata_monthly.reset_index() +months = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug', 9: 'Sep', + 10: 'Oct', 11: 'Nov', 12: 'Dec'} +metadata_monthly.sensing_date = metadata_monthly.sensing_date.map(months) + +f, ax = plt.subplots(figsize=(8,6)) +ax.plot(metadata_monthly.sensing_date, metadata_monthly.cloudy_pixel_percentage, + marker='o', label='Median') +ax.set_xlabel('Month', fontsize=16) +ax.set_ylabel('Cloudy Pixel Percentage [%]', fontsize=16) +ax.set_title(f'Sentinel-2 Tile {tile[1::]} ({date_start} - {date_end})\nNumber of Scenes: {metadata.shape[0]}', + size=18) +ax.set_ylim(0,100) +ax.set_xlim(-1,12) +ax.legend(fontsize=16) +f.savefig(out_dir.joinpath(f'monthly_cloudy_pixel_percentage_{date_start}-{date_end}.png'), bbox_inches='tight') + From ef0c43884b6f0bfbfd47b0ee84766565dcbcf843 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 13 Nov 2022 10:27:05 +0100 Subject: [PATCH 006/125] drafting next-generation of mapping capabilities --- eodal/{mapper => scenes}/__init__.py | 0 eodal/{mapper/features.py => scenes/feature.py} | 0 
eodal/{mapper/filters.py => scenes/filter.py} | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename eodal/{mapper => scenes}/__init__.py (100%) rename eodal/{mapper/features.py => scenes/feature.py} (100%) rename eodal/{mapper/filters.py => scenes/filter.py} (100%) diff --git a/eodal/mapper/__init__.py b/eodal/scenes/__init__.py similarity index 100% rename from eodal/mapper/__init__.py rename to eodal/scenes/__init__.py diff --git a/eodal/mapper/features.py b/eodal/scenes/feature.py similarity index 100% rename from eodal/mapper/features.py rename to eodal/scenes/feature.py diff --git a/eodal/mapper/filters.py b/eodal/scenes/filter.py similarity index 100% rename from eodal/mapper/filters.py rename to eodal/scenes/filter.py From 0d1743acd5fcf263ee47642f632b09681fa90404 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 13 Nov 2022 10:29:02 +0100 Subject: [PATCH 007/125] minor beautification --- eodal/metadata/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eodal/metadata/utils.py b/eodal/metadata/utils.py index fb459fa2..0fdb6976 100644 --- a/eodal/metadata/utils.py +++ b/eodal/metadata/utils.py @@ -45,7 +45,7 @@ def _check_linux_cifs(ip: Union[str, Path]) -> Path: lines = response.split("\n") for line in lines: if str(ip) in line: - # data is on mounted share -> get local file system mapper + # data is on mounted share -> get local file system scenes local_path = line.split(" ")[1] # check if current user has access to read local path, otherwise keep # searching (might happen if another user manually mounts the NAS) From 0cd610c2de53a5e4fa431aa04e1cc15fe383d3ca Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 13 Nov 2022 10:29:34 +0100 Subject: [PATCH 008/125] small experiment --- examples/random_sentinel2_pixels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/random_sentinel2_pixels.py b/examples/random_sentinel2_pixels.py index 77329c94..26458e3b 100644 --- a/examples/random_sentinel2_pixels.py +++ 
b/examples/random_sentinel2_pixels.py @@ -72,14 +72,14 @@ def get_pixels(date_start: date, date_end: date, scene_cloud_cover_threshold: in :param aois: areas of interest (1 to N) for which to extract random pixel observations """ - # setup Sentinel-2 mapper to get the relevant scenes + # setup Sentinel-2 scenes to get the relevant scenes mapper_configs = MapperConfigs( spatial_resolution=10., resampling_method=cv2.INTER_NEAREST_EXACT, band_names=['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12'] ) - # get a new mapper instance + # get a new scenes instance mapper = Sentinel2Mapper( date_start=date_start, date_end=date_end, From 425d6dc094abbeac2a97cc2006d22519d89e7cf9 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 13 Nov 2022 10:32:00 +0100 Subject: [PATCH 009/125] typos --- eodal/operational/mapping/mapper.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/eodal/operational/mapping/mapper.py b/eodal/operational/mapping/mapper.py index 1d7e7f66..8ba3cdeb 100644 --- a/eodal/operational/mapping/mapper.py +++ b/eodal/operational/mapping/mapper.py @@ -58,7 +58,7 @@ class Feature(object): :attrib epsg: epsg code of the feature's geometry :attrib properties: - any key-value dictionary like mapper of feature properties + any key-value dictionary-like mapping of feature properties (e.g., its name or other attributes spoken in terms of an ESRI shapefile's table of attributes) """ @@ -80,7 +80,7 @@ def __init__( :param epsg: epsg code of the feature's geometry :param properties: - any key-value dictionary like mapper of feature properties + any key-value dictionary-like mapping of feature properties (e.g., its name or other attributes spoken in terms of an ESRI shapefile's table of attributes) """ @@ -126,8 +126,7 @@ class MapperConfigs(object): Class defining configurations for the ``Mapper`` class :attrib band_names: - names of raster bands to process from each dataset found during the - mapper process + names 
of raster bands to process from each dataset found :attrib resampling_method: resampling might become necessary when the spatial resolution changes. Nearest neighbor by default. @@ -153,8 +152,7 @@ def __init__( Constructs a new ``MapperConfig`` instance. :param band_names: - names of raster bands to process from each dataset found during the - mapper process + names of raster bands to process from each dataset found :param resampling_method: resampling might become necessary when the spatial resolution changes. Nearest neighbor by default. From 5254baa0c1b00b6ce4ccd92531f68699b95281c7 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 13 Nov 2022 10:32:26 +0100 Subject: [PATCH 010/125] fixed typos in documentation --- eodal/core/raster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index b841ff8b..9b0cb2fe 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -354,14 +354,14 @@ def collection(self, value): @check_band_names def get_band_alias(self, band_name: str) -> Union[Dict[str, str], None]: """ - Retuns the band_name-alias mapper of a given band + Retuns the band_name-alias scenes of a given band in collection if the band has an alias, None instead :param band_name: name of the band for which to return the alias or its name if the alias is provided :returns: - mapper of band_name:band_alias (band name is always the + mapping of band_name:band_alias (band name is always the key and band_alias is the value) """ if self[band_name].has_alias: From 098df86d99453c04ae5d0d2a0a1f9d87d1e5dee3 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 13 Nov 2022 10:32:40 +0100 Subject: [PATCH 011/125] drafting SceneCollection class --- eodal/scenes/feature.py | 158 ++++++++++++++++++++++++++++--- eodal/scenes/filter.py | 16 +--- eodal/scenes/scene_collection.py | 50 ++++++++++ 3 files changed, 195 insertions(+), 29 deletions(-) create mode 100644 eodal/scenes/scene_collection.py diff --git 
a/eodal/scenes/feature.py b/eodal/scenes/feature.py index 464f3661..2a6d9c5b 100644 --- a/eodal/scenes/feature.py +++ b/eodal/scenes/feature.py @@ -17,40 +17,170 @@ along with this program. If not, see . ''' +import geopandas as gpd +import pandas as pd + +from shapely.geometry import MultiPoint, MultiPolygon, Point, Polygon +from typing import Any, Dict, Optional + +allowed_geom_types = [MultiPoint, MultiPolygon, Point, Polygon] + class Feature: """ - Generic class for geographic features + Generic class for a geographic feature :attrib name: name of the feature (used for identification) :attrib geometry: `shapely` geometry of the feature in a spatial reference system - :attrib crs: - spatial coordinate reference system of the feature + :attrib epgs: + spatial coordinate reference system of the feature as EPSG code + :attrib attributes: + optional attributes of the feature """ - def __init__(self, name: str, geometry, crs): + def __init__( + self, + name: str, + geometry: MultiPoint | MultiPolygon | Point | Polygon, + epsg: int, + attributes: Optional[Dict[str, Any] | pd.Series] = {} + ): """ Class constructor + + :param name: + name of the feature (used for identification) + :param geometry: + `shapely` geometry of the feature in a spatial reference system + :param epgs: + spatial coordinate reference system of the feature as EPSG code + :param attributes: + optional attributes of the feature """ + # check inputs + if name == '': + raise ValueError(f'Empty feature names are not allowed') + if type(geometry) not in allowed_geom_types: + raise ValueError(f'geometry must of type {",".join(allowed_geom_types)}') + if type(epsg) != int or epsg <= 0: + raise ValueError('EPSG code must be a positive integer value') + if not isinstance(attributes, pd.Series) and not isinstance(attributes, dict): + raise ValueError('Attributes must pd.Series or dictionary') + self._name = name self._geometry = geometry - self._crs = crs + self._epsg = epsg + self._attributes = attributes 
def __repr__(self) -> str: - return f'Feature name: {self.name}\nFeature Geometry: ' + \ - f'{self.geometry} (CRS: {self.crs})' + return f'Name\t\t{self.name}\nGeometry\t' + \ + f'{self.geometry}\nEPSG Code\t{self.epsg}' + \ + f'\nAttributes\t{self.attributes}' @property - def name(self) -> str: - """the feature name""" - return self._name + def attributes(self) -> Dict: + """feature attributes""" + if isinstance(self._attributes, pd.Series): + return self._attributes.to_dict() + else: + return self._attributes @property - def geometry(self): + def epsg(self) -> int: + """the feature coordinate reference system as EPSG code""" + return self._epsg + + @property + def geometry(self) -> MultiPoint | MultiPolygon | Point | Polygon: """the feature geometry""" return self._geometry @property - def crs(self): - """the feature coordinate reference system""" - return self._crs \ No newline at end of file + def name(self) -> str: + """the feature name""" + return self._name + + @classmethod + def from_geoseries(cls, gds: gpd.GeoSeries): + """ + Feature object from `GeoSeries` + + :param gds: + `GeoSeries` to cast to Feature + :returns: + Feature instance created from input `GeoSeries` + """ + return cls( + name=gds.name, + geometry=gds.geometry.values[0], + epsg=gds.crs.to_epsg(), + attributes=gds.attrs + ) + + def to_epsg(self, epsg: int): + """ + Projects the feature into a different spatial reference system + identified by an EPSG code. Returns a copy of the Feature with + transformed coordinates. 
+ + :param epsg: + EPSG code of the reference system the feature is project to + :returns: + new Feature instance in the target spatial reference system + """ + gds = self.to_geoseries() + gds_projected = gds.to_crs(epsg=epsg) + return Feature.from_geoseries(gds_projected) + + def to_geoseries(self) -> gpd.GeoSeries: + """ + Casts the feature to a GeoSeries object + + :returns: + Feature object casted as `GeoSeries` + """ + gds = gpd.GeoSeries([self.geometry], crs=f'EPSG:{self.epsg}') + # add attributes from Feature + gds.attrs = self.attributes + # set name of Feature to GeoSeries + gds.name = self.name + return gds + +if __name__ == '__main__': + + # working constructor calls + geom = Point([49,11]) + epsg = 4326 + name = 'Test Point' + feature = Feature(name, geom, epsg) + + assert feature.geometry == geom, 'geometry differs' + assert feature.epsg == epsg, 'EPSG code differs' + assert feature.name == name, 'name differs' + assert feature.attributes == {}, 'attributes must be empty' + + attributes = {'key': 'value'} + feature = Feature(name, geom, epsg, attributes) + assert feature.attributes == attributes, 'attributes differ' + + attributes = pd.Series({'key1': 'value1', 'key2': 'value2'}) + feature = Feature(name, geom, epsg, attributes) + assert feature.attributes == attributes.to_dict(), 'attributes differ' + + gds = feature.to_geoseries() + assert gds.name == feature.name, 'name differs' + assert gds.crs.to_epsg() == feature.epsg, 'EPSG differs' + assert gds.attrs == feature.attributes, 'attributes differ' + + # from_geoseries class method + gds.attrs = {} + feature = Feature.from_geoseries(gds) + assert gds.name == feature.name, 'name differs' + assert gds.crs.to_epsg() == feature.epsg, 'EPSG differs' + assert gds.attrs == feature.attributes, 'attributes differ' + + # project into another spatial reference system + feature_utm = feature.to_epsg(epsg=32632) + assert feature_utm.epsg == 32632, 'projection had no effect' + assert feature_utm.name == 
feature.name, 'name got lost' + assert feature_utm.attributes == feature.attributes, 'attributes got lost' diff --git a/eodal/scenes/filter.py b/eodal/scenes/filter.py index 6dc72f15..52c7cfba 100644 --- a/eodal/scenes/filter.py +++ b/eodal/scenes/filter.py @@ -1,5 +1,5 @@ ''' -Predefined filters for EO data selection by their metadata. +Predefined filters for EO data selection by metadata. Copyright (C) 2022 Lukas Valentin Graf @@ -44,10 +44,6 @@ def __init__(self, entity: str, condition: str): def __repr__(self) -> str: return f'Filter by {self.entity} {self.condition}' - def __add__(self, other): - # IDEA: filters can be combined using logical AND and OR - pass - @property def entity(self) -> str: return self._entity @@ -56,16 +52,6 @@ def entity(self) -> str: def condition(self) -> str: return self._condition -class GeoFilter(Filter): - pass - -class TimeFilter(Filter): - pass - -class QualityFilter(Filter): - pass - - if __name__ == '__main__': cc_filter = Filter(entity='cloudy_pixel_percentage', condition='<30') diff --git a/eodal/scenes/scene_collection.py b/eodal/scenes/scene_collection.py new file mode 100644 index 00000000..1fdd4f6b --- /dev/null +++ b/eodal/scenes/scene_collection.py @@ -0,0 +1,50 @@ +''' +Created on Nov 13, 2022 + +@author: graflu +''' + +import pandas as pd + +from eodal.core.raster import RasterCollection + +class Scene: + + def __init__(self, metadata: pd.Series, data: RasterCollection): + """ + """ + self._metadata = metadata + self._data = data + + @property + def metadata(self) -> pd.Series: + return self._metadata + + @property + def data(self) -> RasterCollection: + return self._data + +class SceneCollection: + def __init__(self): + pass + + def __repr__(self) -> str: + pass + + def apply(self): + pass + + def dump(self): + pass + + def filter(self): + pass + + def load(self): + pass + + def plot(self): + pass + + def to_xarray(self): + pass From 253a42e8ce685e27980128d656f2eed43436eceb Mon Sep 17 00:00:00 2001 From: lukas Date: 
Sun, 13 Nov 2022 10:32:46 +0100 Subject: [PATCH 012/125] minor changes --- eodal/operational/mapping/sentinel1.py | 2 +- eodal/operational/mapping/sentinel2.py | 4 ++-- examples/sentinel1_mapping_example.py | 2 +- examples/sentinel2_mapping_example.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/eodal/operational/mapping/sentinel1.py b/eodal/operational/mapping/sentinel1.py index ce228529..8c5ed36f 100644 --- a/eodal/operational/mapping/sentinel1.py +++ b/eodal/operational/mapping/sentinel1.py @@ -18,7 +18,7 @@ class Sentinel1Mapper(Mapper): """ - Spatial mapper class for Sentinel-1 data. + Spatial scenes class for Sentinel-1 data. """ def __init__( self, diff --git a/eodal/operational/mapping/sentinel2.py b/eodal/operational/mapping/sentinel2.py index 042a69d3..bccbd5c9 100644 --- a/eodal/operational/mapping/sentinel2.py +++ b/eodal/operational/mapping/sentinel2.py @@ -45,7 +45,7 @@ class Sentinel2Mapper(Mapper): """ - Spatial mapper class for Sentinel-2 data. + Spatial scenes class for Sentinel-2 data. :attrib processing_level: Sentinel-2 data processing level (L1C or L2A) @@ -106,7 +106,7 @@ def get_scenes(self) -> None: NOTE: By passing a list of Sentinel-2 tiles you can explicitly control which Sentinel-2 tiles are considered. This might be useful for - mapper tasks where your feature collection lies completely within + scenes tasks where your feature collection lies completely within a single Sentinel-2 tile but also overlaps with neighboring tiles. 
The scene selection and processing workflow contains several steps: diff --git a/examples/sentinel1_mapping_example.py b/examples/sentinel1_mapping_example.py index c59d38a5..d61c3dfd 100644 --- a/examples/sentinel1_mapping_example.py +++ b/examples/sentinel1_mapping_example.py @@ -60,7 +60,7 @@ #%% executable part -# get a new mapper instance +# get a new scenes instance mapper = Sentinel1Mapper( date_start=date_start, date_end=date_end, diff --git a/examples/sentinel2_mapping_example.py b/examples/sentinel2_mapping_example.py index d9e654a2..18b7baa9 100644 --- a/examples/sentinel2_mapping_example.py +++ b/examples/sentinel2_mapping_example.py @@ -65,13 +65,13 @@ aoi: Path = Path('../data/sample_polygons/lake_lucerne.gpkg') #%% executable part -# Sentinel-2 mapper configuration +# Sentinel-2 scenes configuration mapper_configs = MapperConfigs( spatial_resolution=spatial_resolution, resampling_method=resampling_method, ) -# get a new mapper instance +# get a new scenes instance mapper = Sentinel2Mapper( date_start=date_start, date_end=date_end, From 5a71bdb0dadc34c77333131def8d665cb1544848 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 20 Nov 2022 16:51:34 +0100 Subject: [PATCH 013/125] passing Band objects as mask directly --- eodal/core/raster.py | 61 +++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 23 deletions(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index 9b0cb2fe..ceefb2a9 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -447,6 +447,25 @@ def _bands_from_selection( "band_count": band_count, } + def apply(self, func: Callable, *args, **kwargs) -> Any: + """ + Apply a custom function to a ``RasterCollection``. 
+ + :param func: + custom callable taking the ``RasterCollection`` as first + argument + :param args: + optional arguments to pass to `func` + :param kwargs: + optional keyword arguments to pass to `func` + :returns: + result of the callable + """ + try: + return func.__call__(self, *args, **kwargs) + except Exception as e: + raise ValueError from e + def copy(self): """ Returns a copy of the current ``RasterCollection`` @@ -1046,7 +1065,7 @@ def resample( def mask( self, - mask: Union[str, np.ndarray], + mask: Union[str, np.ndarray, Band], mask_values: Optional[List[Any]] = None, keep_mask_values: Optional[bool] = False, bands_to_mask: Optional[List[str]] = None, @@ -1061,7 +1080,8 @@ def mask( :param mask: either a band out of the collection (identified through its - band name) or a ``numpy.ndarray`` of datatype boolean. + band name) or a ``numpy.ndarray`` of datatype boolean or + another `Band` object :param mask_values: if `mask` is a band out of the collection, a list of values **must** be specified to create a boolean mask. 
Ignored if `mask` @@ -1079,15 +1099,16 @@ def mask( :returns: new RasterCollection if `inplace==False`, None otherwise """ + _mask = deepcopy(mask) # check mask and prepare it if required - if isinstance(mask, np.ndarray): + if isinstance(_mask, np.ndarray): if mask.dtype != "bool": raise TypeError("When providing an array it must be boolean") - if len(mask.shape) != 2: + if len(_mask.shape) != 2: raise ValueError("When providing an array it must be 2-dimensional") - elif isinstance(mask, str): + elif isinstance(_mask, str): try: - mask = self.get_values(band_selection=[mask])[0, :, :] + _mask = self.get_values(band_selection=[_mask])[0, :, :] except Exception as e: raise ValueError(f"Invalid mask band: {e}") # translate mask band into boolean array @@ -1096,18 +1117,22 @@ def mask( "When using a band as mask, you have to provide a list of mask values" ) # convert the mask to a temporary binary mask - tmp = np.zeros_like(mask) + tmp = np.zeros_like(_mask) # set valid classes to 1, the other ones are zero if keep_mask_values: # drop all other values not in mask_values - tmp[~np.isin(mask, mask_values)] = 1 + tmp[~np.isin(_mask, mask_values)] = 1 else: # drop all values in mask_values - tmp[np.isin(mask, mask_values)] = 1 - mask = tmp.astype("bool") + tmp[np.isin(_mask, mask_values)] = 1 + _mask = tmp.astype("bool") + elif isinstance(_mask, Band): + if _mask.values.dtype != 'bool': + raise TypeError(f'Mask must have boolean values not {_mask.values.dtype}') + _mask = _mask.values else: raise TypeError( - f"Mask must be either band_name or np.ndarray not {type(mask)}" + f"Mask must be either band_name or np.ndarray not {type(_mask)}" ) # check bands to mask @@ -1119,16 +1144,6 @@ def mask( raise ValueError( "Can only mask bands that have the same spatial extent, pixel size and CRS" ) - if mask.shape[0] != self[bands_to_mask[0]].nrows: - raise ValueError( - f"Number of rows in mask ({mask.shape[0]}) does not match " - f"number of rows in the raster data 
({self[bands_to_mask[0]].nrows})" - ) - if mask.shape[1] != self[bands_to_mask[0]].ncols: - raise ValueError( - f"Number of columns in mask ({mask.shape[1]}) does not match " - f"number of columns in the raster data ({self[bands_to_mask[0]].ncols})" - ) # initialize a new raster collection if inplace is False collection = None @@ -1140,11 +1155,11 @@ def mask( # loop over band reproject the selected ones for band_name in bands_to_mask: if inplace: - self[band_name].mask(mask=mask, inplace=inplace) + self[band_name].mask(mask=_mask, inplace=inplace) else: band = self.get_band(band_name) collection.add_band( - band_constructor=band.mask, mask=mask, inplace=inplace + band_constructor=band.mask, mask=_mask, inplace=inplace ) return collection From a2f500668866ab42ac1cb1de1e715cb935ae65d2 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 20 Nov 2022 16:52:20 +0100 Subject: [PATCH 014/125] testing new RasterCollection().apply() method --- tests/core/test_raster_apply.py | 104 ++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 tests/core/test_raster_apply.py diff --git a/tests/core/test_raster_apply.py b/tests/core/test_raster_apply.py new file mode 100644 index 00000000..fd663323 --- /dev/null +++ b/tests/core/test_raster_apply.py @@ -0,0 +1,104 @@ +''' +Created on Nov 20, 2022 + +@author: graflu +''' + +import numpy as np +import pytest + +from copy import deepcopy +from typing import List, Optional + +from eodal.core.band import Band +from eodal.core.raster import RasterCollection +from build.lib.eodal.core import raster + +def sqrt_per_band( + raster_collection: RasterCollection, + band_selection: Optional[List[str]] = None +) -> RasterCollection: + """ + Calculate the square root per band in RasterCollection. 
+ + :param raster_collection: + non-empty raster collection + :param band_selection: + optional list of bands for which to calculate the square root + :returns: + RasterCollection object with square root values + """ + # make sure the RasterCollection is not empty + if raster_collection.empty: + raise ValueError('Passed RasterCollection must not be empty') + + # check passed bands + _band_selection = deepcopy(band_selection) + if _band_selection is None: + _band_selection = raster_collection.band_names + + # calculate the square root per band + out_collection = RasterCollection() + for band_name in _band_selection: + vals = raster_collection[band_name].values + if isinstance(vals, np.ma.MaskedArray): + sqrt_vals = np.ma.sqrt(vals) + else: + sqrt_vals = np.sqrt(vals) + out_collection.add_band( + Band, + values=sqrt_vals, + geo_info=raster_collection[band_name].geo_info, + band_name=f'SQRT({raster_collection[band_name].band_name})', + ) + out_collection.scene_properties = raster_collection.scene_properties + return out_collection + +def test_apply_custom_function(get_bandstack): + """ + test applying a custom function to a RasterCollection + """ + fpath_raster = get_bandstack() + gTiff_collection = RasterCollection.from_multi_band_raster( + fpath_raster=fpath_raster + ) + + # define a custom function for calculation the square root of values + # per band + out_collection_func = sqrt_per_band(raster_collection=gTiff_collection) + # apply the function to the RasterCollection + out_collection_apply = gTiff_collection.apply(sqrt_per_band) + assert isinstance(out_collection_apply, RasterCollection), \ + 'apply did not return a RasterCollection' + + assert (out_collection_func['SQRT(B02)'] == out_collection_apply['SQRT(B02)']).values.all(), \ + 'RasterCollection.apply and applying the function to RasterCollection did yield different results' + + assert (gTiff_collection['B02'] == out_collection_apply['SQRT(B02)']).values.any(), \ + 'RasterCollection.apply had no effect' + 
+ # apply to selection of bands + out_collection = gTiff_collection.apply(sqrt_per_band, band_selection=['B03']) + assert len(out_collection.band_names) == 1, 'wrong number of bands' + + # apply to masked RasterCollection + mask = gTiff_collection['B02'] < 1000 + masked_collection = gTiff_collection.mask(mask=mask) + out_collection_masked = masked_collection.apply(sqrt_per_band) + assert not out_collection_masked.empty, 'Output is empty' + assert out_collection_masked['SQRT(B02)'].is_masked_array, \ + 'returned band is not masked' + + # incorrect apply calls + with pytest.raises(TypeError): + # function missing + gTiff_collection.apply(band_selection=['B02']) + + with pytest.raises(ValueError): + # wrong argument passed to function + gTiff_collection.apply(sqrt_per_band, dummy='false-arg') + + + + + \ No newline at end of file From b4c0149956058f57009f0822115ac7a834e680e4 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 20 Nov 2022 16:52:35 +0100 Subject: [PATCH 015/125] continuing drafting of class methods and attributes --- eodal/scenes/scene_collection.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/eodal/scenes/scene_collection.py b/eodal/scenes/scene_collection.py index 1fdd4f6b..7bcb9886 100644 --- a/eodal/scenes/scene_collection.py +++ b/eodal/scenes/scene_collection.py @@ -6,22 +6,32 @@ import pandas as pd +from typing import Callable + from eodal.core.raster import RasterCollection +from eodal.core.scene import SceneProperties class Scene: - def __init__(self, metadata: pd.Series, data: RasterCollection): + def __init__(self, data: RasterCollection): """ + Class constructor + + :param data: + scene data as `RasterCollection` """ - self._metadata = metadata self._data = data + # set metadata using the SceneProperties from RasterCollection + self._metadata = data.scene_properties @property - def metadata(self) -> pd.Series: + def metadata(self) -> SceneProperties: + """scene metadata""" return self._metadata @property 
def data(self) -> RasterCollection: + """scene data""" return self._data class SceneCollection: @@ -31,7 +41,7 @@ def __init__(self): def __repr__(self) -> str: pass - def apply(self): + def apply(self, func: Callable): pass def dump(self): From 73d6b78d389fbe854b15a9757f8e446bf7e4602f Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 20 Nov 2022 16:53:05 +0100 Subject: [PATCH 016/125] removing blank lines --- tests/core/test_raster_apply.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/core/test_raster_apply.py b/tests/core/test_raster_apply.py index fd663323..cf207665 100644 --- a/tests/core/test_raster_apply.py +++ b/tests/core/test_raster_apply.py @@ -97,8 +97,4 @@ def test_apply_custom_function(get_bandstack): with pytest.raises(ValueError): # wrong argument passed to function gTiff_collection.apply(sqrt_per_band, dummy='false-arg') - - - - \ No newline at end of file From 8e1c6958636997eb8d04d5207630e37053ccc02c Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 20 Nov 2022 16:54:45 +0100 Subject: [PATCH 017/125] updating doc-string of apply method --- eodal/core/raster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index ceefb2a9..fc5d4569 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -459,7 +459,7 @@ def apply(self, func: Callable, *args, **kwargs) -> Any: :param kwargs: optional keyword arguments to pass to `func` :returns: - result of the callable + results of `func` """ try: return func.__call__(self, *args, **kwargs) From 4f7a322bbe9097ab9593568acd82230fb5d846fa Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 20 Nov 2022 17:04:25 +0100 Subject: [PATCH 018/125] RasterCollection are now properly iterable (__iter__) --- eodal/core/raster.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index fc5d4569..40684f68 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -281,7 +281,8 @@ def 
__delitem__(self, key: str): del self.collection[key] def __iter__(self): - return iter(self.collection) + for k, v in self.collection.items(): + yield k, v def __len__(self) -> int: return len(self.collection) From 0c74b072ff024edc76f9ae4e40af7d2ab58bcda1 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 20 Nov 2022 17:04:37 +0100 Subject: [PATCH 019/125] added test for iterable RasterCollections --- tests/core/test_raster_iterator.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/core/test_raster_iterator.py diff --git a/tests/core/test_raster_iterator.py b/tests/core/test_raster_iterator.py new file mode 100644 index 00000000..acef8777 --- /dev/null +++ b/tests/core/test_raster_iterator.py @@ -0,0 +1,23 @@ +''' +Created on Nov 20, 2022 + +@author: graflu +''' + +import pytest + +from eodal.core.band import Band +from eodal.core.raster import RasterCollection + +def test_raster_iterator(get_bandstack): + fpath_raster = get_bandstack() + ds = RasterCollection.from_multi_band_raster( + fpath_raster=fpath_raster + ) + band_names = ds.band_names + + idx = 0 + for band_name, band_obj in ds: + assert band_name == band_names[idx], 'wrong band name returned' + assert isinstance(band_obj, Band), 'no band object returned' + assert band_obj.band_name == band_name, 'band names do not match' From 98e9371b2c828ef50658833bcc9c6ec3be547c5e Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 21 Nov 2022 17:30:51 +0100 Subject: [PATCH 020/125] updating CHANGELOG with recent changes and updated links --- CHANGELOG.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 65541706..52f47c27 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,13 +10,16 @@ The format is based on `Keep a Changelog`_, and this project adheres to `Semanti Categories for changes are: Added, Changed, Deprecated, Removed, Fixed, Security. 
-Version `0.0.2 < https://github.com/remote-sensing-team/eodal/releases/tag/v0.0.2>`__ +Version `0.0.2 < https://github.com/EOA-team/eodal/releases/tag/v0.0.2>`__ -------------------------------------------------------------------------------- Release date: YYYY-MM-DD +- Added: RasterCollection objects are now iterable (iterate over bands in collection) +- Added: RasterCollection now have a "apply" method allowing to pass custom functions to RasterCollection objects -Version `0.0.1 < https://github.com/remote-sensing-team/eodal/releases/tag/v0.0.1>`__ + +Version `0.0.1 < https://github.com/EOA-team/eodal/releases/tag/v0.0.1>`__ -------------------------------------------------------------------------------- Release date: 2022-10-31. From 0ff3805b1058c899d9e05ee31699b3370f894978 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 21 Nov 2022 17:31:03 +0100 Subject: [PATCH 021/125] finalized iterator test --- tests/core/test_raster_iterator.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/core/test_raster_iterator.py b/tests/core/test_raster_iterator.py index acef8777..2fe5bd49 100644 --- a/tests/core/test_raster_iterator.py +++ b/tests/core/test_raster_iterator.py @@ -10,6 +10,8 @@ from eodal.core.raster import RasterCollection def test_raster_iterator(get_bandstack): + """test the __iter__() method of RasterCollections""" + fpath_raster = get_bandstack() ds = RasterCollection.from_multi_band_raster( fpath_raster=fpath_raster @@ -21,3 +23,6 @@ def test_raster_iterator(get_bandstack): assert band_name == band_names[idx], 'wrong band name returned' assert isinstance(band_obj, Band), 'no band object returned' assert band_obj.band_name == band_name, 'band names do not match' + idx += 1 + + assert idx == len(ds.band_names), 'iterator did not cover all bands' From c9f2ef90cbd425b878dc5d8e1b9217b81068e331 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 21 Nov 2022 17:55:46 +0100 Subject: [PATCH 022/125] adding __repr__() method and correcting some typos in docs 
--- eodal/core/raster.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index 40684f68..187d331d 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -273,7 +273,7 @@ def __setitem__(self, item: Band): raise TypeError("Only Band objects can be passed") key = item.band_name if key in self.collection.keys(): - raise KeyError("Duplicate band names not permitted") + raise KeyError("Duplicate band names are not permitted") value = item.copy() self.collection[key] = value @@ -317,6 +317,14 @@ def __gt__(self, other): def __lt__(self, other): return RasterOperator.calc(a=self, other=other, operator="<") + def __repr__(self) -> str: + if self.empty: + return 'Empty EOdal RasterCollection' + else: + return f'EOdal RasterCollection\n----------------------\n' + \ + f'# Bands: {len(self)}\n Band names: {", ".join(self.band_names)}\n' + \ + f'Is band stack: {self.is_bandstacked}' + @property def band_names(self) -> List[str]: """band names in collection""" From c263e7b00ffcfa9282174aed1b55477cfd532ce9 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 21 Nov 2022 17:55:55 +0100 Subject: [PATCH 023/125] minor --- examples/digital_terrain_model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/digital_terrain_model.py b/examples/digital_terrain_model.py index c0e4b97d..c5e8b047 100644 --- a/examples/digital_terrain_model.py +++ b/examples/digital_terrain_model.py @@ -34,4 +34,3 @@ ) fig.savefig('../img/eodal_SwissALTI3D_sample.png', dpi=150, bbox_inches='tight') - From 14d3e9dfc9c808f2632d56f2d1a7ae4eba328aa7 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 21 Nov 2022 17:59:38 +0100 Subject: [PATCH 024/125] refactoring --- eodal/{scenes => mapper}/__init__.py | 0 eodal/{scenes => mapper}/feature.py | 0 eodal/{scenes => mapper}/filter.py | 0 eodal/{scenes => mapper}/scene_collection.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename eodal/{scenes => mapper}/__init__.py (100%) 
rename eodal/{scenes => mapper}/feature.py (100%) rename eodal/{scenes => mapper}/filter.py (100%) rename eodal/{scenes => mapper}/scene_collection.py (100%) diff --git a/eodal/scenes/__init__.py b/eodal/mapper/__init__.py similarity index 100% rename from eodal/scenes/__init__.py rename to eodal/mapper/__init__.py diff --git a/eodal/scenes/feature.py b/eodal/mapper/feature.py similarity index 100% rename from eodal/scenes/feature.py rename to eodal/mapper/feature.py diff --git a/eodal/scenes/filter.py b/eodal/mapper/filter.py similarity index 100% rename from eodal/scenes/filter.py rename to eodal/mapper/filter.py diff --git a/eodal/scenes/scene_collection.py b/eodal/mapper/scene_collection.py similarity index 100% rename from eodal/scenes/scene_collection.py rename to eodal/mapper/scene_collection.py From eaf07c53eede498e663877bc5ade759543deb23e Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 21 Nov 2022 17:59:46 +0100 Subject: [PATCH 025/125] refactoring --- eodal/core/raster.py | 2 +- eodal/core/scene.py | 199 ++++++++---------- eodal/downloader/planet_scope/planet.py | 10 +- eodal/downloader/sentinel2/creodias.py | 4 +- eodal/downloader/utils/unzip_datasets.py | 6 +- eodal/mapper/scene_collection.py | 54 ----- .../planet_scope/database/querying.py | 2 +- eodal/metadata/planet_scope/parsing.py | 6 +- .../metadata/sentinel1/database/ingestion.py | 4 +- eodal/metadata/sentinel1/database/querying.py | 2 +- eodal/metadata/sentinel1/parsing.py | 20 +- .../metadata/sentinel2/database/ingestion.py | 4 +- eodal/metadata/sentinel2/database/querying.py | 4 +- eodal/metadata/sentinel2/parsing.py | 24 +-- eodal/metadata/sentinel2/utils.py | 8 +- eodal/metadata/stac/client.py | 8 +- eodal/metadata/utils.py | 2 +- eodal/operational/archive/sentinel1.py | 2 +- eodal/operational/archive/sentinel2.py | 4 +- eodal/operational/cli/planet_scope.py | 2 +- eodal/operational/cli/sentinel2.py | 12 +- eodal/operational/mapping/mapper.py | 24 +-- eodal/operational/mapping/sentinel1.py | 
14 +- eodal/operational/mapping/sentinel2.py | 56 ++--- eodal/operational/mapping/utils.py | 8 +- .../resampling/sentinel2/__init__.py | 24 +-- .../resampling/sentinel2/merge_blackfill.py | 16 +- eodal/operational/resampling/utils.py | 8 +- examples/planet_download.py | 2 +- examples/random_sentinel2_pixels.py | 10 +- examples/satellite_archive_query.py | 2 +- examples/sentinel1_mapping_example.py | 8 +- examples/sentinel2_mapping_example.py | 14 +- tests/metadata/test_stac_client.py | 4 +- tests/operational/test_sentinel2_mapper.py | 10 +- 35 files changed, 252 insertions(+), 327 deletions(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index 187d331d..9c2d4968 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -363,7 +363,7 @@ def collection(self, value): @check_band_names def get_band_alias(self, band_name: str) -> Union[Dict[str, str], None]: """ - Retuns the band_name-alias scenes of a given band + Retuns the band_name-alias mapper of a given band in collection if the band has an alias, None instead :param band_name: diff --git a/eodal/core/scene.py b/eodal/core/scene.py index d6efdbc0..4ea15964 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -21,12 +21,13 @@ import datetime import numpy as np +from collections.abc import MutableMapping from numbers import Number -from typing import Optional +from typing import Callable, List, Optional +from eodal.core.raster import RasterCollection from eodal.utils.constants import ProcessingLevels - class SceneProperties(object): """ A class for storing scene-relevant properties @@ -152,111 +153,89 @@ def mode(self, value: str) -> None: raise TypeError("Expected a str object") self._mode = value -# class Sentinel2SceneProperties(SceneProperties): -# """ -# Sentinel-2 specific scene properties -# -# :attribute sun_zenith_angle: -# scene-wide sun zenith angle [deg] -# :attribute sun_azimuth_angle: -# scene-wide sun azimuth angle [deg] -# :attribute sensor_zenith_angle: -# scene-wide 
sensor zenith angle [deg] -# :attribute sensor_azimuth_angle: -# scene-wide sensor azimuth angle [deg] -# """ -# -# def __init__( -# self, -# sun_zenith_angle: Optional[float] = np.nan, -# sun_azimuth_angle: Optional[float] = np.nan, -# sensor_zenith_angle: Optional[float] = np.nan, -# sensor_azimuth_angle: Optional[float] = np.nan, -# *args, -# **kwargs -# ): -# """ -# Class constructor -# -# :param sun_zenith_angle: -# scene-wide sun zenith angle [deg] -# :param sun_azimuth_angle: -# scene-wide sun azimuth angle [deg] -# :param sensor_zenith_angle: -# scene-wide sensor zenith angle [deg] -# :param sensor_azimuth_angle: -# scene-wide sensor azimuth angle [deg] -# :param args: -# positional arguments to pass to the constructor of the -# super-class -# :param kwargs: -# key-word arguments to pass to the constructor of the -# super-class -# """ -# # call constructor of super class -# super().__init__(*args, **kwargs) -# -# self.sun_zenith_angle = sun_zenith_angle -# self.sun_azimuth_angle = sun_azimuth_angle -# self.sensor_zenith_angle = sensor_zenith_angle -# self.sensor_azimuth_angle = sensor_azimuth_angle -# -# @property -# def sun_zenith_angle(self) -> float: -# """sun zenith angle [deg]""" -# return self._sun_zenith_angle -# -# @sun_zenith_angle.setter -# def sun_zenith_angle(self, val: float) -> None: -# """sun zenith angle [deg]""" -# if not isinstance(val, Number): -# raise TypeError('Expected integer of float') -# # plausibility check -# if not 0 <= val <= 90: -# raise ValueError('The sun zenith angle ranges from 0 to 90 degrees') -# self._sun_zenith_angle = val -# -# @property -# def sun_azimuth_angle(self) -> float: -# """sun azimuth angle [deg]""" -# return self._sun_zenith_angle -# -# @sun_azimuth_angle.setter -# def sun_azimuth_angle(self, val: float) -> None: -# """sun azimuth angle [deg]""" -# if not isinstance(val, Number): -# raise TypeError('Expected integer of float') -# # plausibility check -# if not 0 <= val <= 180: -# raise ValueError('The sun 
azimuth angle ranges from 0 to 180 degrees') -# self._sun_zenith_angle = val -# -# @property -# def sensor_zenith_angle(self) -> float: -# """sensor zenith angle [deg]""" -# return self._sensor_zenith_angle -# -# @sensor_zenith_angle.setter -# def sensor_zenith_angle(self, val: float) -> None: -# """sensor zenith angle [deg]""" -# if not isinstance(val, Number): -# raise TypeError('Expected integer of float') -# # plausibility check -# if not 0 <= val <= 90: -# raise ValueError('The sensor zenith angle ranges from 0 to 90 degrees') -# self._sensor_zenith_angle = val -# -# @property -# def sensor_azimuth_angle(self) -> float: -# """sun azimuth angle [deg]""" -# return self._sensor_zenith_angle -# -# @sensor_azimuth_angle.setter -# def sensor_azimuth_angle(self, val: float) -> None: -# """sun azimuth angle [deg]""" -# if not isinstance(val, Number): -# raise TypeError('Expected integer of float') -# # plausibility check -# if not 0 <= val <= 180: -# raise ValueError('The sensor azimuth angle ranges from 0 to 180 degrees') -# self._sun_zenith_angle = val + +class SceneCollection(MutableMapping): + """ + Collection of 0:N scenes where each scene is a RasterCollection with + **non-empty** `SceneProperties` as each scene is indexed by its + acquistion time. + """ + def __init__( + self, + scene_constructor: Optional[Callable[..., RasterCollection]] = None, + *args, + **kwargs + ): + """ + Initializes a SceneCollection object with 0 to N scenes. + + :param scene_constructor: + optional callable returning an `~eodal.core.raster.RasterCollection` + instance. 
+
+        :param args:
+            arguments to pass to `scene_constructor` or one of RasterCollection's
+            class methods (e.g., `RasterCollection.from_multi_band_raster`)
+        :param kwargs:
+            key-word arguments to pass to `scene_constructor` or one of RasterCollection's
+            class methods (e.g., `RasterCollection.from_multi_band_raster`)
+        """
+        # scenes are stored in a dictionary-like collection
+        self._frozen = False
+        self.collection = dict()
+        self._frozen = True
+
+        if scene_constructor is not None:
+            scene = scene_constructor.__call__(*args, **kwargs)
+            if not isinstance(scene, RasterCollection):
+                raise TypeError('Only RasterCollection objects can be passed')
+            self.__setitem__(scene)
+
+    def __getitem__(self, key: str) -> RasterCollection:
+        return self.collection[key]
+
+    def __setitem__(self, item: RasterCollection):
+        if not isinstance(item, RasterCollection):
+            raise TypeError("Only RasterCollection objects can be passed")
+        key = item.scene_properties.acquisition_time
+        if key in self.collection.keys():
+            raise KeyError("Duplicate scene names are not permitted")
+        if key is None:
+            raise ValueError("RasterCollection passed must have an acquisition time stamp")
+        value = item.copy()
+        self.collection[key] = value
+
+    def __delitem__(self, key: str):
+        del self.collection[key]
+
+    def __iter__(self):
+        for k, v in self.collection.items():
+            yield k, v
+
+    def __len__(self) -> int:
+        return len(self.collection)
+
+    def __repr__(self) -> str:
+        pass
+
+    @property
+    def scene_names(self) -> List[str]:
+        """scene names in collection"""
+        return list(self.collection.keys())
+
+    def apply(self, func: Callable):
+        pass
+
+    def dump(self):
+        pass
+
+    def filter(self):
+        pass
+
+    def load(self):
+        pass
+
+    def plot(self):
+        pass
+
+    def to_xarray(self):
+        pass
diff --git a/eodal/downloader/planet_scope/planet.py b/eodal/downloader/planet_scope/planet.py
index 7003d812..15c0882e 100644
--- a/eodal/downloader/planet_scope/planet.py
+++ b/eodal/downloader/planet_scope/planet.py
@@ -1,6
+1,6 @@ ''' Class for interacting with PlanetScope's Data and Order URL for checking -available scenes, placing orders and downloading data. +available mapper, placing orders and downloading data. Make sure to have a Planet-account and to have exported your valid API key as environmental variable. You can find your API following this link: @@ -62,7 +62,7 @@ class PlanetAPIClient(object): date and dataset filters) :attrib features: features returned from Planet API (i.e., found - Planet-Scope scenes) + Planet-Scope mapper) :attrib session: (authenticated) session object to interact with the Planet-API without re-sensing the API key for @@ -83,7 +83,7 @@ def __init__( date and dataset filters) :param features: features returned from Planet API (i.e., found - Planet-Scope scenes) + Planet-Scope mapper) :param session: (authenticated) session object to interact with the Planet-API without re-sensing the API key for @@ -191,7 +191,7 @@ def query_planet_api( Planet product item type. `PSScene` by default. :param cloud_cover_threshold: cloudy pixel percentage threshold (0-100%) for filtering - too cloudy scenes + too cloudy mapper :returns: `PlanetAPIClient object' """ @@ -403,7 +403,7 @@ def download_order(self, download_dir: Path, order_name: Optional[str] = '', Download data from an order. Order must be activated! :param download_dir: - directory where to download the Planet scenes to. Each scene is + directory where to download the Planet mapper to. Each scene is stored in a own sub-directory named by its ID to make the archive structure comparable to Sentinel-2 and the single assets (files) are placed within that sub-directory. 
diff --git a/eodal/downloader/sentinel2/creodias.py b/eodal/downloader/sentinel2/creodias.py index 4f11b0de..b1e404ad 100644 --- a/eodal/downloader/sentinel2/creodias.py +++ b/eodal/downloader/sentinel2/creodias.py @@ -71,9 +71,9 @@ def query_creodias( the queried region :param cloud_cover_threshold: cloudy pixel percentage threshold (0-100%) for filtering - scenes too cloudy for processing. All scenes with a cloud + mapper too cloudy for processing. All mapper with a cloud cover lower than the threshold specified will be downloaded. - Per default all scenes are downloaded. + Per default all mapper are downloaded. :returns: results of the CREODIAS query (no downloaded data!) as pandas DataFrame diff --git a/eodal/downloader/utils/unzip_datasets.py b/eodal/downloader/utils/unzip_datasets.py index b6045694..eee60f9c 100644 --- a/eodal/downloader/utils/unzip_datasets.py +++ b/eodal/downloader/utils/unzip_datasets.py @@ -31,16 +31,16 @@ def unzip_datasets(download_dir: Path, platform: str, remove_zips: Optional[bool] = True) -> None: """ - Helper function to unzip Sentinel-1 and 2 scenes once they are + Helper function to unzip Sentinel-1 and 2 mapper once they are downloaded from CREODIAS. Works currently on *nix system only and requires `unzip` to be installed on the system. 
:param download_dir: - directory where the zipped scenes in .SAFE format are located + directory where the zipped mapper in .SAFE format are located :param platform: either 'S1' (Sentinel-1) or 'S2' (Sentinel-2) :param remove_zips: - If set to False the zipped .SAFE scenes will be kept, otherwise + If set to False the zipped .SAFE mapper will be kept, otherwise (Default) they will be removed """ diff --git a/eodal/mapper/scene_collection.py b/eodal/mapper/scene_collection.py index 7bcb9886..7908a426 100644 --- a/eodal/mapper/scene_collection.py +++ b/eodal/mapper/scene_collection.py @@ -4,57 +4,3 @@ @author: graflu ''' -import pandas as pd - -from typing import Callable - -from eodal.core.raster import RasterCollection -from eodal.core.scene import SceneProperties - -class Scene: - - def __init__(self, data: RasterCollection): - """ - Class constructor - - :param data: - scene data as `RasterCollection` - """ - self._data = data - # set metadata using the SceneProperties from RasterCollection - self._metadata = data.scene_properties - - @property - def metadata(self) -> SceneProperties: - """scene metadata""" - return self._metadata - - @property - def data(self) -> RasterCollection: - """scene data""" - return self._data - -class SceneCollection: - def __init__(self): - pass - - def __repr__(self) -> str: - pass - - def apply(self, func: Callable): - pass - - def dump(self): - pass - - def filter(self): - pass - - def load(self): - pass - - def plot(self): - pass - - def to_xarray(self): - pass diff --git a/eodal/metadata/planet_scope/database/querying.py b/eodal/metadata/planet_scope/database/querying.py index 125b7af2..1d5afc09 100644 --- a/eodal/metadata/planet_scope/database/querying.py +++ b/eodal/metadata/planet_scope/database/querying.py @@ -71,7 +71,7 @@ def find_raw_data_by_bbox( optional cloud cover threshold to filter datasets by scene cloud coverage. Must be provided as number between 0 and 100%. 
:returns: - dataframe with references to found Planet-Scope scenes + dataframe with references to found Planet-Scope mapper """ # convert shapely geometry into extended well-known text representation diff --git a/eodal/metadata/planet_scope/parsing.py b/eodal/metadata/planet_scope/parsing.py index 674e3afa..229109f7 100644 --- a/eodal/metadata/planet_scope/parsing.py +++ b/eodal/metadata/planet_scope/parsing.py @@ -29,7 +29,7 @@ def _parse_metadata_json(in_file: Path) -> Dict[str, Any]: """ Parses the metadata file (*.json) delivered with the Planet-Scope - scenes + mapper :param in_file: PlanetScope metadata file-path (*.json) @@ -69,7 +69,7 @@ def _parse_metadata_json(in_file: Path) -> Dict[str, Any]: def _parse_metadata_xml(in_file: Path) -> Dict[str, Any]: """ Parses the metadata file (*.xml) delivered with the Planet-Scope - scenes to extract the EPSG code of the scene and the orbit directions + mapper to extract the EPSG code of the scene and the orbit directions :param in_file: PlanetScope metadata file-path (*.xml) @@ -89,7 +89,7 @@ def _parse_metadata_xml(in_file: Path) -> Dict[str, Any]: def parse_metadata(in_dir: Path) -> Dict[str, Any]: """ Parses the metadata files (*.json and *.xml) delivered with the Planet-Scope - scenes and returns the data in a format ready for DB insert + mapper and returns the data in a format ready for DB insert :param in_dir: PS scene directory where metadata and image files are located diff --git a/eodal/metadata/sentinel1/database/ingestion.py b/eodal/metadata/sentinel1/database/ingestion.py index 5c7df42e..ab37da04 100644 --- a/eodal/metadata/sentinel1/database/ingestion.py +++ b/eodal/metadata/sentinel1/database/ingestion.py @@ -38,12 +38,12 @@ def meta_df_to_database( meta_df: pd.DataFrame ) -> None: """ - Once the metadata from one or more scenes have been extracted + Once the metadata from one or more mapper have been extracted the data can be ingested into the metadata base (strongly recommended). 
:param meta_df: - data frame with metadata of one or more scenes to insert + data frame with metadata of one or more mapper to insert :param raw_metadata: If set to False, assumes the metadata is about processed products diff --git a/eodal/metadata/sentinel1/database/querying.py b/eodal/metadata/sentinel1/database/querying.py index 412b616b..f5a43a7c 100644 --- a/eodal/metadata/sentinel1/database/querying.py +++ b/eodal/metadata/sentinel1/database/querying.py @@ -76,7 +76,7 @@ def find_raw_data_by_bbox( :param sensor_mode: Sentinel-1 sensor mode. 'IW' (Interferometric Wide Swath) by default. :returns: - `DataFrame` with references to found Sentinel-2 scenes + `DataFrame` with references to found Sentinel-2 mapper """ # convert shapely geometry into extended well-known text representation if isinstance(bounding_box, Polygon): diff --git a/eodal/metadata/sentinel1/parsing.py b/eodal/metadata/sentinel1/parsing.py index 204e5b8a..f847b446 100644 --- a/eodal/metadata/sentinel1/parsing.py +++ b/eodal/metadata/sentinel1/parsing.py @@ -149,7 +149,7 @@ def extract_s1_footprint( Should the GML coordinates (from the manifest.safe) be used, or the KML coordinates (from the ./preview/map-overlay.kml) :returns: - Well-known-text (WKT) of the S1 scenes' footprint in geographic coordinates + Well-known-text (WKT) of the S1 mapper' footprint in geographic coordinates (WGS84, EPSG:4326). """ in_file = in_dir.joinpath("manifest.safe").as_posix() @@ -199,7 +199,7 @@ def loop_s1_archive( ) -> pd.DataFrame: """ wrapper function to loop over an entire archive (i.e., collection) of - Sentinel-2 scenes in either L1C or L2A processing level or a mixture + Sentinel-2 mapper in either L1C or L2A processing level or a mixture thereof. The function returns a pandas dataframe for all found entries in the @@ -207,7 +207,7 @@ def loop_s1_archive( :param in_dir: directory containing the Sentinel-2 data (L1C and/or L2A - processing level). 
Sentinel-2 scenes are assumed to follow ESA's
+        processing level). Sentinel-2 scenes are assumed to follow ESA's
         .SAFE naming convention and structure
     :param extract_datastrip:
         If True reads also metadata from the datastrip xml file
@@ -215,14 +215,14 @@
     :param get_newest_datasets:
         if set to True only datasets newer than a user-defined time stamp
         will be considered for ingestion into the database. This is particularly
-        useful for updating the database after new scenes have been downloaded
+        useful for updating the database after new scenes have been downloaded
         or processed.
     :param last_execution_date:
         if get_newest_datasets is True this variable needs to be set. All
         datasets younger than that date will be considered for ingestion into the
         database.
     :return:
-        dataframe with metadata of all scenes handled by the function
+        dataframe with metadata of all scenes handled by the function
        call
     """
@@ -233,16 +233,16 @@
             "A timestamp must be provided when the only newest datasets shall be considered"
         )
-    # search for .SAFE subdirectories identifying the single scenes
+    # search for .SAFE subdirectories identifying the single scenes
     # some data providers, however, do not name their products following the
     # ESA convention (.SAFE is missing)
     s1_scenes = glob.glob(str(in_dir.joinpath("*.SAFE")))
     n_scenes = len(s1_scenes)
     if n_scenes == 0:
-        raise DataNotFoundError(f'No .SAFE scenes found in {in_dir}')
+        raise DataNotFoundError(f'No .SAFE scenes found in {in_dir}')
-    # if only scenes after a specific timestamp shall be considered drop
+    # if only scenes after a specific timestamp shall be considered drop
     # those from the list which are "too old"
     if get_newest_datasets:
         filtered_scenes = []
@@ -255,10 +255,10 @@
         s1_scenes = filtered_scenes
         if len(s1_scenes) == 0:
             raise DataNotFoundError(
-                f'No scenes younger than {datetime.strftime(last_execution_date, "%Y-%m-%d")} found'
+                f'No scenes younger than 
{datetime.strftime(last_execution_date, "%Y-%m-%d")} found' ) - # loop over the scenes + # loop over the mapper metadata_scenes = [] error_file = open(in_dir.joinpath("errored_datasets.txt"), "w+") for idx, s1_scene in enumerate(s1_scenes): diff --git a/eodal/metadata/sentinel2/database/ingestion.py b/eodal/metadata/sentinel2/database/ingestion.py index 5a08c975..0050554b 100644 --- a/eodal/metadata/sentinel2/database/ingestion.py +++ b/eodal/metadata/sentinel2/database/ingestion.py @@ -42,7 +42,7 @@ def meta_df_to_database( meta_df: pd.DataFrame, raw_metadata: Optional[bool] = True ) -> None: """ - Once the metadata from one or more scenes have been extracted + Once the metadata from one or more mapper have been extracted the data can be ingested into the metadata base (strongly recommended). @@ -52,7 +52,7 @@ def meta_df_to_database( sql-methods into the database. :param meta_df: - data frame with metadata of one or more scenes to insert + data frame with metadata of one or more mapper to insert :param raw_metadata: If set to False, assumes the metadata is about processed products diff --git a/eodal/metadata/sentinel2/database/querying.py b/eodal/metadata/sentinel2/database/querying.py index 7d2b5811..e30d96e7 100644 --- a/eodal/metadata/sentinel2/database/querying.py +++ b/eodal/metadata/sentinel2/database/querying.py @@ -79,7 +79,7 @@ def find_raw_data_by_bbox( optional cloud cover threshold to filter datasets by scene cloud coverage. Must be provided as number between 0 and 100%. :returns: - dataframe with references to found Sentinel-2 scenes + dataframe with references to found Sentinel-2 mapper """ # translate processing level @@ -152,7 +152,7 @@ def find_raw_data_by_tile( optional cloud cover threshold to filter datasets by scene cloud coverage. Must be provided as number between 0 and 100%. 
:returns: - dataframe with references to found Sentinel-2 scenes + dataframe with references to found Sentinel-2 mapper """ # translate processing level diff --git a/eodal/metadata/sentinel2/parsing.py b/eodal/metadata/sentinel2/parsing.py index e6b15200..e09477a2 100644 --- a/eodal/metadata/sentinel2/parsing.py +++ b/eodal/metadata/sentinel2/parsing.py @@ -389,7 +389,7 @@ def parse_s2_scene_metadata( ) -> Tuple[Dict[str, Any]]: """ wrapper function to extract metadata from ESA Sentinel-2 - scenes. It returns a dict with the metadata most important + mapper. It returns a dict with the metadata most important to characterize a given Sentinel-2 scene (mtd_msi). Optionally, some information about the datastrip can be extracted as well (MTD_DS.xml); this information is required @@ -401,8 +401,8 @@ def parse_s2_scene_metadata( reduced in the case of L1C since no scene classification information is available. - NOTE: In order to identify scenes and their processing level - correctly, L2A scenes must have '_MSIL2A_' occuring somewhere + NOTE: In order to identify mapper and their processing level + correctly, L2A mapper must have '_MSIL2A_' occuring somewhere in the filepath. For L1C, it must be '_MSIL1C_'. :param in_dir: @@ -463,7 +463,7 @@ def loop_s2_archive( ) -> Tuple[pd.DataFrame]: """ wrapper function to loop over an entire archive (i.e., collection) of - Sentinel-2 scenes in either L1C or L2A processing level or a mixture + Sentinel-2 mapper in either L1C or L2A processing level or a mixture thereof. The function returns a pandas dataframe for all found entries in the @@ -471,7 +471,7 @@ def loop_s2_archive( :param in_dir: directory containing the Sentinel-2 data (L1C and/or L2A - processing level). Sentinel-2 scenes are assumed to follow ESA's + processing level). 
Sentinel-2 scenes are assumed to follow ESA's
     .SAFE naming convention and structure
 :param extract_datastrip:
     If True reads also metadata from the datastrip xml file
@@ -479,14 +479,14 @@
 :param get_newest_datasets:
     if set to True only datasets newer than a user-defined time stamp
     will be considered for ingestion into the database. This is particularly
-    useful for updating the database after new scenes have been downloaded
+    useful for updating the database after new scenes have been downloaded
     or processed.
 :param last_execution_date:
     if get_newest_datasets is True this variable needs to be set. All
     datasets younger than that date will be considered for ingestion into the
     database.
 :return:
-    dataframe with metadata of all scenes handled by the function
+    dataframe with metadata of all scenes handled by the function
     call
 """
@@ -497,7 +497,7 @@
         "A timestamp must be provided when the only newest datasets shall be considered"
     )
-    # search for .SAFE subdirectories identifying the single scenes
+    # search for .SAFE subdirectories identifying the single scenes
     # some data providers, however, do not name their products following the
     # ESA convention (.SAFE is missing)
     s2_scenes = glob.glob(str(in_dir.joinpath("*.SAFE")))
@@ -507,9 +507,9 @@
         s2_scenes = [f for f in in_dir.iterdir() if f.is_dir()]
     n_scenes = len(s2_scenes)
     if n_scenes == 0:
-        raise UnknownProcessingLevel("No Sentinel-2 scenes were found")
+        raise UnknownProcessingLevel("No Sentinel-2 scenes were found")
-    # if only scenes after a specific timestamp shall be considered drop
+    # if only scenes after a specific timestamp shall be considered drop
     # those from the list which are "too old"
     if get_newest_datasets:
         filtered_scenes = []
@@ -522,10 +522,10 @@
         s2_scenes = filtered_scenes
         if len(s2_scenes) == 0:
             raise NothingToDo(
-                f'No scenes younger than {datetime.strftime(last_execution_date, "%Y-%m-%d")} found'
+                f'No scenes 
younger than {datetime.strftime(last_execution_date, "%Y-%m-%d")} found' ) - # loop over the scenes + # loop over the mapper metadata_scenes = [] ql_ds_scenes = [] error_file = open(in_dir.joinpath("errored_datasets.txt"), "w+") diff --git a/eodal/metadata/sentinel2/utils.py b/eodal/metadata/sentinel2/utils.py index c7661185..50fa2283 100644 --- a/eodal/metadata/sentinel2/utils.py +++ b/eodal/metadata/sentinel2/utils.py @@ -31,15 +31,15 @@ def identify_updated_scenes( in the ``product_uri`` entry in the scene metadata or .SAFE name). :param metadata_df: - dataframe from metadata base query in which to search for scenes with + dataframe from metadata base query in which to search for mapper with the same sensing date and data take but different baseline versions :param return_highest_baseline: - if True (default) return those scenes with the highest baseline. Otherwise + if True (default) return those mapper with the highest baseline. Otherwise return the baseline most products belong to :return: Tuple with two entries. The first entries contains a ``DataFrame`` with - those S2 scenes belonging to either the highest PDGS baseline or the most - common baseline version. The other "older" scenes are in the second + those S2 mapper belonging to either the highest PDGS baseline or the most + common baseline version. The other "older" mapper are in the second tuple item. 
""" diff --git a/eodal/metadata/stac/client.py b/eodal/metadata/stac/client.py index d586a351..397653a0 100644 --- a/eodal/metadata/stac/client.py +++ b/eodal/metadata/stac/client.py @@ -76,7 +76,7 @@ def sentinel2( :param kwargs: keyword arguments to pass to `query_stac` function :returns: - dataframe with references to found Sentinel-2 scenes + dataframe with references to found Sentinel-2 mapper """ # check for processing level of the data and set the collection accordingly processing_level_stac = eval(f"Settings.STAC_BACKEND.S2{processing_level.name}") @@ -86,7 +86,7 @@ def sentinel2( scenes = query_stac(**kwargs) # get STAC provider specific naming conventions s2 = Settings.STAC_BACKEND.Sentinel2 - # loop over found scenes and check their cloud cover + # loop over found mapper and check their cloud cover metadata_list = [] for scene in scenes: # extract scene metadata required for Sentinel-2 @@ -140,7 +140,7 @@ def sentinel2( @prepare_bbox def sentinel1(collection: Optional[str] = 'sentinel-1-rtc', **kwargs) -> pd.DataFrame: """ - Sentinel-1 specific STAC query function to retrieve scenes from MSPC + Sentinel-1 specific STAC query function to retrieve mapper from MSPC :param collection: Sentinel-1 collection to use. 
Must be one of 'sentinel-1-grd' (ground @@ -149,7 +149,7 @@ def sentinel1(collection: Optional[str] = 'sentinel-1-rtc', **kwargs) -> pd.Data :param kwargs: keyword arguments to pass to `query_stac` function :returns: - dataframe with references to found Sentinel-1 scenes + dataframe with references to found Sentinel-1 mapper """ if Settings.STAC_BACKEND != STAC_Providers.MSPC: diff --git a/eodal/metadata/utils.py b/eodal/metadata/utils.py index 0fdb6976..fb459fa2 100644 --- a/eodal/metadata/utils.py +++ b/eodal/metadata/utils.py @@ -45,7 +45,7 @@ def _check_linux_cifs(ip: Union[str, Path]) -> Path: lines = response.split("\n") for line in lines: if str(ip) in line: - # data is on mounted share -> get local file system scenes + # data is on mounted share -> get local file system mapper local_path = line.split(" ")[1] # check if current user has access to read local path, otherwise keep # searching (might happen if another user manually mounts the NAS) diff --git a/eodal/operational/archive/sentinel1.py b/eodal/operational/archive/sentinel1.py index addacb75..6f5ef2de 100644 --- a/eodal/operational/archive/sentinel1.py +++ b/eodal/operational/archive/sentinel1.py @@ -139,7 +139,7 @@ def pull_from_creodias( ) datasets_filtered = datasets[datasets.product_uri.isin(missing_datasets)] - # download those scenes not available in the local database from Creodias + # download those mapper not available in the local database from Creodias download_datasets( datasets=datasets_filtered, download_dir=path_out, diff --git a/eodal/operational/archive/sentinel2.py b/eodal/operational/archive/sentinel2.py index 4c7d503f..c810e83b 100644 --- a/eodal/operational/archive/sentinel2.py +++ b/eodal/operational/archive/sentinel2.py @@ -83,7 +83,7 @@ def pull_from_creodias( box extent is taken from the metadata DB based on the region identifier. :param cloud_cover_threshold: cloud cover threshold (0-100%) to be used for CREODIAS query. 
Defaults - to 100% (i.e., also completely cloudy scenes are downloaded). + to 100% (i.e., also completely cloudy mapper are downloaded). :param unzip: if True (default) datasets are unzipped and zip archives are deleted :param overwrite_existing_zips: @@ -143,7 +143,7 @@ def pull_from_creodias( ) datasets_filtered = datasets[datasets.product_uri.isin(missing_datasets)] - # download those scenes not available in the local database from Creodias + # download those mapper not available in the local database from Creodias download_datasets( datasets=datasets_filtered, download_dir=path_out, diff --git a/eodal/operational/cli/planet_scope.py b/eodal/operational/cli/planet_scope.py index 00bd40e6..c538e847 100644 --- a/eodal/operational/cli/planet_scope.py +++ b/eodal/operational/cli/planet_scope.py @@ -30,7 +30,7 @@ def cli_ps_scenes_ingestion( with different mount points of the NAS share. If not provided, the absolute path of the dataset is used in the database. """ - # loop over all scenes + # loop over all mapper for scene_dir in ps_raw_data_archive.iterdir(): try: diff --git a/eodal/operational/cli/sentinel2.py b/eodal/operational/cli/sentinel2.py index eb598f27..908d30ae 100644 --- a/eodal/operational/cli/sentinel2.py +++ b/eodal/operational/cli/sentinel2.py @@ -186,7 +186,7 @@ def cli_s2_creodias_update( :param processing_level: Sentinel-2 processing level (L1C or L2A) to check. :param cloud_cover_threshold: - optional cloud cover threshold to filter out to cloudy scenes as integer + optional cloud cover threshold to filter out to cloudy mapper as integer between 0 and 100%. :param path_options: optional dictionary specifying storage_device_ip, storage_device_ip_alias @@ -350,7 +350,7 @@ def cli_s2_sen2cor_update( with different mount points of the NAS share. If not provided, the absolute path of the dataset is used in the database. 
"""
-    # loop over all scenes (S2*.SAFE)
+    # loop over all scenes (S2*.SAFE)
     for scene_dir in s2_raw_data_archive.rglob("S2*.SAFE"):
         try:
@@ -413,10 +413,10 @@
     :param out_dir:
         directory where to store the subset metadata CSV file and the cloud cover plot.
     :param cloud_cover_threshold:
-        optional cloud cover threshold to filter out to cloudy scenes as integer
+        optional cloud cover threshold to filter out too cloudy scenes as integer
         between 0 and 100%.
     :returns:
-        metadata of scenes
+        metadata of scenes
     """
     # query metadata from database
@@ -440,7 +440,7 @@
     # write out metadata of the query as CSV
     metadata.to_csv(out_dir.joinpath(f"{query_time}_query.csv"), index=False)
-    # Plot available scenes for query
+    # Plot available scenes for query
     fig = plt.figure(figsize=(15, 10), dpi=300)
     ax = fig.add_subplot(111)
     ax.plot(
@@ -453,7 +453,7 @@
     ax.set_ylabel("Cloud cover [%]")
     ax.set_ylim(0.0, 100.0)
     ax.set_title(
-        f"Tile {tile} - No. of scenes: {metadata.shape[0]}"
+        f"Tile {tile} - No. of scenes: {metadata.shape[0]}"
         + "\n"
         + f"Average cloud cover: {np.round(cc_avg, 2)}%"
     )
diff --git a/eodal/operational/mapping/mapper.py b/eodal/operational/mapping/mapper.py
index 8ba3cdeb..ede85bbc 100644
--- a/eodal/operational/mapping/mapper.py
+++ b/eodal/operational/mapping/mapper.py
@@ -254,11 +254,11 @@ def __delattr__(self, *args):
     def _get_scenes(self, sensor: str) -> None:
         """
-        Method to query available scenes. Works sensor-agnostic but requires a
+        Method to query available scenes. 
Works sensor-agnostic but requires a sensor to be specified to select the correct metadata queries :param sensor: - name of the sensor for which to search for scenes + name of the sensor for which to search for mapper """ # prepare features aoi_features = self._prepare_features() @@ -329,7 +329,7 @@ def _get_scenes(self, sensor: str) -> None: # check if the satellite data is in different projections in_single_crs = scenes_df.epsg.unique().shape[0] == 1 - # check if there are several scenes available for a single sensing date + # check if there are several mapper available for a single sensing date # in this case merging of different datasets might be necessary scenes_df_split = identify_split_scenes(scenes_df) scenes_df["is_split"] = False @@ -339,10 +339,10 @@ def _get_scenes(self, sensor: str) -> None: scenes_df.product_uri.isin(scenes_df_split.product_uri), "is_split" ] = True - # in case the scenes have different projections (most likely different UTM + # in case the mapper have different projections (most likely different UTM # zone numbers) figure out which will be target UTM zone. To avoid too many # reprojection operations of raster data later, the target CRS is that CRS - # most scenes have (expressed as EPSG code) + # most mapper have (expressed as EPSG code) scenes_df["target_crs"] = scenes_df.epsg if not in_single_crs: most_common_epsg = scenes_df.epsg.mode().values @@ -446,7 +446,7 @@ def get_feature(self, feature_id: Any) -> Dict[str, Any]: def get_feature_scenes(self, feature_identifier: Any) -> DataFrame: """ - Returns a ``DataFrame`` with all scenes found for a + Returns a ``DataFrame`` with all mapper found for a feature in the feature collection NOTE: @@ -458,7 +458,7 @@ def get_feature_scenes(self, feature_identifier: Any) -> DataFrame: unique identifier of the aoi. 
Must be the same identifier used
         during the database query
     :returns:
-        ``DataFrame`` with all scenes found for a given
+        ``DataFrame`` with all scenes found for a given
         set of search parameters
     """
     try:
@@ -472,7 +472,7 @@ def _get_observation(
     """
     Returns the scene data (observations) for a selected feature and date.
-    If for the date provided no scenes are found, the data from the scene(s)
+    If for the date provided no scenes are found, the data from the scene(s)
     closest in time is returned
     :param feature_id:
@@ -490,7 +490,7 @@
         ``GeoDataFrame`` (geometry type: ``Point``) or ``Sentinel2Handler``
         (geometry types ``Polygon`` or ``MultiPolygon``) is returned. if the
         observation contains nodata, only, None is returned. If multiple
-        scenes must be read to get a single observation, the status 'multiple'
+        scenes must be read to get a single observation, the status 'multiple'
         is returned.
     """
     # define variable for returning results
@@ -499,7 +499,7 @@
     scenes_df = self.observations.get(feature_id, None)
     if scenes_df is None:
         raise DataNotFoundError(
-            f'Could not find any scenes for feature with ID "{feature_id}"'
+            f'Could not find any scenes for feature with ID "{feature_id}"'
         )
     # get scene(s) closest to the sensing_date provided
@@ -516,7 +516,7 @@
         )
     except Exception as e:
         raise DataNotFoundError(
-            f"Cannot find the scenes on the file system: {e}"
+            f"Cannot find the scenes on the file system: {e}"
         )
     # get properties and geometry of the current feature from the collection
     feature_dict = self.get_feature(feature_id)
@@ -525,7 +525,7 @@
     # parse feature geometry in kwargs so that only a spatial subset is read
     # in addition parse the S2 gain factor as "scale" argument
     kwargs.update({"vector_features": feature_gdf})
-    # multiple scenes for a single date
+    # multiple scenes for a single date
     # check what to do (re-projection, merging)
     if scenes_date.shape[0] > 1:
         return ('multiple', 
scenes_date, feature_gdf) diff --git a/eodal/operational/mapping/sentinel1.py b/eodal/operational/mapping/sentinel1.py index 8c5ed36f..16dd3bca 100644 --- a/eodal/operational/mapping/sentinel1.py +++ b/eodal/operational/mapping/sentinel1.py @@ -18,7 +18,7 @@ class Sentinel1Mapper(Mapper): """ - Spatial scenes class for Sentinel-1 data. + Spatial mapper class for Sentinel-1 data. """ def __init__( self, @@ -44,15 +44,15 @@ def get_scenes(self) -> None: The scene selection and processing workflow contains several steps: - 1. Query the metadata catalog for **ALL** available scenes that overlap + 1. Query the metadata catalog for **ALL** available mapper that overlap the bounding box of a given ``Polygon`` or ``MultiPolygon`` feature. - 2. Check if for a single sensing date several scenes are available + 2. Check if for a single sensing date several mapper are available 3. If yes check if that's due to Sentinel-1 tiling grid design. If yes - flag these scenes as potential merge candidates. - 4. If the scenes found have different spatial coordinate systems (CRS) + flag these mapper as potential merge candidates. + 4. If the mapper found have different spatial coordinate systems (CRS) (usually different UTM zones) flag the data accordingly. The target - CRS is defined as that CRS the majority of scenes shares. + CRS is defined as that CRS the majority of mapper shares. """ self._get_scenes(sensor='sentinel1') @@ -62,7 +62,7 @@ def get_observation( """ Returns the scene data (observations) for a selected feature and date. 
- If for the date provided no scenes are found, the data from the scene(s) + If for the date provided no mapper are found, the data from the scene(s) closest in time is returned :param feature_id: diff --git a/eodal/operational/mapping/sentinel2.py b/eodal/operational/mapping/sentinel2.py index bccbd5c9..1133c65b 100644 --- a/eodal/operational/mapping/sentinel2.py +++ b/eodal/operational/mapping/sentinel2.py @@ -45,22 +45,22 @@ class Sentinel2Mapper(Mapper): """ - Spatial scenes class for Sentinel-2 data. + Spatial mapper class for Sentinel-2 data. :attrib processing_level: Sentinel-2 data processing level (L1C or L2A) :attrib cloud_cover_threshold: global (scene-wide) cloud coverage threshold between 0 and 100% cloud cover. Scenes with cloud coverage reported higher than the threshold are discarded. - To obtain *all* scenes in the archive use the default of 100%. + To obtain *all* mapper in the archive use the default of 100%. :attrib use_latest_pdgs_baseline: since a scene can possible occur in different PDGS baseline numbers (the scene_id and product_uri will be different, which is supported by our data model) - it is necessary to decide for a baseline in those cases where multiple scenes + it is necessary to decide for a baseline in those cases where multiple mapper from the same sensing and data take time are available (originating from the same Sentinel-2 data but processed slightly differently). By default we use - those scenes from the latest baseline. Otherwise, it is possible to use - the baseline most scenes were processed with. + those mapper from the latest baseline. Otherwise, it is possible to use + the baseline most mapper were processed with. """ def __init__( @@ -78,13 +78,13 @@ def __init__( Sentinel-2 processing level to query :param cloud_cover_threshold: cloud cover threshold in percent (0-100%). 
Default is 100% to
-            consider all scenes in the archive
+            consider all scenes in the archive
         :param use_latest_pdgs_baseline:
             if True (default) forces *eodal* to use the latest processing baseline in case a
             scene is available in different processing levels
         :param tile_selection:
             optional list of Sentinel-2 tiles (e.g., ['T32TMT','T32TGM']) to use for
-            filtering. Only scenes belonging to these tiles are returned then
+            filtering. Only scenes belonging to these tiles are returned then
         :param args:
             arguments to pass to the constructor of the ``Mapper`` super class
         :param kwargs:
@@ -106,25 +106,25 @@ def get_scenes(self) -> None:
         NOTE:
             By passing a list of Sentinel-2 tiles you can explicitly control
             which Sentinel-2 tiles are considered. This might be useful for
-            scenes tasks where your feature collection lies completely within
+            mapping tasks where your feature collection lies completely within
             a single Sentinel-2 tile but also overlaps with neighboring tiles.
         The scene selection and processing workflow contains several steps:
-        1. Query the metadata catalog for **ALL** available scenes that overlap
+        1. Query the metadata catalog for **ALL** available scenes that overlap
            the bounding box of a given ``Polygon`` or ``MultiPolygon`` feature.
            **IMPORTANT**:
                By passing a list of Sentinel-2 tiles to consider (``tile_ids``)
                you can explicitly control which Sentinel-2 tiles are considered!
-        2. Check if for a single sensing date several scenes are available
+        2. Check if for a single sensing date several scenes are available
         3. If yes check if that's due to Sentinel-2 data take or tiling grid
-           design. If yes flag these scenes as potential merge candidates. A
-           second reason for multiple scenes are differences in PDGS baseline,
+           design. If yes flag these scenes as potential merge candidates. A
+           second reason for multiple scenes are differences in PDGS baseline,
            i.e., the dataset builds upon the **same** Sentinel-2 data but was
            processed by different base-line version.
-        4. If the scenes found have different spatial coordinate systems (CRS)
+        4. If the scenes found have different spatial coordinate systems (CRS)
            (usually different UTM zones) flag the data accordingly. The target
-           CRS is defined as that CRS the majority of scenes shares.
+           CRS is defined as that CRS the majority of scenes shares.
         """
         self._get_scenes(sensor='sentinel2')
@@ -167,7 +167,7 @@ def _read_multiple_scenes(
         is available for a given sensing date and feature (area of interest)
         :param scenes_date:
-            `DataFrame` with all Sentinel-2 scenes of a single date
+            `DataFrame` with all Sentinel-2 scenes of a single date
         :param feature_id:
             ID of the feature for which to extract data
         :param kwargs:
@@ -214,7 +214,7 @@ def _read_multiple_scenes(
                     res['sensing_time'] = candidate_scene['sensing_time']
                     res["scene_id"] = candidate_scene["scene_id"]
                     break
-        # in case of a (Multi-)Polygon: check if one of the candidate scenes complete
+        # in case of a (Multi-)Polygon: check if one of the candidate scenes complete
         # contains the feature (i.e., its bounding box). If that's the case and the
         # returned data is not black-filled, we can take that data set.
If none of the
        # candidate contains the scene complete, merging and (depending on the CRS)
@@ -232,7 +232,7 @@
                 in_dir = updated_scenes["assets"].iloc[0]
             else:
                 in_dir = updated_scenes["real_path"].iloc[0]
-            # if there were only two input scenes we're done
+            # if there were only two input scenes we're done
             # otherwise we have to check if we have to merge data
             if scenes_date.shape[0] == 2:
                 res = Sentinel2.from_safe(
                 )
                 self._resample_s2_scene(s2_scene=res)
                 return res
-            # if updated scenes is not empty update the scenes_date DataFrame
+            # if updated scenes is not empty update the scenes_date DataFrame
             if not updated_scenes.empty:
-                # drop "out-dated" scenes
+                # drop "out-dated" scenes
                 appended = pd.concat([scenes_date, old_scenes])
                 appended.drop_duplicates(subset=['product_uri', 'tile_id'], keep=False, inplace=True)
                 scenes_date = appended.copy()
                 # if there is a single scene from a another tile in the
-                # "old" scenes append it to the scenes_date
+                # "old" scenes append it to the scenes_date
                 old_scenes_grouped = old_scenes.groupby(by='tile_id')
                 for tile_scenes in old_scenes_grouped:
                     if tile_scenes[0] not in scenes_date.tile_id.unique():
@@ -336,7 +336,7 @@ def get_observation(
         """
         Returns the scene data (observations) for a selected feature and date.
-        If for the date provided no scenes are found, the data from the scene(s)
+        If for the date provided no scenes are found, the data from the scene(s)
         closest in time is returned
         :param feature_id:
@@ -356,7 +356,7 @@ def get_observation(
         # call super class method for getting the observation
         res = self._get_observation(feature_id=feature_id, sensing_date=sensing_date,
                                     sensor='sentinel2', **kwargs)
-        # for multiple scenes a Sentinel-2 specific class must be called
+        # for multiple scenes a Sentinel-2 specific class must be called
         if isinstance(res, tuple):
             _, scenes_date, _ = res
             res = self._read_multiple_scenes(
@@ -371,15 +371,15 @@ def get_complete_timeseries(
         """
         Extracts all observation with a time period for a feature collection.
-        This function takes the Sentinel-2 scenes retrieved from the metadata DB query
+        This function takes the Sentinel-2 scenes retrieved from the metadata DB query
         in `~Mapper.get_sentinel2_scenes` and extracts the Sentinel-2 data from the
-        original .SAFE archives for all available scenes.
+        original .SAFE archives for all available scenes.
:param feature_selection:
             optional subset of features ids (you can only select features included
             in the current feature collection)
         :param drop_blackfilled_scenes:
-            drop scenes having no data values only (default)
+            drop scenes having no data values only (default)
         :param kwargs:
             optional key-word arguments to pass to `~eodal.core.band.Band.from_rasterio`
         """
@@ -396,8 +396,8 @@
             if feature not in feature_selection:
                 continue
-            # loop over scenes, they are already ordered by date (ascending)
-            # and check for each date which scenes are relevant and require
+            # loop over scenes, they are already ordered by date (ascending)
+            # and check for each date which scenes are relevant and require
             # potential reprojection or merging
             sensing_dates = scenes_df.sensing_date.unique()
             n_sensing_dates = len(sensing_dates)
@@ -433,7 +433,7 @@
             if isinstance(res, gpd.GeoDataFrame):
                 assets[feature] = pd.concat(feature_res)
             else:
-                # order scenes by acquisition time
+                # order scenes by acquisition time
                 timestamps = [x.scene_properties.acquisition_time for x in feature_res]
                 sorted_indices = np.argsort(np.array(timestamps))
                 feature_res_ordered = [feature_res[idx] for idx in sorted_indices]
diff --git a/eodal/operational/mapping/utils.py b/eodal/operational/mapping/utils.py
index 09823b9f..ff0ebb9e 100644
--- a/eodal/operational/mapping/utils.py
+++ b/eodal/operational/mapping/utils.py
@@ -30,7 +30,7 @@ def plot_feature(feature_scenes: List[RasterCollection], band_selection: str | L
                  feature_name: Optional[str] = '', **kwargs) -> plt.Figure:
     """
-    Plots all scenes retrieved for a feature
+    Plots all scenes retrieved for a feature
     :param band_selection:
         selection of band(s) to use for plotting.
Must be either a single @@ -48,12 +48,12 @@ def plot_feature(feature_scenes: List[RasterCollection], band_selection: str | L if len(band_selection) == 1: plot_multiple_bands = False - # check number of scenes in feature_scenes and determine figure size + # check number of mapper in feature_scenes and determine figure size n_scenes = len(feature_scenes) nrows = 1 ncols = 1 if n_scenes == 0: - raise ValueError('No scenes available for plotting') + raise ValueError('No mapper available for plotting') elif n_scenes == 1: f, ax = plt.subplots(**kwargs) # cast to array to allow indexing @@ -69,7 +69,7 @@ def plot_feature(feature_scenes: List[RasterCollection], band_selection: str | L ncols = max_scenes_in_row f, ax = plt.subplots(ncols=ncols, nrows=nrows, **kwargs) - # get acquisition times of the scenes if available. If not label the + # get acquisition times of the mapper if available. If not label the # plots by ascending numbers (Scene 1, Scene 2, Scene 3,...) scene_labels = [ f'{x.scene_properties.acquisition_time} {x.scene_properties.platform}' \ diff --git a/eodal/operational/resampling/sentinel2/__init__.py b/eodal/operational/resampling/sentinel2/__init__.py index 401ebb62..bf05ff8f 100644 --- a/eodal/operational/resampling/sentinel2/__init__.py +++ b/eodal/operational/resampling/sentinel2/__init__.py @@ -4,7 +4,7 @@ - resampling from 20 to 10m spatial resolution - generation of a RGB preview image per scene -- merging of split scenes (due to data take issues) +- merging of split mapper (due to data take issues) - resampling of SCL data (L2A processing level, only) and generation of a preview - generation of metadata of the processed data (links to the input datasets) @@ -62,7 +62,7 @@ def do_parallel( Returns a dict containing the file-paths to the generated datasets. 
:param in_df: - dataframe containing metadata of S2 scenes (must follow AgripySat convention) + dataframe containing metadata of S2 mapper (must follow AgripySat convention) :param loopcounter: Index to get actual S2 scene in loop. :param out_dir: @@ -118,7 +118,7 @@ def exec_pipeline( -> resampling from 20 to 10m spatial resolution -> generation of a RGB preview image per scene - -> merging of split scenes (due to data take issues) + -> merging of split mapper (due to data take issues) -> resampling of SCL data (L2A processing level, only) NOTE: @@ -152,12 +152,12 @@ def exec_pipeline( failed datasets in the second tuple item [1] """ - # make sub-directory for logging successfully processed scenes in out_dir + # make sub-directory for logging successfully processed mapper in out_dir log_dir = processed_data_archive.joinpath("log") if not log_dir.exists(): log_dir.mkdir(parents=True, exist_ok=True) - # query metadata DB for available Sentinel-2 scenes + # query metadata DB for available Sentinel-2 mapper try: metadata = find_raw_data_by_tile( date_start=date_start, @@ -174,13 +174,13 @@ def exec_pipeline( num_scenes = metadata.shape[0] meta_blackfill = identify_split_scenes(metadata_df=metadata) - # exclude these duplicated scenes from the main (parallelized) workflow! + # exclude these duplicated mapper from the main (parallelized) workflow! 
metadata = metadata[~metadata.product_uri.isin(meta_blackfill["product_uri"])] if meta_blackfill.empty: - logger.info(f"Found {num_scenes} scenes out of which 0 must be merged") + logger.info(f"Found {num_scenes} mapper out of which 0 must be merged") else: logger.info( - f"Found {num_scenes} scenes out of which {meta_blackfill.shape[0]} must be merged" + f"Found {num_scenes} mapper out of which {meta_blackfill.shape[0]} must be merged" ) t = time.time() @@ -195,11 +195,11 @@ def exec_pipeline( # concatenate the metadata of the stacked image files into a pandas dataframe bandstack_meta = pd.DataFrame(result) - # merge black-fill scenes (data take issue) if any + # merge black-fill mapper (data take issue) if any if not meta_blackfill.empty: - logger.info("Starting merging of blackfill scenes") + logger.info("Starting merging of blackfill mapper") - # after regular scene processsing, process the blackfill scenes single-threaded + # after regular scene processsing, process the blackfill mapper single-threaded for date in meta_blackfill.sensing_date.unique(): scenes = meta_blackfill[meta_blackfill.sensing_date == date] product_id_1 = scenes.product_uri.iloc[0] @@ -252,7 +252,7 @@ def exec_pipeline( # also the storage location shall be inserted into the database later # bandstack_meta['storage_share'] = target_s2_archive - logger.info("Finished merging of blackfill scenes") + logger.info("Finished merging of blackfill mapper") # check for any empty bandstack paths (indicates that something went wrong # during the processing diff --git a/eodal/operational/resampling/sentinel2/merge_blackfill.py b/eodal/operational/resampling/sentinel2/merge_blackfill.py index 58bc6d39..7ea70488 100644 --- a/eodal/operational/resampling/sentinel2/merge_blackfill.py +++ b/eodal/operational/resampling/sentinel2/merge_blackfill.py @@ -1,11 +1,11 @@ """ Sentinel-2 records data in so-called datatakes. 
When a datatake is over and a new begins the acquired image data is written to different files (based on the datatake -time). Sometimes, this cause scenes of a single acquisition date to be split into +time). Sometimes, this cause mapper of a single acquisition date to be split into two datasets which differ in their datatake. Thus, both datasets have a reasonable amount of blackfill in those areas not covered by the datatake they belong to. For users of satellite data, however, it is much more convenient to have those split -scenes merged into one since the division into two scenes by the datatake has +mapper merged into one since the division into two mapper by the datatake has technical reasons only. Copyright (C) 2022 Gregor Perich & Lukas Valentin Graf @@ -52,16 +52,16 @@ def merge_split_scenes( merges two Sentinel-2 datasets in .SAFE formatof the same sensing date and tile split by the datatake beginning/ end. - First, both scenes are resampled to 10m and stacked in a temporary working directory; - second, they are merged together so that the blackfill of the first scenes is replaced + First, both mapper are resampled to 10m and stacked in a temporary working directory; + second, they are merged together so that the blackfill of the first mapper is replaced by the values of the second one. SCL (if available) and previews are managed accordingly. :param scene_1: - .SAFE directory containing the first of two scenes split by the datatake beginning/ + .SAFE directory containing the first of two mapper split by the datatake beginning/ end of Sentinel-2 :param scene_2: - .SAFE directory containing the first of two scenes split by the datatake beginning/ + .SAFE directory containing the first of two mapper split by the datatake beginning/ end of Sentinel-2 :param out_dir: directory where to save the final outputs to. 
In this directory a temporary @@ -77,7 +77,7 @@ def merge_split_scenes( if not working_dir.exists(): working_dir.mkdir() - # save the outputs of the two scenes to different sub-directories within the working + # save the outputs of the two mapper to different sub-directories within the working # directory to avoid to override the output out_dirs = [working_dir.joinpath("1"), working_dir.joinpath("2")] for _out_dir in out_dirs: @@ -86,7 +86,7 @@ def merge_split_scenes( shutil.rmtree(_out_dir) _out_dir.mkdir() - # do the spatial resampling for the two scenes + # do the spatial resampling for the two mapper # first scene try: scene_out_1 = resample_and_stack_s2( diff --git a/eodal/operational/resampling/utils.py b/eodal/operational/resampling/utils.py index 3c7bea8c..001810e5 100644 --- a/eodal/operational/resampling/utils.py +++ b/eodal/operational/resampling/utils.py @@ -26,14 +26,14 @@ def identify_split_scenes( """ Returns entries in a pandas ``DataFrame`` retrieved from a query in eodal's metadata base that have the same sensing date. This could indicate, e.g., - that scenes have been split because of data take changes which sometimes cause - Sentinel-2 scenes to be split into two separate .SAFE archives, each of them + that mapper have been split because of data take changes which sometimes cause + Sentinel-2 mapper to be split into two separate .SAFE archives, each of them with a large amount of blackfill. 
:param metadata_df: - dataframe from metadata base query in which to search for scenes with + dataframe from metadata base query in which to search for mapper with the same sensing_date :return: - scenes with the same sensing date (might also be empty) + mapper with the same sensing date (might also be empty) """ return metadata_df[metadata_df.sensing_date.duplicated(keep=False)] diff --git a/examples/planet_download.py b/examples/planet_download.py index 2ea59f5b..6574553c 100644 --- a/examples/planet_download.py +++ b/examples/planet_download.py @@ -40,7 +40,7 @@ order_name = f'{date.today()}_ZH_Polygon_73129' cloud_cover = 50. -# query the data API to get available scenes (no order placement, no download!) +# query the data API to get available mapper (no order placement, no download!) # retrieves metadata, only client = PlanetAPIClient.query_planet_api( start_date=start_date, diff --git a/examples/random_sentinel2_pixels.py b/examples/random_sentinel2_pixels.py index 26458e3b..643b35b3 100644 --- a/examples/random_sentinel2_pixels.py +++ b/examples/random_sentinel2_pixels.py @@ -72,14 +72,14 @@ def get_pixels(date_start: date, date_end: date, scene_cloud_cover_threshold: in :param aois: areas of interest (1 to N) for which to extract random pixel observations """ - # setup Sentinel-2 scenes to get the relevant scenes + # setup Sentinel-2 mapper to get the relevant mapper mapper_configs = MapperConfigs( spatial_resolution=10., resampling_method=cv2.INTER_NEAREST_EXACT, band_names=['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12'] ) - # get a new scenes instance + # get a new mapper instance mapper = Sentinel2Mapper( date_start=date_start, date_end=date_end, @@ -88,7 +88,7 @@ def get_pixels(date_start: date, date_end: date, scene_cloud_cover_threshold: in mapper_configs=mapper_configs, feature_collection=aois ) - # query the available scenes (spatio-temporal query in the metadata catalog) + # query the available mapper 
(spatio-temporal query in the metadata catalog) mapper.get_scenes() # extract the actual S2 data s2_data = mapper.get_complete_timeseries() @@ -99,9 +99,9 @@ def get_pixels(date_start: date, date_end: date, scene_cloud_cover_threshold: in # loop over features and extract scene data for idx, feature in enumerate(features): feature_id = mapper.get_feature_ids()[idx] - # scenes of the actual feature + # mapper of the actual feature feature_scenes = s2_data[feature_id] - # loop over scenes, drop non-cloudfree observations and save spectral values to GeoDataFrame + # loop over mapper, drop non-cloudfree observations and save spectral values to GeoDataFrame feature_refl_list = [] for feature_scene in feature_scenes: # drop all observations but SCL classes 4 and 5 diff --git a/examples/satellite_archive_query.py b/examples/satellite_archive_query.py index e649b593..48885d8b 100644 --- a/examples/satellite_archive_query.py +++ b/examples/satellite_archive_query.py @@ -1,6 +1,6 @@ """ sample script showing how to perform a simple metadata query to identify the -number of available scenes for a Sentinel-2 tile below a user-defined cloud +number of available mapper for a Sentinel-2 tile below a user-defined cloud cover threshold on data already downloaded and ingested into the metadata base (offline mode) diff --git a/examples/sentinel1_mapping_example.py b/examples/sentinel1_mapping_example.py index d61c3dfd..90767bab 100644 --- a/examples/sentinel1_mapping_example.py +++ b/examples/sentinel1_mapping_example.py @@ -1,12 +1,12 @@ """ -Example script to extract a time series of Sentinel-1 scenes for a +Example script to extract a time series of Sentinel-1 mapper for a custom area of interest (AOI). 
The script shows how to use the Sentinel1Mapper class that takes over data handling such as * querying of spatio-temporal metadata catalogs to identify - available Sentinel-1 scenes + available Sentinel-1 mapper * merging data from different Sentinel-1 tiles if required * re-projection of imagery from one UTM zone into another if required @@ -60,7 +60,7 @@ #%% executable part -# get a new scenes instance +# get a new mapper instance mapper = Sentinel1Mapper( date_start=date_start, date_end=date_end, @@ -68,7 +68,7 @@ collection=collection ) -# retrieve metadata of scenes found (no reading) +# retrieve metadata of mapper found (no reading) mapper.get_scenes() mapper.observations diff --git a/examples/sentinel2_mapping_example.py b/examples/sentinel2_mapping_example.py index 18b7baa9..c0c5fe31 100644 --- a/examples/sentinel2_mapping_example.py +++ b/examples/sentinel2_mapping_example.py @@ -1,16 +1,16 @@ """ -Example script to extract a time series of Sentinel-2 scenes for a +Example script to extract a time series of Sentinel-2 mapper for a custom area of interest (AOI). The script shows how to use the Sentinel2Mapper class that takes over data handling such as * querying of spatio-temporal metadata catalogs to identify - available Sentinel-2 scenes + available Sentinel-2 mapper * merging data from different Sentinel-2 tiles if required * re-projection of imagery from one UTM zone into another if required - * removal of black-filled scenes + * removal of black-filled mapper This script works either using local data sources or by retrieving Sentinel-2 imagery from Microsoft Planetary Computer (https://planetarycomputer.microsoft.com). 
@@ -65,13 +65,13 @@ aoi: Path = Path('../data/sample_polygons/lake_lucerne.gpkg') #%% executable part -# Sentinel-2 scenes configuration +# Sentinel-2 mapper configuration mapper_configs = MapperConfigs( spatial_resolution=spatial_resolution, resampling_method=resampling_method, ) -# get a new scenes instance +# get a new mapper instance mapper = Sentinel2Mapper( date_start=date_start, date_end=date_end, @@ -81,9 +81,9 @@ feature_collection=aoi ) -# retrieve metadata of scenes found (no reading) +# retrieve metadata of mapper found (no reading) mapper.get_scenes() -mapper.observations # displays scenes found +mapper.observations # displays mapper found # read data into eodal's RasterCollection objects s2_data = mapper.get_complete_timeseries() diff --git a/tests/metadata/test_stac_client.py b/tests/metadata/test_stac_client.py index f08d2cfe..34cfd50e 100644 --- a/tests/metadata/test_stac_client.py +++ b/tests/metadata/test_stac_client.py @@ -25,7 +25,7 @@ def test_mspc_sentinel1(get_polygons): vector_features=polys ) - assert not res_s1.empty, 'no scenes found' + assert not res_s1.empty, 'no mapper found' assert 'assets' in res_s1.columns, 'no assets provided' url = _url_to_safe_name(res_s1.iloc[0].assets['vh']['href']) assert 'GRDH' in url, 'GRD not found in file name' @@ -39,7 +39,7 @@ def test_mspc_sentinel1(get_polygons): collection='sentinel-1-grd' ) - assert not res_grd_s1.empty, 'no scenes found' + assert not res_grd_s1.empty, 'no mapper found' assert 'assets' in res_grd_s1.columns, 'no assets provided' url = _url_to_safe_name(res_grd_s1.iloc[0].assets['vh']['href']) assert 'GRDH' in url, 'GRD not found in file name' diff --git a/tests/operational/test_sentinel2_mapper.py b/tests/operational/test_sentinel2_mapper.py index 34cd8e39..f4187aa2 100644 --- a/tests/operational/test_sentinel2_mapper.py +++ b/tests/operational/test_sentinel2_mapper.py @@ -16,7 +16,7 @@ (date(2016,12,1), date(2017,1,31), ProcessingLevels.L2A)] ) def test_point_extraction(get_points, 
date_start, date_end, processing_level): - """Extraction of points from Sentinel-2 scenes""" + """Extraction of points from Sentinel-2 mapper""" points = get_points() mapping_config = MapperConfigs() @@ -29,7 +29,7 @@ def test_point_extraction(get_points, date_start, date_end, processing_level): ) assert isinstance(mapper.feature_collection, Path), 'expected a path-like object' - # query the DB to get all S2 scenes available for the points + # query the DB to get all S2 mapper available for the points mapper.get_scenes() assert isinstance(mapper.feature_collection, dict), 'expected a dict-like object' assert len(mapper.get_feature_ids()) == 12, 'wrong number of point features' @@ -73,7 +73,7 @@ def test_field_parcel_extraction(get_polygons_3, date_start, date_end, processin mapper_configs=mapping_config ) assert isinstance(mapper.feature_collection, Path), 'expected a path-like object' - # query the DB to get all S2 scenes available for the Polygon + # query the DB to get all S2 mapper available for the Polygon mapper.get_scenes() assert len(mapper.observations) == 1, 'expected a single feature' feature_id = mapper.get_feature_ids()[0] @@ -82,12 +82,12 @@ def test_field_parcel_extraction(get_polygons_3, date_start, date_end, processin assert set(obs.tile_id.unique()) == {'T32TLT', 'T31TGN', 'T32TLS'}, \ 'expected three different tiles here' # the target CRS should be 32632 (UTM Zone 32N) because the majority of the - # scenes is in that projection + # mapper is in that projection assert (obs.target_crs == 32632).all(), 'wrong target CRS' if processing_level == ProcessingLevels.L1C: assert set(obs.sensing_date.unique()) == {date(2016,12,1), date(2017,1,3)}, \ 'expected two different dates' - assert obs.is_split.all(), 'all scenes must be flagged as "split"' + assert obs.is_split.all(), 'all mapper must be flagged as "split"' # get single observation res = mapper.get_observation( From 05ce83711f22faafa6df1b23ad6bf965473abeed Mon Sep 17 00:00:00 2001 From: lukas Date: 
Mon, 21 Nov 2022 18:06:30 +0100 Subject: [PATCH 026/125] deleted module as content is moved to core.scene --- eodal/mapper/scene_collection.py | 6 ------ 1 file changed, 6 deletions(-) delete mode 100644 eodal/mapper/scene_collection.py diff --git a/eodal/mapper/scene_collection.py b/eodal/mapper/scene_collection.py deleted file mode 100644 index 7908a426..00000000 --- a/eodal/mapper/scene_collection.py +++ /dev/null @@ -1,6 +0,0 @@ -''' -Created on Nov 13, 2022 - -@author: graflu -''' - From fcbb032fe483917a740f5a6c7c1873474a725e44 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 21 Nov 2022 18:06:41 +0100 Subject: [PATCH 027/125] fixed bug in __repr__() method --- eodal/core/raster.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index 9c2d4968..c5c7e444 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -322,8 +322,8 @@ def __repr__(self) -> str: return 'Empty EOdal RasterCollection' else: return f'EOdal RasterCollection\n----------------------\n' + \ - f'# Bands: {len(self)}\n Band names: {", ".join(self.band_names)}\n' + \ - f'Is band stack: {self.is_bandstacked}' + f'# Bands: {len(self)}\nBand names: {", ".join(self.band_names)}\n' + \ + f'Band aliases: {", ".join(self.band_aliases)}' @property def band_names(self) -> List[str]: From 9955c18db7e474f2e44021b378aacdb4bb3263d8 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 21 Nov 2022 18:07:00 +0100 Subject: [PATCH 028/125] uncommented parts due to circular import problem (unresolved) --- eodal/core/scene.py | 171 ++++++++++++++++++++++---------------------- 1 file changed, 85 insertions(+), 86 deletions(-) diff --git a/eodal/core/scene.py b/eodal/core/scene.py index 4ea15964..1729ff7d 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -25,7 +25,6 @@ from numbers import Number from typing import Callable, List, Optional -from eodal.core.raster import RasterCollection from eodal.utils.constants import ProcessingLevels 
class SceneProperties(object): @@ -154,88 +153,88 @@ def mode(self, value: str) -> None: self._mode = value -class SceneCollection(MutableMapping): - """ - Collection of 0:N scenes where each scene is a RasterCollection with - **non-empty** `SceneProperties` as each scene is indexed by its - acquistion time. - """ - def __init__( - self, - scene_constructor: Optional[Callable[..., RasterCollection]] = None, - *args, - **kwargs - ): - """ - Initializes a SceneCollection object with 0 to N scenes. - - :param scene_constructor: - optional callable returning an `~eodal.core.raster.RasterCollection` - instance. - :param args: - arguments to pass to `scene_constructor` or one of RasterCollection's - class methods (e.g., `RasterCollection.from_multi_band_raster`) - :param kwargs: - key-word arguments to pass to `scene_constructor` or one of RasterCollection's - class methods (e.g., `RasterCollection.from_multi_band_raster`) - """ - # mapper are stored in a dictionary like collection - self._frozen = False - self.collection = dict() - self._frozen = True - - if scene_constructor is not None: - scene = scene_constructor.__call__(*args, **kwargs) - if not isinstance(scene, RasterCollection): - raise TypeError('Only RasterCollection objects can be passed') - self.__setitem__(scene) - - def __getitem__(self, key: str) -> RasterCollection: - return self.collection[key] - - def __setitem__(self, item: RasterCollection): - if not isinstance(item, RasterCollection): - raise TypeError("Only RasterCollection objects can be passed") - key = item.scene_properties.acquisition_time - if key in self.collection.keys(): - raise KeyError("Duplicate scene names are not permitted") - if key is None: - raise ValueError("RasterCollection passed must have an acquistion time stamp") - value = item.copy() - self.collection[key] = value - - def __delitem__(self, key: str): - del self.collection[key] - - def __iter__(self): - for k, v in self.collection.items(): - yield k, v - - def __len__(self) -> 
int: - return len(self.collection) - - def __repr__(self) -> str: - pass - - @property - def scene_names(self) -> List[str]: - """scene names in collection""" - return list(self.collection.keys()) - - def apply(self, func: Callable): - pass - - def dump(self): - pass - - def filter(self): - pass - - def load(self): - pass - - def plot(self): - pass - - def to_xarray(self): - pass +# class SceneCollection(MutableMapping): +# """ +# Collection of 0:N scenes where each scene is a RasterCollection with +# **non-empty** `SceneProperties` as each scene is indexed by its +# acquistion time. +# """ +# def __init__( +# self, +# scene_constructor: Optional[Callable[..., RasterCollection]] = None, +# *args, +# **kwargs +# ): +# """ +# Initializes a SceneCollection object with 0 to N scenes. +# +# :param scene_constructor: +# optional callable returning an `~eodal.core.raster.RasterCollection` +# instance. +# :param args: +# arguments to pass to `scene_constructor` or one of RasterCollection's +# class methods (e.g., `RasterCollection.from_multi_band_raster`) +# :param kwargs: +# key-word arguments to pass to `scene_constructor` or one of RasterCollection's +# class methods (e.g., `RasterCollection.from_multi_band_raster`) +# """ +# # mapper are stored in a dictionary like collection +# self._frozen = False +# self.collection = dict() +# self._frozen = True +# +# if scene_constructor is not None: +# scene = scene_constructor.__call__(*args, **kwargs) +# if not isinstance(scene, RasterCollection): +# raise TypeError('Only RasterCollection objects can be passed') +# self.__setitem__(scene) +# +# def __getitem__(self, key: str) -> RasterCollection: +# return self.collection[key] +# +# def __setitem__(self, item: RasterCollection): +# if not isinstance(item, RasterCollection): +# raise TypeError("Only RasterCollection objects can be passed") +# key = item.scene_properties.acquisition_time +# if key in self.collection.keys(): +# raise KeyError("Duplicate scene names are not 
permitted") +# if key is None: +# raise ValueError("RasterCollection passed must have an acquistion time stamp") +# value = item.copy() +# self.collection[key] = value +# +# def __delitem__(self, key: str): +# del self.collection[key] +# +# def __iter__(self): +# for k, v in self.collection.items(): +# yield k, v +# +# def __len__(self) -> int: +# return len(self.collection) +# +# def __repr__(self) -> str: +# pass +# +# @property +# def scene_names(self) -> List[str]: +# """scene names in collection""" +# return list(self.collection.keys()) +# +# def apply(self, func: Callable): +# pass +# +# def dump(self): +# pass +# +# def filter(self): +# pass +# +# def load(self): +# pass +# +# def plot(self): +# pass +# +# def to_xarray(self): +# pass From 575d62de9f5d48bfff8917ac4967f9bc4b9a1eb3 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 22 Nov 2022 09:06:08 +0100 Subject: [PATCH 029/125] fixed import errors --- eodal/core/raster.py | 127 ++++++++++++- eodal/core/scene.py | 268 +++++++--------------------- tests/core/test_scene_collection.py | 13 ++ 3 files changed, 208 insertions(+), 200 deletions(-) create mode 100644 tests/core/test_scene_collection.py diff --git a/eodal/core/raster.py b/eodal/core/raster.py index c5c7e444..b9d55933 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -68,6 +68,7 @@ along with this program. If not, see . 
""" +import datetime import geopandas as gpd import matplotlib.pyplot as plt import matplotlib.ticker as ticker @@ -94,10 +95,134 @@ from eodal.core.band import Band from eodal.core.operators import Operator -from eodal.core.scene import SceneProperties from eodal.core.spectral_indices import SpectralIndices +from eodal.utils.constants import ProcessingLevels from eodal.utils.decorators import check_band_names +class SceneProperties(object): + """ + A class for storing scene-relevant properties + + :attribute acquisition_time: + image acquisition time + :attribute platform: + name of the imaging platform + :attribute sensor: + name of the imaging sensor + :attribute processing_level: + processing level of the remotely sensed data (if + known and applicable) + :attribute product_uri: + unique product (scene) identifier + :attribute mode: + imaging mode of SAR sensors + """ + + def __init__( + self, + acquisition_time: Optional[datetime.datetime] = datetime.datetime(2999, 1, 1), + platform: Optional[str] = "", + sensor: Optional[str] = "", + processing_level: Optional[ProcessingLevels] = ProcessingLevels.UNKNOWN, + product_uri: Optional[str] = "", + mode: Optional[str] = "" + ): + """ + Class constructor + + :param acquisition_time: + image acquisition time + :param platform: + name of the imaging platform + :param sensor: + name of the imaging sensor + :param processing_level: + processing level of the remotely sensed data (if + known and applicable) + :param product_uri: + unique product (scene) identifier + :attribute mode: + imaging mode of SAR sensors + """ + + self.acquisition_time = acquisition_time + self.platform = platform + self.sensor = sensor + self.processing_level = processing_level + self.product_uri = product_uri + self.mode = mode + + def __repr__(self) -> str: + return str(self.__dict__) + + @property + def acquisition_time(self) -> datetime.datetime: + """acquisition time of the scene""" + return self._acquisition_time + + @acquisition_time.setter 
+ def acquisition_time(self, time: datetime.datetime) -> None: + """acquisition time of the scene""" + if not isinstance(time, datetime.datetime): + raise TypeError("Expected a datetime.datetime object") + self._acquisition_time = time + + @property + def platform(self) -> str: + """name of the imaging platform""" + return self._platform + + @platform.setter + def platform(self, value: str) -> None: + """name of the imaging plaform""" + if not isinstance(value, str): + raise TypeError("Expected a str object") + self._platform = value + + @property + def sensor(self) -> str: + """name of the sensor""" + return self._sensor + + @sensor.setter + def sensor(self, value: str) -> None: + """name of the sensor""" + if not isinstance(value, str): + raise TypeError("Expected a str object") + self._sensor = value + + @property + def processing_level(self) -> ProcessingLevels: + """current processing level""" + return self._processing_level + + @processing_level.setter + def processing_level(self, value: ProcessingLevels): + """current processing level""" + self._processing_level = value + + @property + def product_uri(self) -> str: + """unique product (scene) identifier""" + return self._product_uri + + @product_uri.setter + def product_uri(self, value: str) -> None: + """unique product (scene) identifier""" + if not isinstance(value, str): + raise TypeError("Expected a str object") + self._product_uri = value + + @property + def mode(self) -> str: + """imaging mode of SAR sensors""" + return self._mode + + @mode.setter + def mode(self, value: str) -> None: + if not isinstance(value, str): + raise TypeError("Expected a str object") + self._mode = value class RasterOperator(Operator): """ diff --git a/eodal/core/scene.py b/eodal/core/scene.py index 1729ff7d..7df8b8c6 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -18,223 +18,93 @@ along with this program. If not, see . 
""" -import datetime -import numpy as np - from collections.abc import MutableMapping -from numbers import Number from typing import Callable, List, Optional -from eodal.utils.constants import ProcessingLevels +import eodal.core.raster as raster -class SceneProperties(object): +class SceneCollection(MutableMapping): """ - A class for storing scene-relevant properties - - :attribute acquisition_time: - image acquisition time - :attribute platform: - name of the imaging platform - :attribute sensor: - name of the imaging sensor - :attribute processing_level: - processing level of the remotely sensed data (if - known and applicable) - :attribute product_uri: - unique product (scene) identifier - :attribute mode: - imaging mode of SAR sensors + Collection of 0:N scenes where each scene is a RasterCollection with + **non-empty** `SceneProperties` as each scene is indexed by its + acquistion time. """ - def __init__( self, - acquisition_time: Optional[datetime.datetime] = datetime.datetime(2999, 1, 1), - platform: Optional[str] = "", - sensor: Optional[str] = "", - processing_level: Optional[ProcessingLevels] = ProcessingLevels.UNKNOWN, - product_uri: Optional[str] = "", - mode: Optional[str] = "" + scene_constructor: Optional[Callable[..., raster.RasterCollection]] = None, + *args, + **kwargs ): """ - Class constructor - - :param acquisition_time: - image acquisition time - :param platform: - name of the imaging platform - :param sensor: - name of the imaging sensor - :param processing_level: - processing level of the remotely sensed data (if - known and applicable) - :param product_uri: - unique product (scene) identifier - :attribute mode: - imaging mode of SAR sensors + Initializes a SceneCollection object with 0 to N scenes. + + :param scene_constructor: + optional callable returning an `~eodal.core.raster.RasterCollection` + instance. 
+ :param args: + arguments to pass to `scene_constructor` or one of RasterCollection's + class methods (e.g., `RasterCollection.from_multi_band_raster`) + :param kwargs: + key-word arguments to pass to `scene_constructor` or one of RasterCollection's + class methods (e.g., `RasterCollection.from_multi_band_raster`) """ - - self.acquisition_time = acquisition_time - self.platform = platform - self.sensor = sensor - self.processing_level = processing_level - self.product_uri = product_uri - self.mode = mode + # mapper are stored in a dictionary like collection + self._frozen = False + self.collection = dict() + self._frozen = True + + if scene_constructor is not None: + scene = scene_constructor.__call__(*args, **kwargs) + if not isinstance(scene, raster.RasterCollection): + raise TypeError('Only RasterCollection objects can be passed') + self.__setitem__(scene) + + def __getitem__(self, key: str) -> raster.RasterCollection: + return self.collection[key] + + def __setitem__(self, item: raster.RasterCollection): + if not isinstance(item, raster.RasterCollection): + raise TypeError("Only RasterCollection objects can be passed") + key = item.scene_properties.acquisition_time + if key in self.collection.keys(): + raise KeyError("Duplicate scene names are not permitted") + if key is None: + raise ValueError("RasterCollection passed must have an acquistion time stamp") + value = item.copy() + self.collection[key] = value + + def __delitem__(self, key: str): + del self.collection[key] + + def __iter__(self): + for k, v in self.collection.items(): + yield k, v + + def __len__(self) -> int: + return len(self.collection) def __repr__(self) -> str: - return str(self.__dict__) - - @property - def acquisition_time(self) -> datetime.datetime: - """acquisition time of the scene""" - return self._acquisition_time - - @acquisition_time.setter - def acquisition_time(self, time: datetime.datetime) -> None: - """acquisition time of the scene""" - if not isinstance(time, 
datetime.datetime): - raise TypeError("Expected a datetime.datetime object") - self._acquisition_time = time + return '' @property - def platform(self) -> str: - """name of the imaging platform""" - return self._platform + def scene_names(self) -> List[str]: + """scene names in collection""" + return list(self.collection.keys()) - @platform.setter - def platform(self, value: str) -> None: - """name of the imaging plaform""" - if not isinstance(value, str): - raise TypeError("Expected a str object") - self._platform = value + def apply(self, func: Callable): + pass - @property - def sensor(self) -> str: - """name of the sensor""" - return self._sensor - - @sensor.setter - def sensor(self, value: str) -> None: - """name of the sensor""" - if not isinstance(value, str): - raise TypeError("Expected a str object") - self._sensor = value - - @property - def processing_level(self) -> ProcessingLevels: - """current processing level""" - return self._processing_level + def dump(self): + pass - @processing_level.setter - def processing_level(self, value: ProcessingLevels): - """current processing level""" - self._processing_level = value + def filter(self): + pass - @property - def product_uri(self) -> str: - """unique product (scene) identifier""" - return self._product_uri + def load(self): + pass - @product_uri.setter - def product_uri(self, value: str) -> None: - """unique product (scene) identifier""" - if not isinstance(value, str): - raise TypeError("Expected a str object") - self._product_uri = value + def plot(self): + pass - @property - def mode(self) -> str: - """imaging mode of SAR sensors""" - return self._mode - - @mode.setter - def mode(self, value: str) -> None: - if not isinstance(value, str): - raise TypeError("Expected a str object") - self._mode = value - - -# class SceneCollection(MutableMapping): -# """ -# Collection of 0:N scenes where each scene is a RasterCollection with -# **non-empty** `SceneProperties` as each scene is indexed by its -# acquistion 
time. -# """ -# def __init__( -# self, -# scene_constructor: Optional[Callable[..., RasterCollection]] = None, -# *args, -# **kwargs -# ): -# """ -# Initializes a SceneCollection object with 0 to N scenes. -# -# :param scene_constructor: -# optional callable returning an `~eodal.core.raster.RasterCollection` -# instance. -# :param args: -# arguments to pass to `scene_constructor` or one of RasterCollection's -# class methods (e.g., `RasterCollection.from_multi_band_raster`) -# :param kwargs: -# key-word arguments to pass to `scene_constructor` or one of RasterCollection's -# class methods (e.g., `RasterCollection.from_multi_band_raster`) -# """ -# # mapper are stored in a dictionary like collection -# self._frozen = False -# self.collection = dict() -# self._frozen = True -# -# if scene_constructor is not None: -# scene = scene_constructor.__call__(*args, **kwargs) -# if not isinstance(scene, RasterCollection): -# raise TypeError('Only RasterCollection objects can be passed') -# self.__setitem__(scene) -# -# def __getitem__(self, key: str) -> RasterCollection: -# return self.collection[key] -# -# def __setitem__(self, item: RasterCollection): -# if not isinstance(item, RasterCollection): -# raise TypeError("Only RasterCollection objects can be passed") -# key = item.scene_properties.acquisition_time -# if key in self.collection.keys(): -# raise KeyError("Duplicate scene names are not permitted") -# if key is None: -# raise ValueError("RasterCollection passed must have an acquistion time stamp") -# value = item.copy() -# self.collection[key] = value -# -# def __delitem__(self, key: str): -# del self.collection[key] -# -# def __iter__(self): -# for k, v in self.collection.items(): -# yield k, v -# -# def __len__(self) -> int: -# return len(self.collection) -# -# def __repr__(self) -> str: -# pass -# -# @property -# def scene_names(self) -> List[str]: -# """scene names in collection""" -# return list(self.collection.keys()) -# -# def apply(self, func: Callable): -# 
pass -# -# def dump(self): -# pass -# -# def filter(self): -# pass -# -# def load(self): -# pass -# -# def plot(self): -# pass -# -# def to_xarray(self): -# pass + def to_xarray(self): + pass diff --git a/tests/core/test_scene_collection.py b/tests/core/test_scene_collection.py new file mode 100644 index 00000000..14840d94 --- /dev/null +++ b/tests/core/test_scene_collection.py @@ -0,0 +1,13 @@ +''' +Created on Nov 22, 2022 + +@author: graflu +''' + +import pytest + +from eodal.core.scene import SceneCollection + +def test_scene_collection(): + + sc = SceneCollection() From 611614e9959749c29d8dc82d043221bb4b368571 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 23 Nov 2022 17:14:58 +0100 Subject: [PATCH 030/125] adding Exception when band was not found in collection --- eodal/utils/exceptions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eodal/utils/exceptions.py b/eodal/utils/exceptions.py index 4ccccb8e..643bbfc6 100644 --- a/eodal/utils/exceptions.py +++ b/eodal/utils/exceptions.py @@ -79,3 +79,5 @@ class APIError(Exception): class AuthenticationError(Exception): pass +class SceneNotFoundError(Exception): + pass From 39a0858119a1dd58be628cb07ce91abce7b83798 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 23 Nov 2022 17:16:04 +0100 Subject: [PATCH 031/125] implementing slicing RasterCollection by band names/ aliases --- eodal/core/raster.py | 64 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 4 deletions(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index b9d55933..dbd9754c 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -98,6 +98,7 @@ from eodal.core.spectral_indices import SpectralIndices from eodal.utils.constants import ProcessingLevels from eodal.utils.decorators import check_band_names +from eodal.utils.exceptions import BandNotFoundError class SceneProperties(object): """ @@ -384,14 +385,69 @@ class methods (`Band.from_rasterio`, `Band.from_vector`) 
self._band_aliases.append(band.band_alias) self.__setitem__(band) - def __getitem__(self, key: str) -> Band: - # check for band alias if any - if self.has_band_aliases: + def __getitem__(self, key: str | slice) -> Band: + + def _get_band_from_key(key: str) -> Band: + """ + helper function returning a Band object identified + by its name from a RasterCollection + """ if key not in self.band_names: if key in self.band_aliases: band_idx = self.band_aliases.index(key) key = self.band_names[band_idx] - return self.collection[key] + return self.collection[key] + + # has a single key or slice been passed? + if isinstance(key, str): + try: + return _get_band_from_key(key=key) + except IndexError: + raise BandNotFoundError(f'Could not find band {key}') + + elif isinstance(key, slice): + # find the index of the start and the end of the slice + slice_start = key.start + slice_end = key.stop + # return an empty RasterCollection if start and stop is the same + # (numpy array behavior) + if slice_start is None and slice_end is None: + return RasterCollection() + # if start is None use the first band name or its alias + if slice_start is None: + if slice_end in self.band_names: + slice_start = self.band_names[0] + elif slice_end in self.band_aliases: + slice_start = self.band_aliases[0] + # if end is None use the last band name or its alias + end_increment = 0 + if slice_end is None: + if slice_start in self.band_names: + slice_end = self.band_names[-1] + elif slice_start in self.band_aliases: + slice_end = self.band_aliases[-1] + # to ensure that the :: operator works, we need to make + # sure the last band is also included in the slice + end_increment = 1 + + if set([slice_start, slice_end]).issubset(set(self.band_names)): + idx_start = self.band_names.index(slice_start) + idx_end = self.band_names.index(slice_end) + end_increment + bands = self.band_names + elif set([slice_start, slice_end]).issubset(set(self.band_aliases)): + idx_start = self.band_aliases.index(slice_start) + 
idx_end = self.band_aliases.index(slice_end) + end_increment + bands = self.band_aliases + else: + raise BandNotFoundError(f'Could not find bands in {key}') + slice_step = key.step + if slice_step is None: + slice_step = 1 + # get an empty RasterCollection for returing the slide + out_raster = RasterCollection() + for idx in range(idx_start, idx_end, slice_step): + out_raster.add_band(_get_band_from_key(key=bands[idx])) + return out_raster def __setitem__(self, item: Band): if not isinstance(item, Band): From 0a2ce1374ee62c7d2eb57405f5d3fd055dae7179 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 23 Nov 2022 17:16:16 +0100 Subject: [PATCH 032/125] adding tests for slicing RasterCollections --- tests/core/test_raster_slicing.py | 60 +++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 tests/core/test_raster_slicing.py diff --git a/tests/core/test_raster_slicing.py b/tests/core/test_raster_slicing.py new file mode 100644 index 00000000..09fa5b42 --- /dev/null +++ b/tests/core/test_raster_slicing.py @@ -0,0 +1,60 @@ +''' +Tests for slicing RasterCollections in a numpy style manner +''' + +import pytest + +from eodal.core.band import Band +from eodal.core.raster import RasterCollection + +def test_raster_slice(get_bandstack): + """test slicing of RasterCollections to get a subset""" + + fpath_raster = get_bandstack() + color_names = ['blue', 'green', 'red', 'red_edge1', 'red_edge2', 'red_edge3', \ + 'nir1', 'nir2', 'swir1', 'swir2'] + ds = RasterCollection.from_multi_band_raster( + fpath_raster=fpath_raster, + band_aliases=color_names + ) + + # slicing using band names + ds_sliced = ds['B02':'B04'] + assert len(ds_sliced) == 2, 'wrong number of bands returned from slice' + assert ds_sliced.band_names == ['B02','B03'], 'wrong bands returned from slice' + + ds_sliced = ds['B03':'B8A'] + assert len(ds_sliced) == 6, 'wrong number of bands returned from slice' + assert ds_sliced.band_names == ['B03', 'B04', 'B05', 'B06', 'B07', 'B08'], \ + 
'wrong bands returned from slice' + + # slicing using aliases + ds_sliced_aliases = ds['blue':'red'] + assert len(ds_sliced_aliases) == 2, 'wrong number of bands returned from slice' + assert ds_sliced_aliases.band_names == ['B02','B03'], 'wrong bands returned from slice' + + # slicing using same start and stop -> should return an empty collection + assert ds['B04':'B04'].empty, 'expected an empty RasterCollection' + + # slicing using reverse order -> should return an empty collection + assert ds['B08':'B03'].empty, 'expected an empty RasterCollection' + + # slices with open bounds + slice_open_end = ds['B02':] + assert len(slice_open_end) == 10, 'wrong number of bands returned' + assert slice_open_end.band_names == ds.band_names, 'messed up band names' + + slice_open_start = ds[:'B05'] + assert len(slice_open_start) == 3, 'wrong number of bands returned' + assert slice_open_start.band_names == ['B02', 'B03', 'B04'], 'messed up band names' + + slice_with_stride = ds['B03':'B8A':2] + assert len(slice_with_stride) == 3, 'wrong number of bands returned' + assert slice_with_stride.band_names == ['B03', 'B05', 'B07'], 'messed up band names' + + single_band = ds['B04'] + assert isinstance(single_band, Band), 'expected a band object' + + single_band = ds['red'] + assert isinstance(single_band, Band), 'expected a band object' + \ No newline at end of file From 883dda67480cc87448e42eefe1f7b83026ccb123 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 23 Nov 2022 17:16:37 +0100 Subject: [PATCH 033/125] preparing indexing of scenes in SceneCollection --- eodal/core/scene.py | 45 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/eodal/core/scene.py b/eodal/core/scene.py index 7df8b8c6..6cd8b1aa 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -1,6 +1,7 @@ """ -A scene is a collection of raster bands with an acquisition date, an unique identifier -and a (remote sensing) platform that acquired the raster data. 
+A SceneCollection is a collection of scenes. A Scene is a RasterCollections with an +acquisition date, an unique identifier and a (remote sensing) platform that acquired +the raster data. Copyright (C) 2022 Lukas Valentin Graf @@ -22,6 +23,7 @@ from typing import Callable, List, Optional import eodal.core.raster as raster +from eodal.utils.exceptions import SceneNotFoundError class SceneCollection(MutableMapping): """ @@ -53,23 +55,32 @@ class methods (e.g., `RasterCollection.from_multi_band_raster`) self.collection = dict() self._frozen = True + self._identifiers = [] if scene_constructor is not None: scene = scene_constructor.__call__(*args, **kwargs) + self._identifiers.append(scene.scene_properties.scene_id) if not isinstance(scene, raster.RasterCollection): raise TypeError('Only RasterCollection objects can be passed') self.__setitem__(scene) def __getitem__(self, key: str) -> raster.RasterCollection: - return self.collection[key] + if key in self.timestamps: + return self.collection[key] + elif key in self.idenfiers: + scene_idx = self.idenfiers.index(key) + return self.collection[self.timestamps[scene_idx]] + else: + raise SceneNotFoundError(f'Could not find a scene for key {key} in collection') def __setitem__(self, item: raster.RasterCollection): if not isinstance(item, raster.RasterCollection): raise TypeError("Only RasterCollection objects can be passed") + # scenes are index by their acquisition time key = item.scene_properties.acquisition_time if key in self.collection.keys(): raise KeyError("Duplicate scene names are not permitted") if key is None: - raise ValueError("RasterCollection passed must have an acquistion time stamp") + raise ValueError("RasterCollection passed must have an acquisition time stamp") value = item.copy() self.collection[key] = value @@ -84,20 +95,38 @@ def __len__(self) -> int: return len(self.collection) def __repr__(self) -> str: - return '' + if self.empty: + return 'Empty EOdal SceneCollection' + else: + return f'EOdal 
SceneCollection\n----------------------\n' + \ + f'# Scenes: {len(self)}\nTimestamps: {", ".join(self.timestamps)}\n' + \ + f'Scene Identifiers: {", ".join(self.band_aliases)}' @property - def scene_names(self) -> List[str]: - """scene names in collection""" + def empty(self) -> bool: + """Scene Collection is empty""" + return len(self) > 0 + + @property + def timestamps(self) -> List[str]: + """acquisition timestamps of scenes in collection""" return list(self.collection.keys()) + @property + def idenfiers(self) -> List[str]: + """list of scene identifiers""" + return self._identifiers + + def add_scene(self): + pass + def apply(self, func: Callable): pass def dump(self): pass - def filter(self): + def get_pixels(self): pass def load(self): From 18baba4fcfe24326b7fbad319a8b7f55c18c85ba Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 23 Nov 2022 17:16:59 +0100 Subject: [PATCH 034/125] updated CHANGELOG with information about slicing --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 52f47c27..f398fbd9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -17,6 +17,7 @@ Release date: YYYY-MM-DD - Added: RasterCollection objects are now iterable (iterate over bands in collection) - Added: RasterCollection now have a "apply" method allowing to pass custom functions to RasterCollection objects +- Added: RasterCollection now supports numpy-array like slicing using band names or band aliases Version `0.0.1 < https://github.com/EOA-team/eodal/releases/tag/v0.0.1>`__ From 9ca16dd081db80e04ebbf94b058ff7727fb846a5 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 24 Nov 2022 13:43:13 +0100 Subject: [PATCH 035/125] testing iterating over a slice of bands --- tests/core/test_raster_iterator.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/core/test_raster_iterator.py b/tests/core/test_raster_iterator.py index 2fe5bd49..f33e94b2 100644 --- a/tests/core/test_raster_iterator.py +++ 
b/tests/core/test_raster_iterator.py @@ -26,3 +26,10 @@ def test_raster_iterator(get_bandstack): idx += 1 assert idx == len(ds.band_names), 'iterator did not cover all bands' + + # test iterating over a slice of a RasterCollection + idx = 0 + for band_name, band_obj in ds['B03':'B05']: + assert isinstance(band_obj, Band), 'no band object returned' + idx += 1 + assert idx == len(ds['B03':'B05']), 'iterator did not cover all bands in slice' From 1f2ba50954471f8715eeace9020d000b1eeacca0 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 24 Nov 2022 14:18:35 +0100 Subject: [PATCH 036/125] adding is_scene attribute to RasterCollection --- eodal/core/raster.py | 99 +++++++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 38 deletions(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index dbd9754c..ad6895bb 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -121,18 +121,19 @@ class SceneProperties(object): def __init__( self, - acquisition_time: Optional[datetime.datetime] = datetime.datetime(2999, 1, 1), - platform: Optional[str] = "", - sensor: Optional[str] = "", + acquisition_time: Optional[datetime.datetime | Number] = None, + platform: Optional[str] = None, + sensor: Optional[str] = None, processing_level: Optional[ProcessingLevels] = ProcessingLevels.UNKNOWN, - product_uri: Optional[str] = "", - mode: Optional[str] = "" + product_uri: Optional[str] = None, + mode: Optional[str] = None ): """ Class constructor :param acquisition_time: - image acquisition time + image acquisition time. Can be a timestamp or any kind of numeric + index. 
:param platform: name of the imaging platform :param sensor: @@ -162,35 +163,39 @@ def acquisition_time(self) -> datetime.datetime: return self._acquisition_time @acquisition_time.setter - def acquisition_time(self, time: datetime.datetime) -> None: + def acquisition_time(self, time: datetime.datetime | None) -> None: """acquisition time of the scene""" - if not isinstance(time, datetime.datetime): - raise TypeError("Expected a datetime.datetime object") - self._acquisition_time = time + if time is not None: + if not isinstance(time, datetime.datetime) and \ + not isinstance(time, Number): + raise TypeError("Expected a datetime.datetime or Number object") + self._acquisition_time = time @property - def platform(self) -> str: + def platform(self) -> str | None: """name of the imaging platform""" return self._platform @platform.setter - def platform(self, value: str) -> None: + def platform(self, value: str | None) -> None: """name of the imaging plaform""" - if not isinstance(value, str): - raise TypeError("Expected a str object") - self._platform = value + if value is not None: + if not isinstance(value, str): + raise TypeError("Expected a str object") + self._platform = value @property - def sensor(self) -> str: + def sensor(self) -> str | None: """name of the sensor""" return self._sensor @sensor.setter - def sensor(self, value: str) -> None: + def sensor(self, value: str | None) -> None: """name of the sensor""" - if not isinstance(value, str): - raise TypeError("Expected a str object") - self._sensor = value + if value is not None: + if not isinstance(value, str): + raise TypeError("Expected a str object") + self._sensor = value @property def processing_level(self) -> ProcessingLevels: @@ -198,32 +203,45 @@ def processing_level(self) -> ProcessingLevels: return self._processing_level @processing_level.setter - def processing_level(self, value: ProcessingLevels): - """current processing level""" - self._processing_level = value + def processing_level(self, 
value: ProcessingLevels | None) -> None: + if value is not None: + if not isinstance(value, ProcessingLevels): + raise TypeError('Expected a proper ProcessingLevels object') + self._processing_level = value @property - def product_uri(self) -> str: + def product_uri(self) -> str | None: """unique product (scene) identifier""" return self._product_uri @product_uri.setter - def product_uri(self, value: str) -> None: + def product_uri(self, value: str | None) -> None: """unique product (scene) identifier""" - if not isinstance(value, str): - raise TypeError("Expected a str object") - self._product_uri = value + if value is not None: + if not isinstance(value, str): + raise TypeError("Expected a str object") + self._product_uri = value @property - def mode(self) -> str: + def mode(self) -> str | None: """imaging mode of SAR sensors""" return self._mode @mode.setter - def mode(self, value: str) -> None: - if not isinstance(value, str): - raise TypeError("Expected a str object") - self._mode = value + def mode(self, value: str | None) -> None: + if value is not None: + if not isinstance(value, str): + raise TypeError("Expected a str object") + self._mode = value + + def are_populated(self) -> bool: + """ + returns a Boolean flag indicating if the class attributes + have been populated with actual data or still equal defaults. + + A scene must have at least a time stamp. 
+ """ + return hasattr(self, 'acquisition_time') class RasterOperator(Operator): """ @@ -521,11 +539,6 @@ def empty(self) -> bool: """Handler has bands loaded""" return len(self.collection) == 0 - @property - def has_band_aliases(self) -> bool: - """collection supports aliasing""" - return len(self.band_aliases) > 0 - @property def collection(self) -> MutableMapping: """collection of the bands currently loaded""" @@ -541,6 +554,16 @@ def collection(self, value): if not self._frozen: self._collection = value + @property + def has_band_aliases(self) -> bool: + """collection supports aliasing""" + return len(self.band_aliases) > 0 + + @property + def is_scene(self) -> bool: + """is the RasterCollection a scene""" + return self.scene_properties.are_populated() + @check_band_names def get_band_alias(self, band_name: str) -> Union[Dict[str, str], None]: """ From a71186ffc2f9527f83a775bb5b04278f4a472708 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 24 Nov 2022 14:18:50 +0100 Subject: [PATCH 037/125] adding tests for the is_scene attribute of RasterCollections --- tests/core/test_raster_scene.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/core/test_raster_scene.py diff --git a/tests/core/test_raster_scene.py b/tests/core/test_raster_scene.py new file mode 100644 index 00000000..6ea76c0f --- /dev/null +++ b/tests/core/test_raster_scene.py @@ -0,0 +1,23 @@ +''' +Created on Nov 24, 2022 + +@author: graflu +''' + +import pytest + +from eodal.core.band import Band +from eodal.core.raster import RasterCollection + +def test_raster_is_scene(get_bandstack): + """test the is_scene attribute of RasterCollections""" + + fpath_raster = get_bandstack() + ds = RasterCollection.from_multi_band_raster( + fpath_raster=fpath_raster + ) + assert not ds.is_scene, 'scene metadata have not been set, so it is not a scene' + + ds.scene_properties.acquisition_time = 2000 + ds.scene_properties.platform = 'test' + assert ds.is_scene, 'scene metadata have 
been set, so it is a scene' From e040fc9749004344d339e80be08220b97e93b238 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 24 Nov 2022 17:11:32 +0100 Subject: [PATCH 038/125] updated function call --- tests/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index bef9ac26..facad437 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -72,7 +72,7 @@ def _get_s2_safe_l2a(): fd.write(chunk) # unzip dataset - unzip_datasets(download_dir=testdata_dir) + unzip_datasets(download_dir=testdata_dir, platform='S2') return testdata_fname @@ -106,7 +106,7 @@ def _get_s2_safe_l1c(): fd.write(chunk) # unzip dataset - unzip_datasets(download_dir=testdata_dir) + unzip_datasets(download_dir=testdata_dir, platform='S2') return testdata_fname From 3ba2e6f81f410c042197e032b67b94e6ed607860 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 24 Nov 2022 17:12:17 +0100 Subject: [PATCH 039/125] updated package import statement (scene_properties) --- eodal/core/sensors/planet_scope.py | 1 - eodal/core/sensors/sentinel1.py | 3 +-- eodal/core/sensors/sentinel2.py | 3 +-- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/eodal/core/sensors/planet_scope.py b/eodal/core/sensors/planet_scope.py index 3aeae903..e83792f0 100644 --- a/eodal/core/sensors/planet_scope.py +++ b/eodal/core/sensors/planet_scope.py @@ -11,7 +11,6 @@ from eodal.utils.constants.planet_scope import super_dove_band_mapping, super_dove_gain_factor from eodal.utils.exceptions import BandNotFoundError -# TODO: opening tiff-files and reading quality masks class PlanetScope(RasterCollection): @staticmethod diff --git a/eodal/core/sensors/sentinel1.py b/eodal/core/sensors/sentinel1.py index 22dba266..728dcaf7 100644 --- a/eodal/core/sensors/sentinel1.py +++ b/eodal/core/sensors/sentinel1.py @@ -34,8 +34,7 @@ from eodal.config import get_settings from eodal.core.band import Band -from eodal.core.raster import RasterCollection -from eodal.core.scene 
import SceneProperties +from eodal.core.raster import RasterCollection, SceneProperties from eodal.utils.decorators import prepare_point_features from eodal.utils.sentinel1 import get_S1_platform_from_safe, \ get_S1_acquistion_time_from_safe, _url_to_safe_name, \ diff --git a/eodal/core/sensors/sentinel2.py b/eodal/core/sensors/sentinel2.py index dad60011..1df29d5e 100644 --- a/eodal/core/sensors/sentinel2.py +++ b/eodal/core/sensors/sentinel2.py @@ -38,8 +38,7 @@ from typing import Dict, Optional, List, Tuple, Union from eodal.core.band import Band, WavelengthInfo, GeoInfo -from eodal.core.raster import RasterCollection -from eodal.core.scene import SceneProperties +from eodal.core.raster import RasterCollection, SceneProperties from eodal.utils.constants.sentinel2 import ( band_resolution, band_widths, From e4192ee605615bc43296b98128bc6a9774297444 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 24 Nov 2022 17:16:28 +0100 Subject: [PATCH 040/125] adding the 'is_scene' property to check if timestamp is set A scene **must** have a timestamp so that the RasterCollection can be localized in, both, geographic and temporal space. 
--- eodal/core/raster.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index ad6895bb..a1431e0f 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -205,8 +205,6 @@ def processing_level(self) -> ProcessingLevels: @processing_level.setter def processing_level(self, value: ProcessingLevels | None) -> None: if value is not None: - if not isinstance(value, ProcessingLevels): - raise TypeError('Expected a proper ProcessingLevels object') self._processing_level = value @property From f4e5e50c8bd8615b06472ed8ee8af45113e19ce6 Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 24 Nov 2022 17:22:25 +0100 Subject: [PATCH 041/125] improving get/set item methods and implememted add_scene --- eodal/core/scene.py | 78 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 18 deletions(-) diff --git a/eodal/core/scene.py b/eodal/core/scene.py index 6cd8b1aa..ab815410 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -19,10 +19,13 @@ along with this program. If not, see . 
""" +import dateutil.parser + from collections.abc import MutableMapping +from copy import deepcopy from typing import Callable, List, Optional -import eodal.core.raster as raster +from eodal.core.raster import RasterCollection from eodal.utils.exceptions import SceneNotFoundError class SceneCollection(MutableMapping): @@ -33,7 +36,7 @@ class SceneCollection(MutableMapping): """ def __init__( self, - scene_constructor: Optional[Callable[..., raster.RasterCollection]] = None, + scene_constructor: Optional[Callable[..., RasterCollection]] = None, *args, **kwargs ): @@ -58,30 +61,40 @@ class methods (e.g., `RasterCollection.from_multi_band_raster`) self._identifiers = [] if scene_constructor is not None: scene = scene_constructor.__call__(*args, **kwargs) - self._identifiers.append(scene.scene_properties.scene_id) - if not isinstance(scene, raster.RasterCollection): - raise TypeError('Only RasterCollection objects can be passed') self.__setitem__(scene) - def __getitem__(self, key: str) -> raster.RasterCollection: + def __getitem__(self, key: str) -> RasterCollection: if key in self.timestamps: + # most likely time stamps are passed as strings + if isinstance(key, str): + # we infer the format using dateutil + key = dateutil.parser.parse(key) return self.collection[key] - elif key in self.idenfiers: - scene_idx = self.idenfiers.index(key) - return self.collection[self.timestamps[scene_idx]] + elif key in self.identifiers: + scene_idx = self.identifiers.index(key) + return self.__getitem__(self.timestamps[scene_idx]) else: raise SceneNotFoundError(f'Could not find a scene for key {key} in collection') - def __setitem__(self, item: raster.RasterCollection): - if not isinstance(item, raster.RasterCollection): + def __setitem__(self, item: RasterCollection): + if not isinstance(item, RasterCollection): raise TypeError("Only RasterCollection objects can be passed") + if not item.is_scene: + raise ValueError( + 'Only RasterCollection with timestamps in their 
scene_properties can be passed' + ) + # use the scene uri as an alias if available + if hasattr(item.scene_properties, 'product_uri'): + self._identifiers.append(item.scene_properties.product_uri) # scenes are index by their acquisition time key = item.scene_properties.acquisition_time if key in self.collection.keys(): raise KeyError("Duplicate scene names are not permitted") if key is None: raise ValueError("RasterCollection passed must have an acquisition time stamp") - value = item.copy() + # it's important to make a copy of the scene before adding it + # to the collection + value = deepcopy(item) self.collection[key] = value def __delitem__(self, key: str): @@ -100,25 +113,54 @@ def __repr__(self) -> str: else: return f'EOdal SceneCollection\n----------------------\n' + \ f'# Scenes: {len(self)}\nTimestamps: {", ".join(self.timestamps)}\n' + \ - f'Scene Identifiers: {", ".join(self.band_aliases)}' + f'Scene Identifiers: {", ".join(self.identifiers)}' @property def empty(self) -> bool: """Scene Collection is empty""" - return len(self) > 0 + return len(self) == 0 @property def timestamps(self) -> List[str]: """acquisition timestamps of scenes in collection""" - return list(self.collection.keys()) + return [str(x) for x in list(self.collection.keys())] @property - def idenfiers(self) -> List[str]: + def identifiers(self) -> List[str]: """list of scene identifiers""" return self._identifiers - def add_scene(self): - pass + def add_scene( + self, scene_constructor: Callable[...,RasterCollection] | RasterCollection, *args, **kwargs + ): + """ + Adds a Scene to the collection of scenes. 
+ + Raises an error if a scene with the same timestamp already exists (unique + timestamp constraint) + + :param scene_constructor: + callable returning a `~eodal.core.raster.RasterCollection` instance or + existing `RasterCollection` instance + :param args: + positional arguments to pass to `scene_constructor` + :param kwargs: + keyword arguments to pass to `scene_constructor` + """ + # if a RasterCollection is passed no constructor call is required + try: + if isinstance(scene_constructor, RasterCollection): + scene = scene_constructor + else: + scene = scene_constructor.__call__(*args, **kwargs) + except Exception as e: + raise ValueError(f'Cannot initialize new Scene instance: {e}') + # try to add the scene to the SceneCollection + try: + self.__setitem__(scene) + except Exception as e: + raise KeyError(f'Cannot add scene: {e}') + def apply(self, func: Callable): pass From 8c0d077e767e682ac5855889c4a3e18c4fd37d1b Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 24 Nov 2022 17:22:41 +0100 Subject: [PATCH 042/125] drafting first tests for SceneCollection objects --- tests/core/test_raster_scene.py | 61 +++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/tests/core/test_raster_scene.py b/tests/core/test_raster_scene.py index 6ea76c0f..3b18b34c 100644 --- a/tests/core/test_raster_scene.py +++ b/tests/core/test_raster_scene.py @@ -5,9 +5,12 @@ ''' import pytest +import datetime from eodal.core.band import Band from eodal.core.raster import RasterCollection +from eodal.core.scene import SceneCollection +from eodal.core.sensors import Sentinel2 def test_raster_is_scene(get_bandstack): """test the is_scene attribute of RasterCollections""" @@ -21,3 +24,61 @@ def test_raster_is_scene(get_bandstack): ds.scene_properties.acquisition_time = 2000 ds.scene_properties.platform = 'test' assert ds.is_scene, 'scene metadata have been set, so it is a scene' + +def test_scene_collection(get_s2_safe_l2a, get_polygons_2, get_bandstack): + """test scene 
collection constructor calls""" + + # prepare inputs + polys = get_polygons_2() + fpath_s2 = get_s2_safe_l2a() + # read the scene two times so that we can "fake" a time series + s2_ds_1 = Sentinel2.from_safe(fpath_s2, vector_features=polys) + assert s2_ds_1.is_scene, 'SceneProperties not set' + s2_ds_2 = Sentinel2.from_safe(fpath_s2, vector_features=polys) + assert s2_ds_2.is_scene, 'SceneProperties not set' + # set the timing of the second scene to today + s2_ds_2.scene_properties.acquisition_time = datetime.datetime.now() + + # open an empty SceneCollection + scoll = SceneCollection() + assert scoll.empty, 'SceneCollection must be empty' + + # open a SceneCollection by passing a constructor (RasterCollection + Timestamp) + scoll = SceneCollection( + scene_constructor=Sentinel2.from_safe, + in_dir=fpath_s2, + vector_features=polys + ) + assert len(scoll) == 1, 'wrong number of scenes in collection' + assert len(scoll.timestamps) == 1, 'wrong number of time stamps' + assert len(scoll.timestamps) == len(scoll.identifiers), 'time stamps and identifers do not match' + # try to get the scene by its timestamp + rcoll = scoll[scoll.timestamps[0]] + assert isinstance(rcoll, RasterCollection), 'expected a raster collection' + assert not rcoll.empty, 'RasterCollection must not be empty' + # try to get the scene by its identifier + rcoll_id = scoll[scoll.identifiers[0]] + assert isinstance(rcoll_id, RasterCollection), 'expected a raster collection' + assert not rcoll_id.empty, 'RasterCollection must not be empty' + assert rcoll.scene_properties.acquisition_time == rcoll_id.scene_properties.acquisition_time, \ + 'selection by timestamp and identifier returned different results' + + # open a SceneCollection by passing a RasterCollection -> should raise an error + # because the timestamp is missing + fpath_no_scene = get_bandstack() + with pytest.raises(ValueError): + scoll = SceneCollection( + scene_constructor=RasterCollection.from_multi_band_raster, + 
fpath_raster=fpath_no_scene + ) + + # add another scene + scoll.add_scene(s2_ds_2) + assert len(scoll) == 2, 'wrong number of scenes' + assert scoll.timestamps[-1] == str(s2_ds_2.scene_properties.acquisition_time), 'wrong timestamp' + assert scoll.timestamps[0] == str(s2_ds_1.scene_properties.acquisition_time), 'wrong timestamp' + # add the same scene -> should raise an error + with pytest.raises(KeyError): + scoll.add_scene(s2_ds_2) + + \ No newline at end of file From 2bd2012528ab7af58975c8fc6c5cb85b45827142 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 25 Nov 2022 12:29:38 +0100 Subject: [PATCH 043/125] fixed import statements after moving class to different module --- eodal/operational/mapping/merging.py | 2 +- eodal/operational/mapping/sentinel2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/eodal/operational/mapping/merging.py b/eodal/operational/mapping/merging.py index 34658085..7822ad32 100644 --- a/eodal/operational/mapping/merging.py +++ b/eodal/operational/mapping/merging.py @@ -30,7 +30,7 @@ from eodal.config import get_settings from eodal.core.band import Band, GeoInfo from eodal.core.raster import RasterCollection -from eodal.core.scene import SceneProperties +from eodal.core.raster import SceneProperties Settings = get_settings() diff --git a/eodal/operational/mapping/sentinel2.py b/eodal/operational/mapping/sentinel2.py index 1133c65b..dbcf88f0 100644 --- a/eodal/operational/mapping/sentinel2.py +++ b/eodal/operational/mapping/sentinel2.py @@ -38,7 +38,7 @@ DataNotFoundError ) from eodal.metadata.sentinel2.utils import identify_updated_scenes -from eodal.core.scene import SceneProperties +from eodal.core.raster import SceneProperties settings = get_settings() logger = settings.logger From 16cd297e2880545efa3d05d95e3443c9887d7536 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 25 Nov 2022 15:25:24 +0100 Subject: [PATCH 044/125] implementing __repr__() for Band class --- eodal/core/band.py | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/eodal/core/band.py b/eodal/core/band.py index 4379cf14..050a3862 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -498,6 +498,9 @@ def __gt__(self, other): def __lt__(self, other): return BandOperator.calc(a=self, other=other, operator="<") + def __repr__(self) -> str: + return f'EOdal Band\n---------.\nName: {self.band_name}\nGeoInfo: {self.geo_info}' + @property def alias(self) -> Union[str, None]: """Alias of the band name (if available)""" From a07c495a028ab69b38066c75a07a3c5ea0c6d2b8 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 25 Nov 2022 15:45:34 +0100 Subject: [PATCH 045/125] implementing slicing of SceneCollections (untested) --- eodal/core/scene.py | 91 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 79 insertions(+), 12 deletions(-) diff --git a/eodal/core/scene.py b/eodal/core/scene.py index ab815410..33fc196c 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -19,6 +19,7 @@ along with this program. If not, see . 
""" +import datetime import dateutil.parser from collections.abc import MutableMapping @@ -63,18 +64,84 @@ class methods (e.g., `RasterCollection.from_multi_band_raster`) scene = scene_constructor.__call__(*args, **kwargs) self.__setitem__(scene) - def __getitem__(self, key: str) -> RasterCollection: - if key in self.timestamps: - # most likely time stamps are passed as strings - if isinstance(key, str): - # we infer the format using dateutil - key = dateutil.parser.parse(key) - return self.collection[key] - elif key in self.identifiers: - scene_idx = self.identifiers.index(key) - return self.__getitem__(self.timestamps[scene_idx]) - else: - raise SceneNotFoundError(f'Could not find a scene for key {key} in collection') + def __getitem__(self, key: str | slice) -> RasterCollection: + + def _get_scene_from_key(key: str) -> RasterCollection: + if key in self.timestamps: + # most likely time stamps are passed as strings + if isinstance(key, str): + # we infer the format using dateutil + key = dateutil.parser.parse(key) + return self.__getitem__(key) + elif key in self.identifiers: + scene_idx = self.identifiers.index(key) + return self.__getitem__(self.timestamps[scene_idx]) + + # has a single key or slice been passed? 
+ if isinstance(key, str): + try: + return _get_scene_from_key(key=key) + except IndexError: + raise SceneNotFoundError( + f'Could not find a scene for key {key} in collection' + ) + + elif isinstance(key, slice): + # find the index of the start and the end of the slice + slice_start = key.start + slice_end = key.stop + # return an empty SceneCollection if start and stop is the same + # (numpy array behavior) + if slice_start is None and slice_end is None: + return SceneCollection() + # if start is None use the first scene + if slice_start is None: + if isinstance(slice_end, datetime.date): + slice_start = self.timestamps[0].date() + else: + if slice_end in self.identifiers: + slice_start = self.identifiers[0] + else: + slice_start = self.timestamps[0] + # if end is None use the last scene + end_increment = 0 + if slice_end is None: + if isinstance(slice_start, datetime.date): + slice_end = self.timestamps[-1].date() + else: + if slice_start in self.identifiers: + slice_end = self.identifiers[-1] + else: + slice_end = self.timestamps[-1] + # to ensure that the :: operator works, we need to make + # sure the last band is also included in the slice + end_increment = 1 + + if set([slice_start, slice_end]).issubset(set(self.timestamps)): + idx_start = self.timestamps.index(slice_start) + idx_end = self.timestamps.index(slice_end) + end_increment + scenes = self.timestamps + elif set([slice_start, slice_end]).issubset(set(self.identifiers)): + idx_start = self.identifiers.index(slice_start) + idx_end = self.identifiers.index(slice_end) + end_increment + scenes = self.identifiers + # allow selection by date range + elif isinstance(slice_start, datetime.date) and isinstance(slice_end, datetime.date): + out_scoll = SceneCollection() + for timestamp, scene in self: + if slice_start <= timestamp < slice_end: + out_scoll.add_scene(scene.copy()) + return out_scoll + else: + raise SceneNotFoundError(f'Could not find scenes in {key}') + slice_step = key.step + if slice_step is 
None: + slice_step = 1 + # get an empty SceneCollection for returning the slide + out_scoll = SceneCollection() + for idx in range(idx_start, idx_end, slice_step): + out_scoll.add_scene(_get_scene_from_key(key=scenes[idx])) + return out_scoll def __setitem__(self, item: RasterCollection): if not isinstance(item, RasterCollection): From 09074fc4abf3f497ad012f18e2c08caaf1ea8e74 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 28 Nov 2022 09:41:26 +0100 Subject: [PATCH 046/125] deleted test module in favor of test_scene_collection.py --- tests/core/test_raster_scene.py | 84 --------------------------------- 1 file changed, 84 deletions(-) delete mode 100644 tests/core/test_raster_scene.py diff --git a/tests/core/test_raster_scene.py b/tests/core/test_raster_scene.py deleted file mode 100644 index 3b18b34c..00000000 --- a/tests/core/test_raster_scene.py +++ /dev/null @@ -1,84 +0,0 @@ -''' -Created on Nov 24, 2022 - -@author: graflu -''' - -import pytest -import datetime - -from eodal.core.band import Band -from eodal.core.raster import RasterCollection -from eodal.core.scene import SceneCollection -from eodal.core.sensors import Sentinel2 - -def test_raster_is_scene(get_bandstack): - """test the is_scene attribute of RasterCollections""" - - fpath_raster = get_bandstack() - ds = RasterCollection.from_multi_band_raster( - fpath_raster=fpath_raster - ) - assert not ds.is_scene, 'scene metadata have not been set, so it is not a scene' - - ds.scene_properties.acquisition_time = 2000 - ds.scene_properties.platform = 'test' - assert ds.is_scene, 'scene metadata have been set, so it is a scene' - -def test_scene_collection(get_s2_safe_l2a, get_polygons_2, get_bandstack): - """test scene collection constructor calls""" - - # prepare inputs - polys = get_polygons_2() - fpath_s2 = get_s2_safe_l2a() - # read the scene two times so that we can "fake" a time series - s2_ds_1 = Sentinel2.from_safe(fpath_s2, vector_features=polys) - assert s2_ds_1.is_scene, 'SceneProperties not set' - 
s2_ds_2 = Sentinel2.from_safe(fpath_s2, vector_features=polys) - assert s2_ds_2.is_scene, 'SceneProperties not set' - # set the timing of the second scene to today - s2_ds_2.scene_properties.acquisition_time = datetime.datetime.now() - - # open an empty SceneCollection - scoll = SceneCollection() - assert scoll.empty, 'SceneCollection must be empty' - - # open a SceneCollection by passing a constructor (RasterCollection + Timestamp) - scoll = SceneCollection( - scene_constructor=Sentinel2.from_safe, - in_dir=fpath_s2, - vector_features=polys - ) - assert len(scoll) == 1, 'wrong number of scenes in collection' - assert len(scoll.timestamps) == 1, 'wrong number of time stamps' - assert len(scoll.timestamps) == len(scoll.identifiers), 'time stamps and identifers do not match' - # try to get the scene by its timestamp - rcoll = scoll[scoll.timestamps[0]] - assert isinstance(rcoll, RasterCollection), 'expected a raster collection' - assert not rcoll.empty, 'RasterCollection must not be empty' - # try to get the scene by its identifier - rcoll_id = scoll[scoll.identifiers[0]] - assert isinstance(rcoll_id, RasterCollection), 'expected a raster collection' - assert not rcoll_id.empty, 'RasterCollection must not be empty' - assert rcoll.scene_properties.acquisition_time == rcoll_id.scene_properties.acquisition_time, \ - 'selection by timestamp and identifier returned different results' - - # open a SceneCollection by passing a RasterCollection -> should raise an error - # because the timestamp is missing - fpath_no_scene = get_bandstack() - with pytest.raises(ValueError): - scoll = SceneCollection( - scene_constructor=RasterCollection.from_multi_band_raster, - fpath_raster=fpath_no_scene - ) - - # add another scene - scoll.add_scene(s2_ds_2) - assert len(scoll) == 2, 'wrong number of scenes' - assert scoll.timestamps[-1] == str(s2_ds_2.scene_properties.acquisition_time), 'wrong timestamp' - assert scoll.timestamps[0] == str(s2_ds_1.scene_properties.acquisition_time), 'wrong 
timestamp' - # add the same scene -> should raise an error - with pytest.raises(KeyError): - scoll.add_scene(s2_ds_2) - - \ No newline at end of file From 928bd540e21701f1fb662ec23c7fde97cb521454 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 28 Nov 2022 09:42:06 +0100 Subject: [PATCH 047/125] introducing sorting of scenes and new classmethod wrapping init call --- eodal/core/scene.py | 145 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 132 insertions(+), 13 deletions(-) diff --git a/eodal/core/scene.py b/eodal/core/scene.py index 33fc196c..f6138a19 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -21,10 +21,11 @@ import datetime import dateutil.parser +import numpy as np from collections.abc import MutableMapping from copy import deepcopy -from typing import Callable, List, Optional +from typing import Callable, List, Optional, Tuple from eodal.core.raster import RasterCollection from eodal.utils.exceptions import SceneNotFoundError @@ -58,6 +59,7 @@ class methods (e.g., `RasterCollection.from_multi_band_raster`) self._frozen = False self.collection = dict() self._frozen = True + self._is_sorted = True self._identifiers = [] if scene_constructor is not None: @@ -67,12 +69,12 @@ class methods (e.g., `RasterCollection.from_multi_band_raster`) def __getitem__(self, key: str | slice) -> RasterCollection: def _get_scene_from_key(key: str) -> RasterCollection: - if key in self.timestamps: + if str(key) in self.timestamps: # most likely time stamps are passed as strings if isinstance(key, str): # we infer the format using dateutil key = dateutil.parser.parse(key) - return self.__getitem__(key) + return self.collection[key] elif key in self.identifiers: scene_idx = self.identifiers.index(key) return self.__getitem__(self.timestamps[scene_idx]) @@ -87,6 +89,8 @@ def _get_scene_from_key(key: str) -> RasterCollection: ) elif isinstance(key, slice): + if not self.is_sorted: + raise ValueError('Slices are not permitted on unsorted SceneCollections') 
# find the index of the start and the end of the slice slice_start = key.start slice_end = key.stop @@ -97,7 +101,7 @@ def _get_scene_from_key(key: str) -> RasterCollection: # if start is None use the first scene if slice_start is None: if isinstance(slice_end, datetime.date): - slice_start = self.timestamps[0].date() + slice_start = list(self.collection.keys())[0].date() else: if slice_end in self.identifiers: slice_start = self.identifiers[0] @@ -107,7 +111,7 @@ def _get_scene_from_key(key: str) -> RasterCollection: end_increment = 0 if slice_end is None: if isinstance(slice_start, datetime.date): - slice_end = self.timestamps[-1].date() + slice_end = list(self.collection.keys())[-1].date() else: if slice_start in self.identifiers: slice_end = self.identifiers[-1] @@ -129,8 +133,12 @@ def _get_scene_from_key(key: str) -> RasterCollection: elif isinstance(slice_start, datetime.date) and isinstance(slice_end, datetime.date): out_scoll = SceneCollection() for timestamp, scene in self: - if slice_start <= timestamp < slice_end: - out_scoll.add_scene(scene.copy()) + if end_increment == 0: + if slice_start <= timestamp.date() < slice_end: + out_scoll.add_scene(scene.copy()) + else: + if slice_start <= timestamp.date() <= slice_end: + out_scoll.add_scene(scene.copy()) return out_scoll else: raise SceneNotFoundError(f'Could not find scenes in {key}') @@ -150,9 +158,6 @@ def __setitem__(self, item: RasterCollection): raise ValueError( 'Only RasterCollection with timestamps in their scene_properties can be passed' ) - # use the scene uri as an alias if available - if hasattr(item.scene_properties, 'product_uri'): - self._identifiers.append(item.scene_properties.product_uri) # scenes are index by their acquisition time key = item.scene_properties.acquisition_time if key in self.collection.keys(): @@ -163,9 +168,18 @@ def __setitem__(self, item: RasterCollection): # to the collection value = deepcopy(item) self.collection[key] = value + # last, use the scene uri as an alias 
if available + if hasattr(item.scene_properties, 'product_uri'): + self._identifiers.append(item.scene_properties.product_uri) - def __delitem__(self, key: str): + def __delitem__(self, key: str | datetime.datetime): + # get index of the scene to be deleted to also delete its identifier + idx = self.timestamps.index(str(key)) + # casts strings back to datetime objects + if isinstance(key, str): + key = dateutil.parser.parse(key) del self.collection[key] + _ = self.identifiers.pop(idx) def __iter__(self): for k, v in self.collection.items(): @@ -182,6 +196,25 @@ def __repr__(self) -> str: f'# Scenes: {len(self)}\nTimestamps: {", ".join(self.timestamps)}\n' + \ f'Scene Identifiers: {", ".join(self.identifiers)}' + @staticmethod + def _sort_keys( + sort_direction: str, + raster_collections: List[RasterCollection] | Tuple[RasterCollection] + ) -> np.ndarray: + """ + Returns sorted indices from a list/ tuple of RasterCollections. + """ + # check sort_direction passed + if sort_direction not in ['asc', 'desc']: + raise ValueError('Sort direction must be one of: `asc`, `desc`') + # get timestamps of the scenes and use np.argsort to bring them into the desired order + timestamps = [x.scene_properties.acquisition_time for x in raster_collections] + if sort_direction == 'asc': + sort_idx = np.argsort(timestamps) + elif sort_direction == 'desc': + sort_idx = np.argsort(timestamps)[::-1] + return sort_idx + @property def empty(self) -> bool: """Scene Collection is empty""" @@ -197,9 +230,66 @@ def identifiers(self) -> List[str]: """list of scene identifiers""" return self._identifiers - def add_scene( - self, scene_constructor: Callable[...,RasterCollection] | RasterCollection, *args, **kwargs + @property + def is_sorted(self) -> bool: + """are the scenes sorted by their timstamps?""" + return self._is_sorted + + @is_sorted.setter + def is_sorted(self, value: bool) -> None: + """are the scenes sorted by their timestamps?""" + if not type(value) == bool: + raise TypeError('Only 
boolean types are accepted') + self._is_sorted = value + + @classmethod + def from_raster_collections( + cls, + raster_collections: List[RasterCollection] | Tuple[RasterCollection], + sort_scenes: Optional[bool] = True, + sort_direction: Optional[str] = 'asc' ): + """ + Create a SceneCollection from a list/tuple of N RasterCollection objects. + + :param raster_collections: + list or tuple of RasterCollections from which to create a new scene + collection. + :param sort_scenes: + if True (default) scenes are order in chronological order by their + acquisition time. + :param sort_direction: + direction of sorting. Must be either 'asc' (ascending) or 'desc' + (descending). Ignored if `sort_scenes` is False. + :returns: + SceneCollection instance + """ + # check inputs + if not isinstance(raster_collections, list) and not isinstance(raster_collections, tuple): + raise TypeError(f'Can only handle lists or tuples of RasterCollections') + if not np.array([isinstance(x, RasterCollection) for x in raster_collections]).all(): + raise TypeError(f'All items passed must be RasterCollection instances') + if not np.array([x.is_scene for x in raster_collections]).all(): + raise TypeError(f'All items passed must have an acquisition timestamp') + # check if scenes shall be sorted + if sort_scenes: + sort_idx = cls._sort_keys(sort_direction, raster_collections) + is_sorted = True + else: + sort_idx = np.array([x for x in range(len(raster_collections))]) + is_sorted = False + # open a SceneCollection instance and add the scenes + scoll = cls() + scoll.is_sorted = is_sorted + for idx in sort_idx: + scoll.add_scene(scene_constructor=raster_collections[idx].copy()) + return scoll + + def add_scene( + self, + scene_constructor: Callable[...,RasterCollection] | RasterCollection, + *args, **kwargs + ) -> None: """ Adds a Scene to the collection of scenes. 
@@ -225,6 +315,7 @@ def add_scene( # try to add the scene to the SceneCollection try: self.__setitem__(scene) + except Exception as e: raise KeyError(f'Cannot add scene: {e}') @@ -232,6 +323,10 @@ def add_scene( def apply(self, func: Callable): pass + def copy(self): + """returns a true copy of the SceneCollection""" + return deepcopy(self) + def dump(self): pass @@ -244,5 +339,29 @@ def load(self): def plot(self): pass + def sort( + self, + sort_direction: Optional[str] = 'asc' + ): + """ + Returns a sorted copy of the SceneCollection. + + :param sort_direction: + direction of sorting. Must be either 'asc' (ascending) or 'desc' + (descending). Ignored if `sort_scenes` is False. + :returns: + sorted SceneCollection. + """ + # empty SceneCollections cannot be sorted + if self.empty: + return self.copy() + # get a list of all scenes in the collection and sort them + scenes = [v for _, v in self] + sort_idx = self._sort_keys(sort_direction, raster_collections=scenes) + scoll = SceneCollection() + for idx in sort_idx: + scoll.add_scene(scenes[idx].copy()) + return scoll + def to_xarray(self): pass From 0380fdce56ec0dcba61a4f8cf4f02f1d5818ceea Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 28 Nov 2022 09:42:18 +0100 Subject: [PATCH 048/125] enhanced test cases for SceneCollections --- tests/core/test_scene_collection.py | 144 +++++++++++++++++++++++++++- 1 file changed, 140 insertions(+), 4 deletions(-) diff --git a/tests/core/test_scene_collection.py b/tests/core/test_scene_collection.py index 14840d94..a08309ee 100644 --- a/tests/core/test_scene_collection.py +++ b/tests/core/test_scene_collection.py @@ -1,13 +1,149 @@ ''' -Created on Nov 22, 2022 +Created on Nov 24, 2022 @author: graflu ''' import pytest +import datetime +from datetime import date + +from eodal.core.band import Band +from eodal.core.raster import RasterCollection from eodal.core.scene import SceneCollection +from eodal.core.sensors import Sentinel2 + +def test_raster_is_scene(get_bandstack): + 
"""test the is_scene attribute of RasterCollections""" + + fpath_raster = get_bandstack() + ds = RasterCollection.from_multi_band_raster( + fpath_raster=fpath_raster + ) + assert not ds.is_scene, 'scene metadata have not been set, so it is not a scene' + + ds.scene_properties.acquisition_time = 2000 + ds.scene_properties.platform = 'test' + assert ds.is_scene, 'scene metadata have been set, so it is a scene' + +def test_scene_collection(get_s2_safe_l2a, get_polygons_2, get_bandstack): + """test scene collection constructor calls and built-ins""" + + # prepare inputs + polys = get_polygons_2() + fpath_s2 = get_s2_safe_l2a() + # read the scene two times so that we can "fake" a time series + s2_ds_1 = Sentinel2.from_safe(fpath_s2, vector_features=polys) + assert s2_ds_1.is_scene, 'SceneProperties not set' + s2_ds_2 = Sentinel2.from_safe(fpath_s2, vector_features=polys) + assert s2_ds_2.is_scene, 'SceneProperties not set' + # set the timing of the second scene to today + s2_ds_2.scene_properties.acquisition_time = datetime.datetime.now() + + # open an empty SceneCollection + scoll = SceneCollection() + assert scoll.empty, 'SceneCollection must be empty' + + # open a SceneCollection by passing a constructor (RasterCollection + Timestamp) + scoll = SceneCollection( + scene_constructor=Sentinel2.from_safe, + in_dir=fpath_s2, + vector_features=polys + ) + assert len(scoll) == 1, 'wrong number of scenes in collection' + assert len(scoll.timestamps) == 1, 'wrong number of time stamps' + assert len(scoll.timestamps) == len(scoll.identifiers), 'time stamps and identifers do not match' + # try to get the scene by its timestamp + rcoll = scoll[scoll.timestamps[0]] + assert isinstance(rcoll, RasterCollection), 'expected a raster collection' + assert not rcoll.empty, 'RasterCollection must not be empty' + # try to get the scene by its identifier + rcoll_id = scoll[scoll.identifiers[0]] + assert isinstance(rcoll_id, RasterCollection), 'expected a raster collection' + assert not 
rcoll_id.empty, 'RasterCollection must not be empty' + assert rcoll.scene_properties.acquisition_time == rcoll_id.scene_properties.acquisition_time, \ + 'selection by timestamp and identifier returned different results' + + # open a SceneCollection by passing a RasterCollection -> should raise an error + # because the timestamp is missing + fpath_no_scene = get_bandstack() + with pytest.raises(ValueError): + scoll = SceneCollection( + scene_constructor=RasterCollection.from_multi_band_raster, + fpath_raster=fpath_no_scene + ) + + # add another scene + scoll.add_scene(s2_ds_2) + assert len(scoll) == 2, 'wrong number of scenes' + assert len(scoll.timestamps) == len(scoll.identifiers) == len(scoll), \ + 'wrong number of items' + assert scoll.timestamps[-1] == str(s2_ds_2.scene_properties.acquisition_time), 'wrong timestamp' + assert scoll.timestamps[0] == str(s2_ds_1.scene_properties.acquisition_time), 'wrong timestamp' + # add the same scene -> should raise an error + with pytest.raises(KeyError): + scoll.add_scene(s2_ds_2) + + # try working with slices + # slice by date range + scoll_daterange = scoll[date(2022,1,1):date(2999,12,31)] + assert isinstance(scoll_daterange, SceneCollection), 'expected a SceneCollection' + assert len(scoll_daterange) == 1, 'expected only a single scene in collection' + # slice by date range, open end of slice + scoll_daterange_openend = scoll[date(2022,1,1):] + assert isinstance(scoll_daterange_openend, SceneCollection), 'expected a SceneCollection' + assert len(scoll_daterange_openend) == 1, 'expected only a single scene in collection' + # slice by date range, open start of slice + scoll_daterange_openstart = scoll[:date(2022,1,1)] + assert isinstance(scoll_daterange_openstart, SceneCollection), 'expected a SceneCollection' + assert len(scoll_daterange_openstart) == 1, 'expected only a single scene in collection' + # slice outside of daterange covered by SceneCollection + assert scoll[date(1900,1,1):date(1901,12,31)].empty, 
'SceneCollection returned must be empty' + + # test deleting a scene by its timestamp + del(scoll[scoll.timestamps[0]]) + assert len(scoll) == 1, 'scene was not deleted' + assert len(scoll.timestamps) == len(scoll.identifiers) == len(scoll), \ + 'wrong number of items' + + # SceneCollection from list of scenes + scenes_list = [s2_ds_1, s2_ds_2] + scoll = SceneCollection.from_raster_collections(scenes_list) + assert len(scoll) == 2, 'wrong number of scenes' + assert scoll.is_sorted, 'expected a sorted SceneCollection' + assert scoll.timestamps[0] == str(scenes_list[0].scene_properties.acquisition_time), \ + 'wrong order of scenes' + # from tuple + scenes_tuple = tuple(scenes_list) + scoll = SceneCollection.from_raster_collections(scenes_tuple) + assert len(scoll) == 2, 'wrong number of scenes' + assert scoll.is_sorted, 'expected a sorted SceneCollection' + assert scoll.timestamps[0] == str(scenes_list[0].scene_properties.acquisition_time), \ + 'wrong order of scenes' + # descending order of scenes + scoll = SceneCollection.from_raster_collections(scenes_tuple, sort_direction='desc') + assert len(scoll) == 2, 'wrong number of scenes' + assert scoll.is_sorted, 'expected a sorted SceneCollection' + assert scoll.timestamps[-1] == str(scenes_list[0].scene_properties.acquisition_time), \ + 'wrong order of scenes' + # no sorting + scoll = SceneCollection.from_raster_collections(scenes_tuple, sort_scenes=False) + assert len(scoll) == 2, 'wrong number of scenes' + assert not scoll.is_sorted, 'expected an unsorted SceneCollection' + assert scoll.timestamps[0] == str(scenes_list[0].scene_properties.acquisition_time), \ + 'wrong order of scenes' -def test_scene_collection(): - - sc = SceneCollection() + # sort the scene collection using its sort method + s2_ds_3 = s2_ds_2.copy() + test_time = datetime.datetime(1900,1,1) + s2_ds_3.scene_properties.acquisition_time = test_time + scoll = SceneCollection.from_raster_collections([s2_ds_1, s2_ds_2, s2_ds_3], sort_scenes=False) + 
scoll_sorted = scoll.sort() + assert scoll_sorted.is_sorted, 'expected a sorted SceneCollection' + assert scoll_sorted.timestamps[0] == str(test_time), 'expected a different timestamp' + # sort descending + scoll_sorted_desc = scoll.sort(sort_direction='desc') + assert scoll_sorted_desc.is_sorted, 'expected a sorted SceneCollection' + assert scoll_sorted_desc.timestamps[-1] == str(test_time), 'expected a different timestamp' + \ No newline at end of file From a7e1d5186b5433f63927a9118170b5eea7f7a5d5 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 28 Nov 2022 12:25:04 +0100 Subject: [PATCH 049/125] drafting spatial clipping functionality (to be tested and extended) --- eodal/core/band.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/eodal/core/band.py b/eodal/core/band.py index 050a3862..201b5e22 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -1124,6 +1124,36 @@ def copy(self): attrs = deepcopy(self.__dict__) return Band(**attrs) + def clip( + self, + clipping_bounds: Path | gpd.GeoDataFrame | Tuple[float,float,float,float] + ): + """ + Clip a band object to a spatial extent. + + :param clipping_bounds: + spatial bounds to clip the Band to. Only clipping to rectangular shapes + is supported. Can be either a vector file, a `GeoDataFrame` or a tuple + with (xmin, ymin, xmax, ymax). Vector files and `GeoDataFrame` are + reprojected into the bands' coordinate system if required, while the + coordinate tuple MUST be provided in the CRS of the band. + :returns: + clipped band instance. 
+ """ + if isinstance(clipping_bounds, Path): + clipping_bounds = gpd.read_file(clipping_bounds) + # check inputs + if isinstance(clipping_bounds, tuple): + if len(clipping_bounds) != 4: + raise ValueError('Expected four coordinates (xmin, ymin, xmax, ymax)') + xmin, ymin, xmax, ymax = clipping_bounds + elif isinstance(clipping_bounds, gpd.GeoDataFrame): + # get the bounding box of the FIRST feature + _clipping_bounds = clipping_bounds.copy() + _clipping_bounds = _clipping_bounds.bounds + xmin, ymin, xmax, ymax = list(clipping_bounds) + + def get_attributes(self, **kwargs) -> Dict[str, Any]: """ Returns raster data attributes in ``rasterio`` compatible way From c42f14a2bdaf8dc724e0d2b41af416ca5408da44 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 28 Nov 2022 12:25:29 +0100 Subject: [PATCH 050/125] drafting get_feature_timseries method to extract time series --- eodal/core/scene.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/eodal/core/scene.py b/eodal/core/scene.py index f6138a19..0c0e63e6 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -21,10 +21,12 @@ import datetime import dateutil.parser +import geopandas as gpd import numpy as np from collections.abc import MutableMapping from copy import deepcopy +from pathlib import Path from typing import Callable, List, Optional, Tuple from eodal.core.raster import RasterCollection @@ -330,7 +332,10 @@ def copy(self): def dump(self): pass - def get_pixels(self): + def get_feature_timeseries( + self, + vector_features: Path | gpd.GeoDataFrame + ) -> gpd.GeoDataFrame: pass def load(self): From ff2f0f7ae8c5426cd9377219295b250f7426a796 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 28 Nov 2022 15:52:52 +0100 Subject: [PATCH 051/125] further drafting of clip method --- eodal/core/band.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/eodal/core/band.py b/eodal/core/band.py index 201b5e22..c9b394af 100644 --- a/eodal/core/band.py +++ 
b/eodal/core/band.py @@ -1126,17 +1126,18 @@ def copy(self): def clip( self, - clipping_bounds: Path | gpd.GeoDataFrame | Tuple[float,float,float,float] + clipping_bounds: Path | gpd.GeoDataFrame | Tuple[float,float,float,float] | Polygon ): """ Clip a band object to a spatial extent. :param clipping_bounds: spatial bounds to clip the Band to. Only clipping to rectangular shapes - is supported. Can be either a vector file, a `GeoDataFrame` or a tuple - with (xmin, ymin, xmax, ymax). Vector files and `GeoDataFrame` are - reprojected into the bands' coordinate system if required, while the - coordinate tuple MUST be provided in the CRS of the band. + is supported. Can be either a vector file, a shapely `Polygon`, a + `GeoDataFrame` or a coordinate tuple with (xmin, ymin, xmax, ymax). + Vector files and `GeoDataFrame` are reprojected into the bands' coordinate + system if required, while the coordinate tuple and shapely geometry **MUST** + be provided in the CRS of the band. :returns: clipped band instance. """ @@ -1152,6 +1153,18 @@ def clip( _clipping_bounds = clipping_bounds.copy() _clipping_bounds = _clipping_bounds.bounds xmin, ymin, xmax, ymax = list(clipping_bounds) + elif isinstance(clipping_bounds, Polygon): + xmin, ymin, xmax, ymax = clipping_bounds.bounds + else: + raise TypeError(f'{type(clipping_bounds)} is not supported') + # actual clipping operation. 
Calculate the rows and columns where to clip + # the band + x_coords, y_coords = self.coordinates['x'], self.coordinates['y'] + + if xmin > x_coords[0]: + min_col = np.argmin(abs(xmin - x_coords)) + else: + xmin = x_coords[0] def get_attributes(self, **kwargs) -> Dict[str, Any]: From d590d21bf05b07dcb54b28a74fbff0b8fa99b220 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 28 Nov 2022 15:53:03 +0100 Subject: [PATCH 052/125] defining test for clip method --- tests/core/test_band.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/core/test_band.py b/tests/core/test_band.py index 9a1c5cd4..b20ba1c8 100644 --- a/tests/core/test_band.py +++ b/tests/core/test_band.py @@ -376,3 +376,15 @@ def test_from_vector(get_polygons): assert band_from_points.values.dtype == 'uint32', 'wrong data type' assert band_from_points.reduce(method='max')['max'] == \ point_gdf.GIS_ID.values.astype(int).max(), 'miss-match in band statistics' + +def test_clip_band(get_test_band): + """ + test clipping a band by a rectangle (spatial sub-setting) + """ + band = get_test_band() + # define a polygon to clip the band to + # first case: the polygon is smaller than the band and lies within its bounds + band_bounds = band.bounds + clip_bounds = band_bounds.buffer(-20) + band_clipped = band.clip(clipping_bounds=clip_bounds) + From 28bad2b3a917c7bb85e3b9ff6e13cc2e276e3d72 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 28 Nov 2022 17:58:22 +0100 Subject: [PATCH 053/125] finished draft of clip method (untested) --- eodal/core/band.py | 45 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/eodal/core/band.py b/eodal/core/band.py index c9b394af..01426b71 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -615,7 +615,7 @@ def _get_pixel_geometries( of the raster band and extraction of centroid coordinates if the vector features are of type ``Polygon`` or ``MultiPolygon`` - :param vector_features: + :param vector_features: 
passed vector features to calling instance or class method :param fpath_raster: optional file path to the raster dataset. To be used when @@ -1161,11 +1161,50 @@ def clip( # the band x_coords, y_coords = self.coordinates['x'], self.coordinates['y'] + # left column index if xmin > x_coords[0]: min_col = np.argmin(abs(xmin - x_coords)) else: - xmin = x_coords[0] - + min_col = x_coords[0] + # right column index + if xmax < x_coords[-1]: + max_col = np.argmin(abs(xmax - x_coords)) + else: + max_col = x_coords[-1] + # lower row index + if ymin > y_coords[0]: + min_row = np.argmin(abs(ymin - y_coords)) + else: + min_row = y_coords[0] + # upper row index + if ymax < y_coords[-1]: + max_row = np.argmin(abs(ymax - y_coords)) + else: + max_row = y_coords[-1] + + # get attributes of the raster + attrs = self.get_attributes() + # get its GeoInfo and update it accordingly + geo_info = self.geo_info + new_geo_info = GeoInfo( + epsg=geo_info.epsg, + ulx=min_col, + uly=max_row, + pixres_x=geo_info.pixres_x, + pixres_y=geo_info.pixres_y + ) + values = self.values.copy() + new_values = values[min_col:max_col+1, min_row:max_row+1] + attrs.update({ + 'geo_info': new_geo_info, + 'values': new_values, + 'band_name': self.band_name, + 'band_alias': self.band_alias, + 'wavelength_info': self.wavelength_info + }) + # open a new Band instance and add the sliced array + out_band = Band(**attrs) + return out_band def get_attributes(self, **kwargs) -> Dict[str, Any]: """ From e2a3341c53cef1f0ab2218811675b4365494b26a Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 11:00:41 +0100 Subject: [PATCH 054/125] fixed a lot of bugs in the clip() method, tests passing now --- eodal/core/band.py | 54 ++++++++++++++++++++++++++++++---------------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/eodal/core/band.py b/eodal/core/band.py index 01426b71..b0344428 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -72,6 +72,7 @@ ReprojectionError, ) from 
eodal.utils.reprojection import reproject_raster_dataset, check_aoi_geoms +from _ast import Or class BandOperator(Operator): @@ -1160,48 +1161,65 @@ def clip( # actual clipping operation. Calculate the rows and columns where to clip # the band x_coords, y_coords = self.coordinates['x'], self.coordinates['y'] - + # check for overlap first + clip_shape = Polygon( + zip( + np.arange(xmin, xmax, abs(self.geo_info.pixres_x)), + np.arange(ymin, ymax, abs(self.geo_info.pixres_y)) + ) + ) + if not (clip_shape.overlaps(self.bounds) or self.bounds.covers(clip_shape) + or self.bounds.equals(clip_shape) or self.bounds.overlaps(clip_shape) + ): + raise ValueError(f'Clipping bounds do not overlap Band') # left column index if xmin > x_coords[0]: min_col = np.argmin(abs(xmin - x_coords)) + ulx = x_coords[min_col] else: - min_col = x_coords[0] + min_col = 0 + ulx = x_coords[0] # right column index if xmax < x_coords[-1]: max_col = np.argmin(abs(xmax - x_coords)) else: - max_col = x_coords[-1] - # lower row index - if ymin > y_coords[0]: - min_row = np.argmin(abs(ymin - y_coords)) + max_col = len(x_coords) + # lower row index (y coordinates are sorted descending!) 
+ if ymin > y_coords[-1]: + min_row = np.argmin(abs(ymin - y_coords[::-1])) else: - min_row = y_coords[0] + min_row = 0 # upper row index - if ymax < y_coords[-1]: - max_row = np.argmin(abs(ymax - y_coords)) + if ymax < y_coords[0]: + max_row = np.argmin(abs(ymax - y_coords[::-1])) + uly = y_coords[::-1][max_row] else: - max_row = y_coords[-1] + max_row = len(y_coords) + uly = y_coords[0] - # get attributes of the raster - attrs = self.get_attributes() # get its GeoInfo and update it accordingly geo_info = self.geo_info new_geo_info = GeoInfo( epsg=geo_info.epsg, - ulx=min_col, - uly=max_row, + ulx=ulx, + uly=uly, pixres_x=geo_info.pixres_x, pixres_y=geo_info.pixres_y ) values = self.values.copy() - new_values = values[min_col:max_col+1, min_row:max_row+1] - attrs.update({ + new_values = values[min_row:max_row,min_col:max_col] + attrs = { 'geo_info': new_geo_info, 'values': new_values, 'band_name': self.band_name, 'band_alias': self.band_alias, - 'wavelength_info': self.wavelength_info - }) + 'wavelength_info': self.wavelength_info, + 'nodata': self.nodata, + 'scale': self.scale, + 'offset': self.offset, + 'unit': self.unit, + 'is_tiled': self.is_tiled + } # open a new Band instance and add the sliced array out_band = Band(**attrs) return out_band From 36bae632c814688ebff411085f0b0d994fee723b Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 11:00:59 +0100 Subject: [PATCH 055/125] added more tests for the Band.clip() method --- tests/core/test_band.py | 66 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/tests/core/test_band.py b/tests/core/test_band.py index b20ba1c8..363318f8 100644 --- a/tests/core/test_band.py +++ b/tests/core/test_band.py @@ -387,4 +387,68 @@ def test_clip_band(get_test_band): band_bounds = band.bounds clip_bounds = band_bounds.buffer(-20) band_clipped = band.clip(clipping_bounds=clip_bounds) - + assert isinstance(band_clipped, Band), 'expected a band object' + assert 
band_clipped.band_name == band.band_name, 'band name not copied' + assert band_clipped.alias == band.alias, 'band alias not copied' + assert band_clipped.unit == band.unit, 'unit not copied' + assert band_clipped.scale == band.scale, 'scale not copied' + assert band_clipped.offset == band.offset, 'offset not copied' + assert band_clipped.transform != band.transform, 'the transformation must not be the same' + assert band_clipped.nrows < band.nrows, 'number of rows of clipped band must be smaller' + assert band_clipped.ncols < band.ncols, 'number of columns of clipped band must be smaller' + expected_shape = ( + int(band.nrows - 4), # -4 because of 20m inwards buffering (resolution is 10m) + int(band.ncols - 4) # -4 because of 20m inwards buffering (resolution is 10m) + ) + assert band_clipped.values.shape == expected_shape, 'wrong shape of clipped band' + + # second case: clip to a polygon larger than the Band -> should return the same Band + clip_bounds = band.bounds.buffer(20) + band_clipped = band.clip(clip_bounds) + assert (band_clipped == band).values.all(), 'the bands must be the same' + + # third case: bounding box outside the Band -> should raise an error + clip_bounds = (100, 100, 300, 300) + with pytest.raises(ValueError): + band_clipped = band.clip(clip_bounds) + + # fourth case: bounding box is the same as the bounds of the Band + clip_bounds = band.bounds + band_clipped = band.clip(clip_bounds) + assert (band_clipped == band).values.all(), 'the bands must be the same' + + # fifth case: bounding box partially overlaps the Band (different test cases) + band_bounds_xy = clip_bounds.exterior.xy + clip_bounds = ( + min(band_bounds_xy[0]) - 100, # xmin + min(band_bounds_xy[1]) - 231, # ymin + max(band_bounds_xy[0]) - 44, # xmax + max(band_bounds_xy[1]) - 85 # ymax + ) + band_clipped = band.clip(clip_bounds) + assert band_clipped.nrows < band.nrows, 'number of rows must not be the same' + assert band_clipped.ncols < band.ncols, 'number of columns must not be 
the same' + # all rows should be the same but not the columns + clip_bounds = ( + min(band_bounds_xy[0]) - 1000, # xmin + min(band_bounds_xy[1]), # ymin + max(band_bounds_xy[0]) - 1000, # xmax + max(band_bounds_xy[1]) # ymax + ) + band_clipped = band.clip(clip_bounds) + assert band_clipped.nrows == band.nrows, 'number of rows must be the same' + assert band_clipped.ncols < band.ncols, 'number of columns must not be the same' + assert band_clipped.geo_info.ulx == band.geo_info.ulx, 'upper left x should be the same' + assert band_clipped.geo_info.uly == band.geo_info.uly, 'upper left y should be the same' + # all columns should be the same but not the rows + clip_bounds = ( + min(band_bounds_xy[0]), # xmin + min(band_bounds_xy[1]) - 2000, # ymin + max(band_bounds_xy[0]), # xmax + max(band_bounds_xy[1]) - 2000 # ymax + ) + band_clipped = band.clip(clip_bounds) + assert band_clipped.nrows < band.nrows, 'number of rows must not be the same' + assert band_clipped.ncols == band.ncols, 'number of columns must be the same' + assert band_clipped.geo_info.ulx == band.geo_info.ulx, 'upper left x should be the same' + assert band_clipped.geo_info.uly == band.geo_info.uly - 2000, 'wrong upper left y coordinate' From 4dcb6d2ce29fbb8ffdda463a4d117e84ad007cf3 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 11:12:10 +0100 Subject: [PATCH 056/125] updated tests to latest changes in core modules --- tests/core/test_raster_collection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/core/test_raster_collection.py b/tests/core/test_raster_collection.py index ea88164a..1e129236 100644 --- a/tests/core/test_raster_collection.py +++ b/tests/core/test_raster_collection.py @@ -24,7 +24,7 @@ def test_ndarray_constructor(): handler = RasterCollection() assert handler.empty, 'RasterCollection is not empty' - assert handler.scene_properties.acquisition_time == datetime.datetime(2999,1,1) + assert not handler.is_scene, 'empty RasterCollection cannot be a 
Scene' assert len(handler) == 0, 'there should not be any items so far' assert handler.is_bandstack() is None, 'cannot check for bandstack without bands' @@ -78,7 +78,7 @@ def test_ndarray_constructor(): values=zeros, geo_info=geo_info ) - + handler.scene_properties.acquisition_time = datetime.datetime.now() # mask the second band based on the first one masked = handler.mask(mask='random', mask_values=[0.15988288, 0.38599023]) assert masked.band_names == handler.band_names, 'band names not passed on correctly' From 55a72657a6cfda01ecac1fc6de0a287e8b5430bd Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 11:28:39 +0100 Subject: [PATCH 057/125] adding inplace option for clipping method --- eodal/core/band.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/eodal/core/band.py b/eodal/core/band.py index b0344428..07551158 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -1127,7 +1127,8 @@ def copy(self): def clip( self, - clipping_bounds: Path | gpd.GeoDataFrame | Tuple[float,float,float,float] | Polygon + clipping_bounds: Path | gpd.GeoDataFrame | Tuple[float,float,float,float] | Polygon, + inplace: Optional[bool] = False ): """ Clip a band object to a spatial extent. @@ -1139,6 +1140,10 @@ def clip( Vector files and `GeoDataFrame` are reprojected into the bands' coordinate system if required, while the coordinate tuple and shapely geometry **MUST** be provided in the CRS of the band. + :param inplace: + if False (default) returns a copy of the ``Band`` instance + with the changes applied. If True overwrites the values + in the current instance. :returns: clipped band instance. 
""" @@ -1208,21 +1213,14 @@ def clip( ) values = self.values.copy() new_values = values[min_row:max_row,min_col:max_col] - attrs = { - 'geo_info': new_geo_info, - 'values': new_values, - 'band_name': self.band_name, - 'band_alias': self.band_alias, - 'wavelength_info': self.wavelength_info, - 'nodata': self.nodata, - 'scale': self.scale, - 'offset': self.offset, - 'unit': self.unit, - 'is_tiled': self.is_tiled - } - # open a new Band instance and add the sliced array - out_band = Band(**attrs) - return out_band + + if inplace: + object.__setattr__(self, "values", new_values) + object.__setattr__(self, "geo_info", new_geo_info) + else: + attrs = deepcopy(self.__dict__) + attrs.update({"values": new_values, "geo_info": new_geo_info}) + return Band(**attrs) def get_attributes(self, **kwargs) -> Dict[str, Any]: """ From 16e038c797c7af1485a95efcad0df392ab9a269c Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 11:28:53 +0100 Subject: [PATCH 058/125] adding test for inplace option of clipping method --- tests/core/test_band.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/core/test_band.py b/tests/core/test_band.py index 363318f8..6e648754 100644 --- a/tests/core/test_band.py +++ b/tests/core/test_band.py @@ -452,3 +452,11 @@ def test_clip_band(get_test_band): assert band_clipped.ncols == band.ncols, 'number of columns must be the same' assert band_clipped.geo_info.ulx == band.geo_info.ulx, 'upper left x should be the same' assert band_clipped.geo_info.uly == band.geo_info.uly - 2000, 'wrong upper left y coordinate' + + # test with inplace == True + band_before_clip = band.copy() + band.clip(clip_bounds, inplace=True) + assert band_clipped.nrows < band_before_clip.nrows, 'number of rows must not be the same' + assert band_clipped.ncols == band_before_clip.ncols, 'number of columns must be the same' + assert band_clipped.geo_info.ulx == band_before_clip.geo_info.ulx, 'upper left x should be the same' + assert band_clipped.geo_info.uly == 
band_before_clip.geo_info.uly - 2000, 'wrong upper left y coordinate' From 383e7db9be59a2a7ab54dbec13a05bf710752755 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 11:29:11 +0100 Subject: [PATCH 059/125] drafting RasterCollection.clip_bands for spatial subsetting --- eodal/core/raster.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index dba621de..419fe598 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -935,6 +935,41 @@ class methods (`Band.from_rasterio`, `Band.from_vector`) except Exception as e: raise KeyError(f"Cannot add raster band: {e}") + @check_band_names + def clip_bands( + self, + band_selection: Optional[List[str]] = None, + inplace: Optional[bool] = False, + **kwargs + ): + """ + Clip bands in RasterCollection to a user-defined spatial bounds. + """ + if band_selection is None: + band_selection = self.band_names + # loop over bands and try to subset them spatially + # initialize a new raster collection if inplace is False + collection = None + if inplace: + kwargs.update({'inplace': True}) + if not inplace: + attrs = deepcopy(self.__dict__) + attrs.pop("_collection") + collection = RasterCollection(**attrs) + + # loop over band reproject the selected ones + for band_name in band_selection: + if inplace: + self.collection[band_name].reproject(**kwargs) + else: + band = self.get_band(band_name) + collection.add_band(band_constructor=band.clip, **kwargs) + + if not inplace: + return collection + + if + @check_band_names def plot_band(self, band_name: str, **kwargs) -> Figure: """ From 5309b64b79019903748c58a5cc8742b4060770a7 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 15:17:24 +0100 Subject: [PATCH 060/125] implemented RasterCollection.clip_bands() method --- eodal/core/raster.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index 419fe598..9ba068f8 100644 --- 
a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -960,7 +960,7 @@ def clip_bands( # loop over band reproject the selected ones for band_name in band_selection: if inplace: - self.collection[band_name].reproject(**kwargs) + self.collection[band_name].clip(**kwargs) else: band = self.get_band(band_name) collection.add_band(band_constructor=band.clip, **kwargs) @@ -968,8 +968,6 @@ def clip_bands( if not inplace: return collection - if - @check_band_names def plot_band(self, band_name: str, **kwargs) -> Figure: """ From 1079e27acb54852ad54e9e606dd9a7b2eb308de1 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 15:17:41 +0100 Subject: [PATCH 061/125] added tests for RasterCollection.clip_bands() method --- tests/core/test_raster_collection.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/tests/core/test_raster_collection.py b/tests/core/test_raster_collection.py index 1e129236..87fdf96b 100644 --- a/tests/core/test_raster_collection.py +++ b/tests/core/test_raster_collection.py @@ -8,15 +8,11 @@ import matplotlib.pyplot as plt import numpy as np -from pathlib import Path - from eodal.core.band import GeoInfo from eodal.core.band import Band -from eodal.core.raster import SceneProperties from eodal.core.raster import RasterCollection from eodal.utils.exceptions import BandNotFoundError - def test_ndarray_constructor(): """ basic test with ``np.ndarray`` backend in raster collection @@ -236,3 +232,26 @@ def test_resampling(datadir,get_bandstack): fpath_out = datadir.joinpath('test.jp2') resampled.to_rasterio(fpath_out) assert fpath_out.exists(), 'output-file not created' + +def test_clipping(get_bandstack): + """Spatial clipping (subsetting) of RasterCollections""" + fpath_raster = get_bandstack() + rcoll = RasterCollection.from_multi_band_raster( + fpath_raster=fpath_raster + ) + # clip the collection to a Polygon buffered 100m inwards of the + # bounding box of the first band + clipping_bounds = 
rcoll[rcoll.band_names[0]].bounds.buffer(-100) + rcoll_clipped = rcoll.clip_bands(clipping_bounds=clipping_bounds) + assert isinstance(rcoll_clipped, RasterCollection), 'expected a RasterCollection' + assert rcoll_clipped[rcoll_clipped.band_names[0]].bounds != \ + rcoll[rcoll.band_names[0]].bounds, 'band was not clipped' + # do the same inplace + rcoll.clip_bands(clipping_bounds=clipping_bounds, inplace=True) + assert rcoll_clipped[rcoll_clipped.band_names[0]].bounds == \ + rcoll[rcoll.band_names[0]].bounds, 'band was not clipped' + # clipping outside the boundaries of the RasterCollection -> should throw an error + clipping_bounds = (0, 0, 100, 100) + with pytest.raises(ValueError): + rcoll.clip_bands(clipping_bounds=clipping_bounds, inplace=True) + \ No newline at end of file From e12f4250129dcdff30a46c3ee5097c5e2d6bc48e Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 20:52:26 +0100 Subject: [PATCH 062/125] added fixture to get a SceneCollection --- tests/conftest.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index facad437..9f13eaa4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,6 +8,9 @@ from distutils import dir_util from pathlib import Path + +from eodal.core.raster import RasterCollection +from eodal.core.scene import SceneCollection from eodal.downloader.utils import unzip_datasets @pytest.fixture @@ -216,3 +219,18 @@ def _get_polygons(): ) return testdata_polys return _get_polygons + +@pytest.fixture() +def get_scene_collection(get_bandstack): + """fixture returing a SceneCollection with three scenes""" + def _get_scene_collection(): + fpath_raster = get_bandstack() + # open three scenes + scene_list = [] + for i in range(3): + ds = RasterCollection.from_multi_band_raster(fpath_raster=fpath_raster) + ds.scene_properties.acquisition_time = 1000 * (i+1) + scene_list.append(ds) + scoll = SceneCollection.from_raster_collections(scene_list, indexed_by_timestamps=False) + 
return scoll + return _get_scene_collection From 058d1377383d4118dbc7eb12f9365fadd579db56 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 20:52:50 +0100 Subject: [PATCH 063/125] updating missing method documentation --- eodal/core/raster.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index 9ba068f8..8ef2dea3 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -1615,6 +1615,12 @@ def to_rasterio( def to_xarray(self, band_selection: Optional[List[str]] = None) -> xr.DataArray: """ Converts bands in collection a ``xarray.DataArray`` + + :param band_selection: + selection of bands to process. If not provided uses all + bands + :returns: + `xarray.DataArray` created from RasterCollection. """ if band_selection is None: band_selection = self.band_names From 210b99e487b65f5a8697e9c3b681c565492d1d63 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 20:53:04 +0100 Subject: [PATCH 064/125] added SceneCollection.to_xarray() method --- eodal/core/scene.py | 102 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 20 deletions(-) diff --git a/eodal/core/scene.py b/eodal/core/scene.py index 0c0e63e6..b99a789a 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -23,11 +23,12 @@ import dateutil.parser import geopandas as gpd import numpy as np +import pandas as pd +import xarray as xr from collections.abc import MutableMapping from copy import deepcopy -from pathlib import Path -from typing import Callable, List, Optional, Tuple +from typing import Any, Callable, List, Optional, Tuple from eodal.core.raster import RasterCollection from eodal.utils.exceptions import SceneNotFoundError @@ -41,15 +42,19 @@ class SceneCollection(MutableMapping): def __init__( self, scene_constructor: Optional[Callable[..., RasterCollection]] = None, + indexed_by_timestamps: Optional[bool] = True, *args, **kwargs ): """ Initializes a SceneCollection object with 0 to N scenes. 
- :param scene_constructor: + :param scene_constructor: optional callable returning an `~eodal.core.raster.RasterCollection` instance. + :param indexed_by_timestamps: + if True, all scene indices are interpreted as timestamps (`datetime.datetime`). + Set to False if scene indices should be treated as different data types :param args: arguments to pass to `scene_constructor` or one of RasterCollection's class methods (e.g., `RasterCollection.from_multi_band_raster`) @@ -63,6 +68,8 @@ class methods (e.g., `RasterCollection.from_multi_band_raster`) self._frozen = True self._is_sorted = True + object.__setattr__(self, 'indexed_by_timestamps', indexed_by_timestamps) + self._identifiers = [] if scene_constructor is not None: scene = scene_constructor.__call__(*args, **kwargs) @@ -70,19 +77,22 @@ class methods (e.g., `RasterCollection.from_multi_band_raster`) def __getitem__(self, key: str | slice) -> RasterCollection: - def _get_scene_from_key(key: str) -> RasterCollection: - if str(key) in self.timestamps: - # most likely time stamps are passed as strings - if isinstance(key, str): + def _get_scene_from_key(key: str | Any) -> RasterCollection: + if self.indexed_by_timestamps: + if str(key) in self.timestamps: + # most likely time stamps are passed as strings # we infer the format using dateutil key = dateutil.parser.parse(key) - return self.collection[key] - elif key in self.identifiers: + return self.collection[key] + else: + if key in self.timestamps: + return self.collection[key] + if key in self.identifiers: scene_idx = self.identifiers.index(key) return self.__getitem__(self.timestamps[scene_idx]) # has a single key or slice been passed? 
- if isinstance(key, str): + if not isinstance(key, slice): try: return _get_scene_from_key(key=key) except IndexError: @@ -90,7 +100,7 @@ def _get_scene_from_key(key: str) -> RasterCollection: f'Could not find a scene for key {key} in collection' ) - elif isinstance(key, slice): + else: if not self.is_sorted: raise ValueError('Slices are not permitted on unsorted SceneCollections') # find the index of the start and the end of the slice @@ -103,6 +113,10 @@ def _get_scene_from_key(key: str) -> RasterCollection: # if start is None use the first scene if slice_start is None: if isinstance(slice_end, datetime.date): + if not self.indexed_by_timestamps: + raise ValueError( + 'Cannot slice on timestamps when `indexed_by_timestamps` is False' + ) slice_start = list(self.collection.keys())[0].date() else: if slice_end in self.identifiers: @@ -113,6 +127,10 @@ def _get_scene_from_key(key: str) -> RasterCollection: end_increment = 0 if slice_end is None: if isinstance(slice_start, datetime.date): + if not self.indexed_by_timestamps: + raise ValueError( + 'Cannot slice on timestamps when `indexed_by_timestamps` is False' + ) slice_end = list(self.collection.keys())[-1].date() else: if slice_start in self.identifiers: @@ -133,6 +151,10 @@ def _get_scene_from_key(key: str) -> RasterCollection: scenes = self.identifiers # allow selection by date range elif isinstance(slice_start, datetime.date) and isinstance(slice_end, datetime.date): + if not self.indexed_by_timestamps: + raise ValueError( + 'Cannot slice on timestamps when `indexed_by_timestamps` is False' + ) out_scoll = SceneCollection() for timestamp, scene in self: if end_increment == 0: @@ -194,8 +216,12 @@ def __repr__(self) -> str: if self.empty: return 'Empty EOdal SceneCollection' else: + if self.indexed_by_timestamps: + timestamps = ', '.join(self.timestamps) + else: + timestamps = ', '.join([str(x) for x in self.timestamps]) return f'EOdal SceneCollection\n----------------------\n' + \ - f'# Scenes: 
{len(self)}\nTimestamps: {", ".join(self.timestamps)}\n' + \ + f'# Scenes: {len(self)}\nTimestamps: {timestamps}\n' + \ f'Scene Identifiers: {", ".join(self.identifiers)}' @staticmethod @@ -223,9 +249,12 @@ def empty(self) -> bool: return len(self) == 0 @property - def timestamps(self) -> List[str]: + def timestamps(self) -> List[str | Any]: """acquisition timestamps of scenes in collection""" - return [str(x) for x in list(self.collection.keys())] + if self.indexed_by_timestamps: + return [str(x) for x in list(self.collection.keys())] + else: + return list(self.collection.keys()) @property def identifiers(self) -> List[str]: @@ -249,7 +278,8 @@ def from_raster_collections( cls, raster_collections: List[RasterCollection] | Tuple[RasterCollection], sort_scenes: Optional[bool] = True, - sort_direction: Optional[str] = 'asc' + sort_direction: Optional[str] = 'asc', + **kwargs ): """ Create a SceneCollection from a list/tuple of N RasterCollection objects. @@ -263,6 +293,8 @@ def from_raster_collections( :param sort_direction: direction of sorting. Must be either 'asc' (ascending) or 'desc' (descending). Ignored if `sort_scenes` is False. + :param kwargs: + key word arguments to pass to `SceneCollection` constructor call. :returns: SceneCollection instance """ @@ -281,7 +313,7 @@ def from_raster_collections( sort_idx = np.array([x for x in range(len(raster_collections))]) is_sorted = False # open a SceneCollection instance and add the scenes - scoll = cls() + scoll = cls(**kwargs) scoll.is_sorted = is_sorted for idx in sort_idx: scoll.add_scene(scene_constructor=raster_collections[idx].copy()) @@ -334,9 +366,23 @@ def dump(self): def get_feature_timeseries( self, - vector_features: Path | gpd.GeoDataFrame + **kwargs ) -> gpd.GeoDataFrame: - pass + """ + Get a time series for 1:N vector features from SceneCollection. + + :param kwargs: + key word arguments to pass to `~RasterCollection.get_pixels()`. 
+ :returns: + ``GeoDataFrame`` with extracted raster values per feature and time stamp + """ + # loop over scenes in collection and get the feature values + gdf_list = [] + for timestamp, scene in self: + _gdf = scene.get_pixels(**kwargs) + _gdf['acquisition_time'] = timestamp + gdf_list.append(_gdf) + return pd.concat(gdf_list) def load(self): pass @@ -368,5 +414,21 @@ def sort( scoll.add_scene(scenes[idx].copy()) return scoll - def to_xarray(self): - pass + def to_xarray(self, **kwargs) -> xr.DataArray: + """ + Converts all scenes in a SceneCollection to a single `xarray.DataArray`. + + :param kwargs: + key word arguments to pass to `~RasterCollection.to_xarray` + :returns: + SceneCollection as `xarray.DataArray` + """ + # loop over scenes in Collection and convert them to xarray.DataArray + xarray_list = [] + for timestamp, scene in self: + _xr = scene.to_xarray(**kwargs) + # _xr = _xr.to_dataset() + _xr = _xr.expand_dims(time=[timestamp]) + xarray_list.append(_xr) + # concatenate into a single xarray along the time dimension + return xr.concat(xarray_list, dim='time') From 92a75ac774d019b0806749774c4d6c5bd406be59 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 20:53:21 +0100 Subject: [PATCH 065/125] added test for SceneCollection.to_xarray() method --- tests/core/test_scene_collection.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/core/test_scene_collection.py b/tests/core/test_scene_collection.py index a08309ee..474e7679 100644 --- a/tests/core/test_scene_collection.py +++ b/tests/core/test_scene_collection.py @@ -6,6 +6,7 @@ import pytest import datetime +import xarray as xr from datetime import date @@ -14,6 +15,7 @@ from eodal.core.scene import SceneCollection from eodal.core.sensors import Sentinel2 + def test_raster_is_scene(get_bandstack): """test the is_scene attribute of RasterCollections""" @@ -146,4 +148,16 @@ def test_scene_collection(get_s2_safe_l2a, get_polygons_2, get_bandstack): scoll_sorted_desc = 
scoll.sort(sort_direction='desc') assert scoll_sorted_desc.is_sorted, 'expected a sorted SceneCollection' assert scoll_sorted_desc.timestamps[-1] == str(test_time), 'expected a different timestamp' + +def test_scene_collection_to_xarray(get_scene_collection): + """convert SceneCollection to xarray""" + scoll = get_scene_collection() + xarr = scoll.to_xarray() + assert isinstance(scoll[1000], RasterCollection), 'expected a RasterCollection' + assert isinstance(xarr, xr.DataArray), 'expected a DataArray' + assert len(xarr) == len(scoll), 'wrong length of DataArray' + assert (xarr.time.values == scoll.timestamps).all(), 'wrong timestamps in DataArray' + for idx in range(len(scoll)): + assert (xarr.values[idx,:,:,:] == scoll[scoll.timestamps[idx]].get_values()).all(), 'wrong ' + \ No newline at end of file From cabf893a36b21987850b9c02d1e852d212fe7711 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 20:53:34 +0100 Subject: [PATCH 066/125] removing blank lines --- tests/core/test_scene_collection.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/core/test_scene_collection.py b/tests/core/test_scene_collection.py index 474e7679..28272f90 100644 --- a/tests/core/test_scene_collection.py +++ b/tests/core/test_scene_collection.py @@ -159,5 +159,3 @@ def test_scene_collection_to_xarray(get_scene_collection): assert (xarr.time.values == scoll.timestamps).all(), 'wrong timestamps in DataArray' for idx in range(len(scoll)): assert (xarr.values[idx,:,:,:] == scoll[scoll.timestamps[idx]].get_values()).all(), 'wrong ' - - \ No newline at end of file From 9d8b8a5ce351d68414f1979db1a2e2350a8ea0d4 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 29 Nov 2022 20:56:20 +0100 Subject: [PATCH 067/125] updating CHANGELOG --- CHANGELOG.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f398fbd9..235b3e28 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,7 +10,7 @@ The format is based on `Keep a Changelog`_, 
and this project adheres to `Semanti Categories for changes are: Added, Changed, Deprecated, Removed, Fixed, Security. -Version `0.0.2 < https://github.com/EOA-team/eodal/releases/tag/v0.0.2>`__ +Version `0.0.2 < https://github.com/EOA-team/eodal/releases/tag/v0.1.0>`__ -------------------------------------------------------------------------------- Release date: YYYY-MM-DD @@ -18,6 +18,8 @@ Release date: YYYY-MM-DD - Added: RasterCollection objects are now iterable (iterate over bands in collection) - Added: RasterCollection now have a "apply" method allowing to pass custom functions to RasterCollection objects - Added: RasterCollection now supports numpy-array like slicing using band names or band aliases +- Added: Band and RasterCollection objects now support clipping to rectangular bounds (i.e., spatial sub-setting) +- Added: SceneCollections are collections of 0 to N Scenes (RasterCollection + timestamp) and allow to store multiple Scenes over time Version `0.0.1 < https://github.com/EOA-team/eodal/releases/tag/v0.0.1>`__ From f79e49780ccf2c41adbbbfd80db0b22243e825cb Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 4 Dec 2022 14:33:54 +0100 Subject: [PATCH 068/125] extraction of pixel time series working --- eodal/core/scene.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/eodal/core/scene.py b/eodal/core/scene.py index b99a789a..fc933b38 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -366,11 +366,16 @@ def dump(self): def get_feature_timeseries( self, + reindex_dataframe: Optional[bool] = False, **kwargs ) -> gpd.GeoDataFrame: """ Get a time series for 1:N vector features from SceneCollection. + :param reindex_dataframe: + boolean flag whether to reindex the resulting GeoDataFrame after extracting + data from all scenes. Set to `True` to ensure that the returned GeoDataFrame + has a unique index. `False` by default. :param kwargs: key word arguments to pass to `~RasterCollection.get_pixels()`. 
:returns: @@ -382,7 +387,14 @@ def get_feature_timeseries( _gdf = scene.get_pixels(**kwargs) _gdf['acquisition_time'] = timestamp gdf_list.append(_gdf) - return pd.concat(gdf_list) + # reindex the resulting GeoDataFrame if required + if reindex_dataframe: + gdf = pd.concat(gdf_list) + # reindexing is done by counting the features starting from zero + gdf.index = [x for x in range(gdf.shape[0])] + return gdf + else: + return pd.concat(gdf_list) def load(self): pass From 25d337b390777658408d43792bf63fb0410ce0ee Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 4 Dec 2022 14:34:07 +0100 Subject: [PATCH 069/125] added tests for extracting pixel time series from SceneCollection --- tests/core/test_scene_collection.py | 40 +++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/core/test_scene_collection.py b/tests/core/test_scene_collection.py index 28272f90..0a89c96f 100644 --- a/tests/core/test_scene_collection.py +++ b/tests/core/test_scene_collection.py @@ -6,15 +6,40 @@ import pytest import datetime +import geopandas as gpd +import random import xarray as xr from datetime import date +from shapely.geometry import Point, Polygon +from typing import List from eodal.core.band import Band from eodal.core.raster import RasterCollection from eodal.core.scene import SceneCollection from eodal.core.sensors import Sentinel2 +@pytest.fixture() +def generate_random_points(): + def _generate_random(number: int, polygon: Polygon) -> List[Point]: + """ + Generates random points within a polygon + + :param number: + number of random points to create + :param polygon: + polygon within to sample the points + :returns: + list of randomly sampled points within the polygon bounds + """ + points = [] + minx, miny, maxx, maxy = polygon.bounds + while len(points) < number: + pnt = Point(random.uniform(minx, maxx), random.uniform(miny, maxy)) + if polygon.contains(pnt): + points.append(pnt) + return points + return _generate_random def 
test_raster_is_scene(get_bandstack): """test the is_scene attribute of RasterCollections""" @@ -159,3 +184,18 @@ def test_scene_collection_to_xarray(get_scene_collection): assert (xarr.time.values == scoll.timestamps).all(), 'wrong timestamps in DataArray' for idx in range(len(scoll)): assert (xarr.values[idx,:,:,:] == scoll[scoll.timestamps[idx]].get_values()).all(), 'wrong ' + +def test_scene_collection_time_series(get_scene_collection, generate_random_points): + """time series extraction from scene collection""" + scoll = get_scene_collection() + # sample pixels randomly distributed within the scene collection's spatial extent + bounds = scoll[1000]['B02'].bounds + crs = scoll[1000]['B02'].crs + # get 20 random points for which to extract the time series + random_points = generate_random_points(20, bounds) + random_points_gdf = gpd.GeoDataFrame(geometry=random_points, crs=crs) + points_ts = scoll.get_feature_timeseries(vector_features=random_points_gdf) + assert isinstance(points_ts, gpd.GeoDataFrame), 'expected a GeoDataFrame' + assert 'acquisition_time' in points_ts.columns, 'missing time column' + assert points_ts.shape == (60, 12), 'wrong shape of returned GeoDataFrame object' + From 2489d7a430c1c905b9a65c386bdacd06a01ef9cc Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 4 Dec 2022 16:18:51 +0100 Subject: [PATCH 070/125] reducing bands by multiple polygons now working --- eodal/core/band.py | 141 ++++++++++++++++++++++++++++++--------------- 1 file changed, 95 insertions(+), 46 deletions(-) diff --git a/eodal/core/band.py b/eodal/core/band.py index 07551158..54013142 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -44,21 +44,15 @@ from mpl_toolkits.axes_grid1 import make_axes_locatable from numbers import Number from pathlib import Path -from rasterio import Affine -from rasterio import features +from rasterio import Affine, features from rasterio.coords import BoundingBox from rasterio.crs import CRS from rasterio.drivers import 
driver_from_extension from rasterio.enums import Resampling -from shapely.geometry import box -from shapely.geometry import Point -from shapely.geometry import Polygon -from typing import Any -from typing import Dict -from typing import List -from typing import Optional -from typing import Tuple -from typing import Union +from rasterstats import zonal_stats +from rasterstats.utils import check_stats +from shapely.geometry import box, Point, Polygon +from typing import Any, Dict, List, Optional, Tuple, Union from eodal.core.operators import Operator from eodal.core.utils.geometry import check_geometry_types @@ -72,8 +66,6 @@ ReprojectionError, ) from eodal.utils.reprojection import reproject_raster_dataset, check_aoi_geoms -from _ast import Or - class BandOperator(Operator): """ @@ -1906,28 +1898,44 @@ def reproject( def reduce( self, method: Union[str, List[str]], - by: Optional[Union[Path, gpd.GeoDataFrame]] = None, - method_args: Optional[Dict[str, Any]] = None - ) -> Dict[str, Union[int, float]]: + by: Optional[Path | gpd.GeoDataFrame | str] = None, + ) -> List[Dict[str, int | float]]: """ - Reduces the raster data to scalar values. + Reduces the raster data to scalar values by calling `rasterstats`. + + The reduction can be done on the whole band or by using vector features. :param method: any ``numpy`` function taking a two-dimensional array as input and returning a single scalar. Can be a single function name (e.g., "mean") or a list of function names (e.g., ["mean", "median"]) :param by: - define by what to reduce the band values (not implemented yet!!) - :param method_args: - optional dictionary with arguments to pass to the single methods in - case the reducer method requires extra arguments to function properly - (e.g., `np.quantile`) + define optional vector features by which to reduce the band. 
By passing + `'self'` the method uses the features with which the band was read, otherwise + specify a file-path to vector features or provide a GeoDataFrame :returns: - a dictionary with scalar results + list of dictionaries with scalar results per feature """ + # check by what features the Band should be reduced spatially + # if `by` is None use the full spatial extent of the band + if by is None: + features = gpd.GeoDataFrame(geometry=[self.bounds], crs=self.crs) + else: + if by == 'self': + features = self.features + elif isinstance(by, Path): + features = gpd.read_file(by) + elif isinstance(by, gpd.GeoDataFrame): + features = by.copy() + else: + raise TypeError( + f'by expected "self", Path and GeoDataFrame objects - got {type(by)} instead' + ) + # check if features has the same CRS as the band. Reproject features if required + if not features.crs == self.crs: + features.to_crs(crs=self.crs, inplace=True) - # TODO: implement reduce by vector features - + # check method string passed if isinstance(method, str): method = [method] @@ -1939,27 +1947,68 @@ def reduce( elif self.is_zarr: raise NotImplemented() - # compute statistics - stats = {} - for operator in method: - # formulate numpy expression - expression = f"{numpy_prefix}.{operator}" - # numpy.ma has some different function names to consider - if operator.startswith("nan"): - expression = f"{numpy_prefix}.{operator[3::]}" - elif operator.endswith("nonzero"): - expression = f"{numpy_prefix}.count" - try: - # get function object and use its __call__ method - numpy_function = eval(expression) - # check if there are any function arguments - args = [] - if method_args is not None: - args = method_args.get(method, None) - stats[operator] = numpy_function.__call__(self.values, *args) - except TypeError: - raise Exception(f"Unknown function name for {numpy_prefix}: {operator}") - + # compute statistics by calling rasterstats. 
rasterstats needs the + # Affine transformation matrix to work on numpy arrays + affine = self.geo_info.as_affine() + # check if the passed methods are all within the default list supported + # by rasterstats. If not a ValueError is raised meaning we have to define + # these metrics as additional statistics. + # When the array is masked we also have to define custom functions + default_stats = True + try: + check_stats(stats=method, categorical=False) + except ValueError: + default_stats = False + # default rasterstats call + if default_stats and not self.is_masked_array: + stats = zonal_stats(features, self.values, affine=affine, stats=method) + else: + stats_operator_list = [] + # loop over operators in method list and make them rasterstats compatible + for operator in method: + expression = f"{numpy_prefix}.{operator}" + # When the array is masked, we have to set masked arrays to NaN + _operator = operator + if self.is_masked_array: + _operator = 'nan' + operator + # numpy.ma has some different function names to consider + if operator.startswith("nan"): + expression = f"{numpy_prefix}.{_operator[3::]}" + elif operator.endswith("nonzero"): + expression = f"{numpy_prefix}.count" + + def _fun_prototype(x: np.ndarray | np.ma.MaskedArray): + """ + a function prototype to by-pass custom numpy functions + to rasterstats + """ + return eval(f'{expression}(x)') + add_stats = {operator: deepcopy(_fun_prototype)} + # work-around for masked arrays (unfortunately, rasterstats works not that + # nicely) + vals = self.values.copy() + # cast array to float to set masked arrays to NaN; then we can call np.nan-something + # and force rasterstats to return the correct values + vals = vals.astype(float) + vals = vals.filled(np.nan) + # unfortunately, rasterstats always calculates some default statistcs. 
We therefore + # trick it by calling only the count method and delete the result afterwards + # Also, there seems to be a bug in rasterstats preventing more than a single + # operator to be passed in add_stats (otherwise the values are returned are wrong) + stats = zonal_stats( + features, vals, affine=affine, stats='count', add_stats=add_stats + ) + # delete the count entry + for item in stats: + del item['count'] + stats_operator_list.append(stats) + # combine the list of stats into a format consistent with the standard zonal_stats call + stats = [] + for idx in range(features.shape[0]): + feature_stats = {} + for odx, operator in enumerate(method): + feature_stats[operator] = stats_operator_list[odx][idx][operator] + stats.append(feature_stats) return stats def scale_data( From 1b9724dce10ada1c2b8f1c3109c926a8bbab6a86 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 4 Dec 2022 16:19:09 +0100 Subject: [PATCH 071/125] added test for Band.reduce using multiple polygons --- tests/core/test_band.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/tests/core/test_band.py b/tests/core/test_band.py index 6e648754..8c171d0c 100644 --- a/tests/core/test_band.py +++ b/tests/core/test_band.py @@ -158,18 +158,18 @@ def test_bandstatistics(get_test_band): # get band statistics stats = band.reduce(method=['mean', 'min', 'max']) mean_stats = band.reduce(method='mean') - assert mean_stats['mean'] == stats['mean'], 'miss-match of metrics' - assert stats['min'] == band.values.min(), 'minimum not calculated correctly' - assert stats['max'] == band.values.max(), 'maximum not calculated correctly' + assert mean_stats[0]['mean'] == stats[0]['mean'], 'miss-match of metrics' + assert stats[0]['min'] == band.values.min(), 'minimum not calculated correctly' + assert stats[0]['max'] == band.values.max(), 'maximum not calculated correctly' # convert to GeoDataFrame gdf = band.to_dataframe() assert (gdf.geometry.type == 'Point').all(), 'wrong 
geometry type' assert set(gdf.columns) == {'geometry', 'B02'}, 'wrong column labels' assert gdf.shape[0] == 29674, 'wrong number of pixels converted' - assert gdf.B02.max() == stats['max'], 'band statistics not the same after conversion' - assert gdf.B02.min() == stats['min'], 'band statistics not the same after conversion' - assert gdf.B02.mean() == stats['mean'], 'band statistics not the same after conversion' + assert gdf.B02.max() == stats[0]['max'], 'band statistics not the same after conversion' + assert gdf.B02.min() == stats[0]['min'], 'band statistics not the same after conversion' + assert gdf.B02.mean() == stats[0]['mean'], 'band statistics not the same after conversion' def test_to_xarray(get_test_band): band = get_test_band() @@ -374,7 +374,7 @@ def test_from_vector(get_polygons): dtype_src='uint32' ) assert band_from_points.values.dtype == 'uint32', 'wrong data type' - assert band_from_points.reduce(method='max')['max'] == \ + assert band_from_points.reduce(method='max')[0]['max'] == \ point_gdf.GIS_ID.values.astype(int).max(), 'miss-match in band statistics' def test_clip_band(get_test_band): @@ -460,3 +460,13 @@ def test_clip_band(get_test_band): assert band_clipped.ncols == band_before_clip.ncols, 'number of columns must be the same' assert band_clipped.geo_info.ulx == band_before_clip.geo_info.ulx, 'upper left x should be the same' assert band_clipped.geo_info.uly == band_before_clip.geo_info.uly - 2000, 'wrong upper left y coordinate' + +def test_reduce_band_by_polygons(get_polygons, get_test_band): + """reduction of band raster values by polygons""" + # test reduction by external features + polys = get_polygons() + band = get_test_band() + method = ['mean', 'median', 'max'] + poly_stats = band.reduce(method=method, by=polys) + assert len(poly_stats) == gpd.read_file(polys).shape[0], 'wrong number of polygons returned' + assert list(poly_stats[0].keys()) == method, 'expected different naming of results' From 
42472809567bafdd55a83dc6767e7b538d2f9d39 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 4 Dec 2022 16:51:35 +0100 Subject: [PATCH 072/125] fixed issue with vector_features attribute --- eodal/core/band.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/eodal/core/band.py b/eodal/core/band.py index 54013142..01b76f97 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -524,17 +524,6 @@ def crs(self) -> CRS: """Coordinate Reference System of the band""" return CRS.from_epsg(self.geo_info.epsg) - @property - def features(self) -> None | gpd.GeoDataFrame: - """vector features used for reading or reducing band data""" - return self.vector_features - - @features.setter - def features(self, features: Optional[gpd.GeoDataFrame]): - """set vector features for reducing band data""" - self._check_vector_features(vector_features=features) - object.__setattr__(self, "vector_features", features) - @property def has_alias(self) -> bool: """Checks if a color name can be used for aliasing""" @@ -591,6 +580,8 @@ def _check_vector_features(vector_features: None | gpd.GeoDataFrame) -> None: Asserts that passed GeoDataFrame has a CRS """ if vector_features is not None: + if isinstance(vector_features, Path): + vector_features = gpd.read_file(vector_features) if vector_features.crs is None: raise ValueError( f'Cannot handle vector features without spatial coordinate reference system' @@ -803,6 +794,11 @@ def from_rasterio( if nodata_vals is not None: nodata = nodata_vals[band_idx - 1] + if masking: + # make sure to set the EPSG code + gdf_aoi.set_crs(epsg=epsg, inplace=True) + kwargs.update({'vector_features': gdf_aoi}) + # is_tiled can only be retrived from the raster attribs is_tiled = attrs.get("is_tiled", 0) @@ -1921,12 +1917,15 @@ def reduce( if by is None: features = gpd.GeoDataFrame(geometry=[self.bounds], crs=self.crs) else: - if by == 'self': - features = self.features + if isinstance(by, str): + if by == 'self': + 
features = deepcopy(self.vector_features) + else: + raise ValueError('When passing a string you must pass `self`') elif isinstance(by, Path): features = gpd.read_file(by) elif isinstance(by, gpd.GeoDataFrame): - features = by.copy() + features = deepcopy(by) else: raise TypeError( f'by expected "self", Path and GeoDataFrame objects - got {type(by)} instead' From d1b3cbd18a4ebb4e65d2e06a4e3f39632e3a4956 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 4 Dec 2022 16:51:45 +0100 Subject: [PATCH 073/125] extended test case for Band.reduce --- tests/core/test_band.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/core/test_band.py b/tests/core/test_band.py index 8c171d0c..3483ede6 100644 --- a/tests/core/test_band.py +++ b/tests/core/test_band.py @@ -470,3 +470,18 @@ def test_reduce_band_by_polygons(get_polygons, get_test_band): poly_stats = band.reduce(method=method, by=polys) assert len(poly_stats) == gpd.read_file(polys).shape[0], 'wrong number of polygons returned' assert list(poly_stats[0].keys()) == method, 'expected different naming of results' + + # reduce by a limited number of polygons + polys_reduced = gpd.read_file(polys).iloc[0:10] + poly_stats_reduced = band.reduce(method=method, by=polys_reduced) + assert len(poly_stats_reduced) == polys_reduced.shape[0], 'wrong number of polygons returned' + assert poly_stats_reduced == poly_stats[0:10], 'wrong order of results' + + # reduce by passing the "self" keyword (features must be set) + poly_stats_self = band.reduce(method=method, by='self') + assert len(poly_stats_self) == band.vector_features.shape[0], 'wrong number of polygons' + assert poly_stats_self == poly_stats, 'both approaches should return exactly the same' + + # call reduce without passing "by" -> should return a single result + all_stats = band.reduce(method=method) + assert len(all_stats) == 1, 'there must not be more than a single result' From 63c8aaeb755da7a094b4d02d698b789c4e465922 Mon Sep 17 00:00:00 2001 From: lukas 
Date: Sun, 4 Dec 2022 16:53:42 +0100 Subject: [PATCH 074/125] adding rasterstats dependency required for Band.reduce() --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 59832615..152d504a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,4 @@ pydantic[dotenv] requests planetary_computer alive_progress +rasterstats From 87c9368aa6de4162efd9f49694a9dd441d7600a1 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 4 Dec 2022 17:58:14 +0100 Subject: [PATCH 075/125] preserving feature attributes when calling Band.reduce() --- eodal/core/band.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/eodal/core/band.py b/eodal/core/band.py index 01b76f97..8a76292c 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -1908,7 +1908,7 @@ def reduce( :param by: define optional vector features by which to reduce the band. By passing `'self'` the method uses the features with which the band was read, otherwise - specify a file-path to vector features or provide a GeoDataFrame + specify a file-path to vector features or provide a GeoDataFrame. :returns: list of dictionaries with scalar results per feature """ @@ -1988,8 +1988,9 @@ def _fun_prototype(x: np.ndarray | np.ma.MaskedArray): vals = self.values.copy() # cast array to float to set masked arrays to NaN; then we can call np.nan-something # and force rasterstats to return the correct values - vals = vals.astype(float) - vals = vals.filled(np.nan) + if self.is_masked_array: + vals = vals.astype(float) + vals = vals.filled(np.nan) # unfortunately, rasterstats always calculates some default statistcs. 
We therefore # trick it by calling only the count method and delete the result afterwards # Also, there seems to be a bug in rasterstats preventing more than a single @@ -2008,6 +2009,11 @@ def _fun_prototype(x: np.ndarray | np.ma.MaskedArray): for odx, operator in enumerate(method): feature_stats[operator] = stats_operator_list[odx][idx][operator] stats.append(feature_stats) + + # save the geometries and all other attributes of the feature(s) used + for idx in range(features.shape[0]): + stats[idx].update(features.iloc[idx].to_dict()) + stats[idx].update({'crs': features.crs}) return stats def scale_data( From 2897cc8c75a639c6aeb299e26a02224dcc5e514e Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 4 Dec 2022 17:58:28 +0100 Subject: [PATCH 076/125] check for preservation of features --- tests/core/test_band.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/core/test_band.py b/tests/core/test_band.py index 3483ede6..6aab6b38 100644 --- a/tests/core/test_band.py +++ b/tests/core/test_band.py @@ -469,7 +469,8 @@ def test_reduce_band_by_polygons(get_polygons, get_test_band): method = ['mean', 'median', 'max'] poly_stats = band.reduce(method=method, by=polys) assert len(poly_stats) == gpd.read_file(polys).shape[0], 'wrong number of polygons returned' - assert list(poly_stats[0].keys()) == method, 'expected different naming of results' + assert set(method).issubset(poly_stats[0].keys()), 'expected different naming of results' + assert 'geometry' in poly_stats[0].keys(), 'geometry attribute was lost' # reduce by a limited number of polygons polys_reduced = gpd.read_file(polys).iloc[0:10] From 31b2be8bdbcb224c2228a896c2b008c1c4798d47 Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 4 Dec 2022 17:58:49 +0100 Subject: [PATCH 077/125] updated band_summaries() method to latest changes in Band.reduce() --- eodal/core/raster.py | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/eodal/core/raster.py 
b/eodal/core/raster.py index 8ef2dea3..b69773e4 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -80,6 +80,7 @@ from collections.abc import MutableMapping from copy import deepcopy +from itertools import chain from matplotlib.axes import Axes from matplotlib.pyplot import Figure from numbers import Number @@ -1199,26 +1200,42 @@ def band_summaries( self, band_selection: Optional[List[str]] = None, methods: Optional[List[str]] = ["nanmin", "nanmean", "nanstd", "nanmax"], - ) -> pd.DataFrame: + **kwargs + ) -> gpd.GeoDataFrame: """ - Descriptive band statistics + Descriptive band statistics by calling `Band.reduce` for bands in a collection. :param band_selection: selection of bands to process. If not provided uses all bands :param methods: descriptive metrics to compute for each band + :param kwargs: + optional keyword arguments to pass to `~eodal.core.band.Band.reduce`. Use + `by` to get descriptive statistics by selected geometry features (e.g., + single polygons). :returns: - ``DataFrame`` with descriptive statistics for all bands selected + ``GeoDataFrame`` with descriptive statistics for all bands selected and geometry + features passed (optional) """ stats = [] if band_selection is None: band_selection = self.band_names for band_name in band_selection: - band_stats = self[band_name].reduce(method=methods) - band_stats["band_name"] = band_name + band_stats = self[band_name].reduce(method=methods, **kwargs) + # band_stats is a list of 1:N entries (one per feature on which reduce + # was called); we add the band name as attribute + for idx in range(len(band_stats)): + band_stats[idx].update({'band_name': band_name}) stats.append(band_stats) - return pd.DataFrame(stats) + # since the geometry information was passed on, a GeoDataFrame can be returned + df = pd.DataFrame(list(chain(*stats))) + gdf = gpd.GeoDataFrame(df, geometry=df['geometry'], crs=df['crs'].iloc[0]) + # cast columns to float; otherwise pandas throws an error: + # TypeError: 
unhashable type: 'MaskedConstant' + gdf[methods] = gdf[methods].astype(float) + gdf.drop(columns=['crs'], inplace=True) + return gdf @check_band_names def reproject( @@ -1458,6 +1475,7 @@ def join(self, other): Spatial join of one ``RasterCollection`` instance with another instance """ + pass def calc_si( self, si_name: str, inplace: Optional[bool] = False From fca0fba8e25c08bb901f219b8200031412847b8b Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 4 Dec 2022 17:59:04 +0100 Subject: [PATCH 078/125] added tests for RasterCollection.band_summaries() --- tests/core/test_raster_collection.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/core/test_raster_collection.py b/tests/core/test_raster_collection.py index 87fdf96b..c7799dc1 100644 --- a/tests/core/test_raster_collection.py +++ b/tests/core/test_raster_collection.py @@ -5,6 +5,7 @@ import datetime import pytest +import geopandas as gpd import matplotlib.pyplot as plt import numpy as np @@ -254,4 +255,19 @@ def test_clipping(get_bandstack): clipping_bounds = (0, 0, 100, 100) with pytest.raises(ValueError): rcoll.clip_bands(clipping_bounds=clipping_bounds, inplace=True) + +def test_band_summaries(get_bandstack, get_polygons): + """test band summary statistics""" + fpath_raster = get_bandstack() + rcoll = RasterCollection.from_multi_band_raster( + fpath_raster=fpath_raster + ) + # try band summary statistics for polygons + polys = get_polygons() + band_stats = rcoll.band_summaries(by=polys) + assert isinstance(band_stats, gpd.GeoDataFrame), 'expected a GeoDataFrame' + assert 'nanmean' in band_stats.columns, 'expected the mean value' + assert 'band_name' in band_stats.columns, 'expected the band name as column' + assert band_stats.crs == rcoll[rcoll.band_names[0]].crs, 'mis-match of CRS' + \ No newline at end of file From 09cfccc781b4bb730e079b804567c3028a52533f Mon Sep 17 00:00:00 2001 From: lukas Date: Sun, 4 Dec 2022 18:04:26 +0100 Subject: [PATCH 079/125] extended test case to entire 
spatial extent of RasterCollection --- tests/core/test_raster_collection.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/core/test_raster_collection.py b/tests/core/test_raster_collection.py index c7799dc1..21607476 100644 --- a/tests/core/test_raster_collection.py +++ b/tests/core/test_raster_collection.py @@ -270,4 +270,8 @@ def test_band_summaries(get_bandstack, get_polygons): assert 'band_name' in band_stats.columns, 'expected the band name as column' assert band_stats.crs == rcoll[rcoll.band_names[0]].crs, 'mis-match of CRS' + # get statistics of complete RasterCollection + band_stats_all = rcoll.band_summaries() + assert isinstance(band_stats, gpd.GeoDataFrame), 'expected a GeoDataFrame' + assert band_stats_all.shape[0] == len(rcoll), 'wrong number of items in statistics' \ No newline at end of file From 25bb55562293b1727331975692cfb82879cf7a20 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 5 Dec 2022 16:07:32 +0100 Subject: [PATCH 080/125] deleted file created for some random testing purpose --- test.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test.txt diff --git a/test.txt b/test.txt deleted file mode 100644 index e69de29b..00000000 From 4662369b89d3d7c3b73ae6ef24bec97cd951dcfd Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 5 Dec 2022 16:07:46 +0100 Subject: [PATCH 081/125] updated changelog with latest changes in core submodule --- CHANGELOG.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 235b3e28..c451f9a4 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -10,7 +10,7 @@ The format is based on `Keep a Changelog`_, and this project adheres to `Semanti Categories for changes are: Added, Changed, Deprecated, Removed, Fixed, Security. 
-Version `0.0.2 < https://github.com/EOA-team/eodal/releases/tag/v0.1.0>`__ +Version `0.1.0 < https://github.com/EOA-team/eodal/releases/tag/v0.1.0>`__ -------------------------------------------------------------------------------- Release date: YYYY-MM-DD @@ -19,6 +19,7 @@ Release date: YYYY-MM-DD - Added: RasterCollection now have a "apply" method allowing to pass custom functions to RasterCollection objects - Added: RasterCollection now supports numpy-array like slicing using band names or band aliases - Added: Band and RasterCollection objects now support clipping to rectangular bounds (i.e., spatial sub-setting) +- Changed: Band.reduce() and RasterCollection.band_summaries() now support creating statistics per Polygon features - Added: SceneCollections are collections of 0 to N Scenes (RasterCollection + timestamp) and allow to store multiple Scenes over time From ddc8301e5cdf1de63be34c1f71c4e4ccaedbaebc Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 5 Dec 2022 18:38:21 +0100 Subject: [PATCH 082/125] updated test for SceneCollection time series - still something's buggy --- tests/core/test_scene_collection.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/core/test_scene_collection.py b/tests/core/test_scene_collection.py index 0a89c96f..1db52185 100644 --- a/tests/core/test_scene_collection.py +++ b/tests/core/test_scene_collection.py @@ -185,7 +185,7 @@ def test_scene_collection_to_xarray(get_scene_collection): for idx in range(len(scoll)): assert (xarr.values[idx,:,:,:] == scoll[scoll.timestamps[idx]].get_values()).all(), 'wrong ' -def test_scene_collection_time_series(get_scene_collection, generate_random_points): +def test_scene_collection_time_series(get_scene_collection, generate_random_points, get_polygons): """time series extraction from scene collection""" scoll = get_scene_collection() # sample pixels randomly distributed within the scene collection's spatial extent @@ -198,4 +198,9 @@ def 
test_scene_collection_time_series(get_scene_collection, generate_random_poin assert isinstance(points_ts, gpd.GeoDataFrame), 'expected a GeoDataFrame' assert 'acquisition_time' in points_ts.columns, 'missing time column' assert points_ts.shape == (60, 12), 'wrong shape of returned GeoDataFrame object' - + + # test time series extraction using polygons and custom statistics + methods = ['nanmedian', 'nanmin'] + polys = get_polygons() + polygons_ts = scoll.get_feature_timeseries(vector_features=polys, method=methods) + # TODO: find out why nanmedian contains NaNs but nanmin not From 3c00932834c52697c4a6d9c30e2310bd0482e50e Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 5 Dec 2022 18:38:44 +0100 Subject: [PATCH 083/125] moved methods arg from RasterCollection to Band kwargs --- eodal/core/raster.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index b69773e4..9579f741 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -1199,7 +1199,6 @@ def get_values( def band_summaries( self, band_selection: Optional[List[str]] = None, - methods: Optional[List[str]] = ["nanmin", "nanmean", "nanstd", "nanmax"], **kwargs ) -> gpd.GeoDataFrame: """ @@ -1208,8 +1207,6 @@ def band_summaries( :param band_selection: selection of bands to process. If not provided uses all bands - :param methods: - descriptive metrics to compute for each band :param kwargs: optional keyword arguments to pass to `~eodal.core.band.Band.reduce`. 
Use `by` to get descriptive statistics by selected geometry features (e.g., @@ -1222,7 +1219,7 @@ def band_summaries( if band_selection is None: band_selection = self.band_names for band_name in band_selection: - band_stats = self[band_name].reduce(method=methods, **kwargs) + band_stats = self[band_name].reduce(**kwargs) # band_stats is a list of 1:N entries (one per feature on which reduce # was called); we add the band name as attribute for idx in range(len(band_stats)): @@ -1233,6 +1230,7 @@ def band_summaries( gdf = gpd.GeoDataFrame(df, geometry=df['geometry'], crs=df['crs'].iloc[0]) # cast columns to float; otherwise pandas throws an error: # TypeError: unhashable type: 'MaskedConstant' + methods = kwargs.get('method', ["nanmin", "nanmean", "nanstd", "nanmax"]) gdf[methods] = gdf[methods].astype(float) gdf.drop(columns=['crs'], inplace=True) return gdf From 62d1e9d985bfb36ad96e75e1a7361fb88a942570 Mon Sep 17 00:00:00 2001 From: lukas Date: Mon, 5 Dec 2022 18:39:07 +0100 Subject: [PATCH 084/125] adding support for Polygon statistics (still buggy, rasterstats problem) --- eodal/core/scene.py | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/eodal/core/scene.py b/eodal/core/scene.py index fc933b38..1217b68f 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -28,6 +28,7 @@ from collections.abc import MutableMapping from copy import deepcopy +from pathlib import Path from typing import Any, Callable, List, Optional, Tuple from eodal.core.raster import RasterCollection @@ -366,25 +367,56 @@ def dump(self): def get_feature_timeseries( self, + vector_features: Path | gpd.GeoDataFrame | str, reindex_dataframe: Optional[bool] = False, **kwargs ) -> gpd.GeoDataFrame: """ Get a time series for 1:N vector features from SceneCollection. + :param vector_features: + vector features for which to extract time series data. 
If `Point` geometries + are provided calls `~RasterCollection.get_pixels()` on all scenes in the + collection. If `Polygon` (or `MultiPolygons`) are provided, calls + `~RasterCollection.band_summaries()` on all scenes in the collection. :param reindex_dataframe: boolean flag whether to reindex the resulting GeoDataFrame after extracting data from all scenes. Set to `True` to ensure that the returned GeoDataFrame has a unique index. `False` by default. :param kwargs: - key word arguments to pass to `~RasterCollection.get_pixels()`. + key word arguments to pass to `~RasterCollection.get_pixels()` or + `~RasterCollection.band_summaries()` depending on the type of the input geometries. :returns: ``GeoDataFrame`` with extracted raster values per feature and time stamp """ + # check spatial datatypes + if not isinstance(vector_features, str): + if isinstance(vector_features, Path): + gdf = gpd.read_file(vector_features) + elif isinstance(vector_features, gpd.GeoDataFrame): + gdf = vector_features.copy() + else: + raise ValueError('Can only handle pathlib objects, GeoDataFrames or "self"') + if set(gdf.geometry.geom_type.unique()).issubset(set(['Point', 'MultiPoint'])): + pixels = True + elif set(gdf.geometry.geom_type.unique()).issubset(set(['Polygon', 'MultiPolygon'])): + pixels = False + else: + raise ValueError('Can only handle (Multi)Point or (Multi)Polygon geometries') + else: + if vector_features == 'self': + gdf = 'self' + pixels = False + else: + raise ValueError('When passing a string only "self" is permitted') + # loop over scenes in collection and get the feature values gdf_list = [] for timestamp, scene in self: - _gdf = scene.get_pixels(**kwargs) + if pixels: + _gdf = scene.get_pixels(vector_features=gdf, **kwargs) + else: + _gdf = scene.band_summaries(by=gdf, **kwargs) _gdf['acquisition_time'] = timestamp gdf_list.append(_gdf) # reindex the resulting GeoDataFrame if required From 1623d0e80900d08c0802805eb6bbc55ee450688d Mon Sep 17 00:00:00 2001 From: lukas 
Date: Tue, 6 Dec 2022 14:01:01 +0100 Subject: [PATCH 085/125] extended test case for SceneCollection feature time series --- tests/core/test_scene_collection.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/core/test_scene_collection.py b/tests/core/test_scene_collection.py index 1db52185..5a065128 100644 --- a/tests/core/test_scene_collection.py +++ b/tests/core/test_scene_collection.py @@ -200,7 +200,10 @@ def test_scene_collection_time_series(get_scene_collection, generate_random_poin assert points_ts.shape == (60, 12), 'wrong shape of returned GeoDataFrame object' # test time series extraction using polygons and custom statistics - methods = ['nanmedian', 'nanmin'] + methods = ['median', 'min'] polys = get_polygons() polygons_ts = scoll.get_feature_timeseries(vector_features=polys, method=methods) - # TODO: find out why nanmedian contains NaNs but nanmin not + assert isinstance(polygons_ts, gpd.GeoDataFrame), 'expected a GeoDataFrame' + assert polygons_ts.iloc[1]['median'] == \ + scoll[1000]['B02'].reduce(by=polygons_ts.iloc[1].geometry, method='median')[0]['median'], \ + 'values are not the same' From 57a4b72e25576f77d21e67ff402c0996dee2ae21 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 6 Dec 2022 14:01:14 +0100 Subject: [PATCH 086/125] extended test case for time series --- tests/core/test_scene_collection.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/core/test_scene_collection.py b/tests/core/test_scene_collection.py index 5a065128..10da7713 100644 --- a/tests/core/test_scene_collection.py +++ b/tests/core/test_scene_collection.py @@ -202,6 +202,9 @@ def test_scene_collection_time_series(get_scene_collection, generate_random_poin # test time series extraction using polygons and custom statistics methods = ['median', 'min'] polys = get_polygons() + # make sure there's an error raised when numpy nan functions are passed + with pytest.raises(ValueError): + scoll.get_feature_timeseries(vector_features=polys, 
method=['nanmedian']) polygons_ts = scoll.get_feature_timeseries(vector_features=polys, method=methods) assert isinstance(polygons_ts, gpd.GeoDataFrame), 'expected a GeoDataFrame' assert polygons_ts.iloc[1]['median'] == \ From 282437bcf01c707b8c6e5361e7396fe47cdabb06 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 6 Dec 2022 14:03:00 +0100 Subject: [PATCH 087/125] set default statistics to standard np functions and check for nan funs since rasterstats handles NaNs internally, the usage of numpy nan functions is discouraged and throws an error to prevent users from getting wrong results as it is not guaranteed that numpy nan functions will respect masks (i.e., masked values are used for calculating the statistics) --- eodal/core/band.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/eodal/core/band.py b/eodal/core/band.py index 8a76292c..ba9933bd 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -51,7 +51,7 @@ from rasterio.enums import Resampling from rasterstats import zonal_stats from rasterstats.utils import check_stats -from shapely.geometry import box, Point, Polygon +from shapely.geometry import box, MultiPolygon, Point, Polygon from typing import Any, Dict, List, Optional, Tuple, Union from eodal.core.operators import Operator @@ -1893,18 +1893,25 @@ def reproject( def reduce( self, - method: Union[str, List[str]], - by: Optional[Path | gpd.GeoDataFrame | str] = None, + method: Optional[str | List[str]] = ['min', 'mean', 'std', 'max', 'count'], + by: Optional[Path | gpd.GeoDataFrame | Polygon | str] = None, ) -> List[Dict[str, int | float]]: """ Reduces the raster data to scalar values by calling `rasterstats`. The reduction can be done on the whole band or by using vector features. + IMPORTANT: + NaNs in the data are handled by `rasterstats` internally. 
Therefore, passing + numpy nan-functions (e.g., `nanmedian`) is **NOT** necessary and users are + **discouraged** from doing so as passing `nanmedian` will ignore existing + masks. + :param method: any ``numpy`` function taking a two-dimensional array as input and returning a single scalar. Can be a single function name - (e.g., "mean") or a list of function names (e.g., ["mean", "median"]) + (e.g., "mean") or a list of function names (e.g., ["mean", "median"]). + By default ['min', 'mean', 'std', 'max', 'count'] are returned. :param by: define optional vector features by which to reduce the band. By passing `'self'` the method uses the features with which the band was read, otherwise @@ -1926,9 +1933,12 @@ def reduce( features = gpd.read_file(by) elif isinstance(by, gpd.GeoDataFrame): features = deepcopy(by) + elif (isinstance(by, Polygon) or isinstance(by, MultiPolygon)): + features = gpd.GeoDataFrame(geometry=[by], crs=self.crs) else: raise TypeError( - f'by expected "self", Path and GeoDataFrame objects - got {type(by)} instead' + 'by expected "self", Path, (Multi)Polygon and GeoDataFrame ' + \ + f'objects - got {type(by)} instead' ) # check if features has the same CRS as the band. Reproject features if required if not features.crs == self.crs: @@ -1965,13 +1975,19 @@ def reduce( stats_operator_list = [] # loop over operators in method list and make them rasterstats compatible for operator in method: + # check if operator stats with 'nan' -> this is discouraged to avoid + # errors in rasterstats as rasterstats checks for NaNs internally + if operator.startswith('nan'): + raise ValueError( + 'The usage of numpy-nan functions is discouraged and therefore raises an error.' + \ + '\nThe handling of NaNs is done by `rasterstats` internally and therefore does not' + \ + '\n need to be specified. 
Please pass operators by their standard numpy names (e.g., "mean")' + ) expression = f"{numpy_prefix}.{operator}" # When the array is masked, we have to set masked arrays to NaN - _operator = operator if self.is_masked_array: + _operator = operator _operator = 'nan' + operator - # numpy.ma has some different function names to consider - if operator.startswith("nan"): expression = f"{numpy_prefix}.{_operator[3::]}" elif operator.endswith("nonzero"): expression = f"{numpy_prefix}.count" @@ -1996,11 +2012,8 @@ def _fun_prototype(x: np.ndarray | np.ma.MaskedArray): # Also, there seems to be a bug in rasterstats preventing more than a single # operator to be passed in add_stats (otherwise the values are returned are wrong) stats = zonal_stats( - features, vals, affine=affine, stats='count', add_stats=add_stats + features, vals, affine=affine, stats='median', add_stats=add_stats ) - # delete the count entry - for item in stats: - del item['count'] stats_operator_list.append(stats) # combine the list of stats into a format consistent with the standard zonal_stats call stats = [] From e62c04a1ad965adaa1b3c860da443fe2abcc4303 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 6 Dec 2022 14:03:19 +0100 Subject: [PATCH 088/125] changed defaults to numpy standard functions --- eodal/core/raster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index 9579f741..f117be49 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -1230,7 +1230,7 @@ def band_summaries( gdf = gpd.GeoDataFrame(df, geometry=df['geometry'], crs=df['crs'].iloc[0]) # cast columns to float; otherwise pandas throws an error: # TypeError: unhashable type: 'MaskedConstant' - methods = kwargs.get('method', ["nanmin", "nanmean", "nanstd", "nanmax"]) + methods = kwargs.get('method', ['min', 'mean', 'std', 'max', 'count']) gdf[methods] = gdf[methods].astype(float) gdf.drop(columns=['crs'], inplace=True) return gdf From 
62f8721481ecae1bd5e9f9e181e2b76a73502916 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 6 Dec 2022 17:03:44 +0100 Subject: [PATCH 089/125] added test case for SceneCollection.to_pickle() and .from_pickle() --- tests/core/test_scene_collection.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/core/test_scene_collection.py b/tests/core/test_scene_collection.py index 10da7713..01f71a03 100644 --- a/tests/core/test_scene_collection.py +++ b/tests/core/test_scene_collection.py @@ -210,3 +210,12 @@ def test_scene_collection_time_series(get_scene_collection, generate_random_poin assert polygons_ts.iloc[1]['median'] == \ scoll[1000]['B02'].reduce(by=polygons_ts.iloc[1].geometry, method='median')[0]['median'], \ 'values are not the same' + +def test_dump_and_load(get_scene_collection): + """dumping and loading SceneCollections to and from disk""" + scoll = get_scene_collection() + scoll_dumped = scoll.to_pickle() + assert isinstance(scoll_dumped, bytes), 'expected a binary oject' + scoll_reloaded = SceneCollection.from_pickle(scoll_dumped) + assert scoll_reloaded.collection == scoll.collection, 'data in collection should be the same' + assert scoll_reloaded.identifiers == scoll.identifiers, 'lost identifiers' From 8a386ee984c65630e9e22cb807c13452b027ddb0 Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 6 Dec 2022 17:04:11 +0100 Subject: [PATCH 090/125] bugfix to make RasterCollection algebra work --- eodal/core/raster.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index f117be49..a6854825 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -252,7 +252,7 @@ class RasterOperator(Operator): def calc( cls, a, - other: Union[Band, Number, np.ndarray], + other: Band | Number | np.ndarray, operator: str, inplace: Optional[bool] = False, band_selection: Optional[List[str]] = None, @@ -263,8 +263,8 @@ def calc( :param a: `RasterCollection` object 
with values (non-empty) :param other: - `Band` object, scalar, or 3-dimensional `numpy.array` to use on the - right-hand side of the operator. If a `numpy.array` is passed the array + `Band` object, scalar, 3-dimensional `numpy.array`, or RasterCollection to use + on the right-hand side of the operator. If a `numpy.array` is passed the array must have either shape `(1,nrows,ncols)` or `(nband,nrows,ncols)` where `nrows` is the number of rows in `a`, ncols the number of columns in `a` and `nbands` the number of bands in a or the selection thereof. @@ -312,24 +312,31 @@ def calc( elif isinstance(other, RasterCollection): _other = other.copy() _other = other.get_values(band_selection=band_selection) - # other_is_raster = True + elif (isinstance(other, int) or isinstance(other, float)): + _other = other + else: + raise TypeError(f'{type(other)} is not supported') + # perform the operation try: - expr = f"a.get_values(band_selection) {operator} other" + expr = f"a.get_values(band_selection) {operator} _other" res = eval(expr) except Exception as e: raise cls.BandMathError(f"Could not execute {expr}: {e}") # return result or overwrite band data - if inplace: - if band_selection is None: - band_selection = a.band_names() - for idx, band_name in enumerate(band_selection): - object.__setattr__(cls.collection[band_name], "values", res[idx, :, :]) - else: - # TODO: return a new RasterCollection instance - # TODO: think about multiple slices - raise NotImplementedError() - + if band_selection is None: + band_selection = a.band_names + if not inplace: + rcoll_out = RasterCollection() + for idx, band_name in enumerate(band_selection): + if inplace: + object.__setattr__(a.collection[band_name], "values", res[idx,:,:]) + else: + attrs = a.collection[band_name].__dict__ + attrs.update({'values': res[idx,:,:]}) + rcoll_out.add_band(band_constructor=Band, **attrs) + if not inplace: + return rcoll_out class RasterCollection(MutableMapping): """ From 
9e82c082a40975a61ecd3742a16943695cfd98dd Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 6 Dec 2022 17:06:31 +0100 Subject: [PATCH 091/125] pickling and unpickling of SceneCollection objects --- eodal/core/scene.py | 59 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/eodal/core/scene.py b/eodal/core/scene.py index 1217b68f..c0704a65 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -24,6 +24,7 @@ import geopandas as gpd import numpy as np import pandas as pd +import pickle import xarray as xr from collections.abc import MutableMapping @@ -176,6 +177,9 @@ def _get_scene_from_key(key: str | Any) -> RasterCollection: out_scoll.add_scene(_get_scene_from_key(key=scenes[idx])) return out_scoll + def __getstate__(self): + return self.__dict__.copy() + def __setitem__(self, item: RasterCollection): if not isinstance(item, RasterCollection): raise TypeError("Only RasterCollection objects can be passed") @@ -197,6 +201,9 @@ def __setitem__(self, item: RasterCollection): if hasattr(item.scene_properties, 'product_uri'): self._identifiers.append(item.scene_properties.product_uri) + def __setstate__(self, d): + self.collection = d + def __delitem__(self, key: str | datetime.datetime): # get index of the scene to be deleted to also delete its identifier idx = self.timestamps.index(str(key)) @@ -274,6 +281,22 @@ def is_sorted(self, value: bool) -> None: raise TypeError('Only boolean types are accepted') self._is_sorted = value + @classmethod + def from_pickle(cls, stream: bytes): + """ + Load SceneCollection from pickled binary stream. + + :param stream: + pickled binary stream to load into a SceneCollection. + :returns: + `SceneCollection` instance. 
+ """ + reloaded = pickle.loads(stream) + scoll_out = cls() + for _, scene in reloaded['collection'].items(): + scoll_out.add_scene(scene) + return scoll_out + @classmethod def from_raster_collections( cls, @@ -350,20 +373,40 @@ def add_scene( # try to add the scene to the SceneCollection try: self.__setitem__(scene) - except Exception as e: raise KeyError(f'Cannot add scene: {e}') + def apply(self, func: Callable, *args, **kwargs) -> Any: + """ + Apply a custom function to a SceneCollection. - def apply(self, func: Callable): - pass + :param func: + custom callable taking the ``SceneCollection`` as first + argument + :param args: + optional arguments to pass to `func` + :param kwargs: + optional keyword arguments to pass to `func` + :returns: + results of `func` + """ + try: + return func.__call__(self, *args, **kwargs) + except Exception as e: + raise ValueError from e def copy(self): """returns a true copy of the SceneCollection""" return deepcopy(self) - def dump(self): - pass + def to_pickle(self) -> bytes: + """ + Dumps a scene collection as pickled object + + :returns: + pickled binary object + """ + return pickle.dumps(self.__dict__.copy()) def get_feature_timeseries( self, @@ -428,12 +471,6 @@ def get_feature_timeseries( else: return pd.concat(gdf_list) - def load(self): - pass - - def plot(self): - pass - def sort( self, sort_direction: Optional[str] = 'asc' From 2e4939f585c2399416353d8aad78517ff5bac5ba Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 6 Dec 2022 17:08:30 +0100 Subject: [PATCH 092/125] updated changelog --- CHANGELOG.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c451f9a4..58b33192 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -21,6 +21,8 @@ Release date: YYYY-MM-DD - Added: Band and RasterCollection objects now support clipping to rectangular bounds (i.e., spatial sub-setting) - Changed: Band.reduce() and RasterCollection.band_summaries() now support creating statistics per Polygon features 
- Added: SceneCollections are collections of 0 to N Scenes (RasterCollection + timestamp) and allow to store multiple Scenes over time +- Fixed: Map algebra now also works on RasterCollection supporting multiple cases (i.e., RasterCollection with other RasterCollection, scaler, etc.) +- Added: SceneCollection objects can be saved as pickled objects and loaded from pickled binary objects to make SceneCollections persistent Version `0.0.1 < https://github.com/EOA-team/eodal/releases/tag/v0.0.1>`__ From 115d9bd0cfe4db386a951af964d1191e3e2d94be Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 6 Dec 2022 17:21:20 +0100 Subject: [PATCH 093/125] covering case when data is pickled and saved to disk and reloaded --- tests/core/test_scene_collection.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/core/test_scene_collection.py b/tests/core/test_scene_collection.py index 01f71a03..6ef21f18 100644 --- a/tests/core/test_scene_collection.py +++ b/tests/core/test_scene_collection.py @@ -211,11 +211,20 @@ def test_scene_collection_time_series(get_scene_collection, generate_random_poin scoll[1000]['B02'].reduce(by=polygons_ts.iloc[1].geometry, method='median')[0]['median'], \ 'values are not the same' -def test_dump_and_load(get_scene_collection): - """dumping and loading SceneCollections to and from disk""" +def test_dump_and_load(get_scene_collection, datadir): + """dumping and loading SceneCollections to and from disk as pickled objects""" scoll = get_scene_collection() scoll_dumped = scoll.to_pickle() assert isinstance(scoll_dumped, bytes), 'expected a binary oject' scoll_reloaded = SceneCollection.from_pickle(scoll_dumped) assert scoll_reloaded.collection == scoll.collection, 'data in collection should be the same' assert scoll_reloaded.identifiers == scoll.identifiers, 'lost identifiers' + + # check saving to file and reading from it again + fpath = datadir.joinpath('scene_collection.pkl') + with open(fpath, 'wb') as f: + 
f.write(scoll_dumped) + + scoll_reloaded_from_file = SceneCollection.from_pickle(fpath) + assert scoll_reloaded_from_file.collection == scoll.collection, \ + 'data in collection should be the same' From 63e0a5c31935cbadd3a5060d7d0d9637bb3eb19e Mon Sep 17 00:00:00 2001 From: lukas Date: Tue, 6 Dec 2022 17:21:47 +0100 Subject: [PATCH 094/125] from_pickle() now accepting also file-paths to binary files --- eodal/core/scene.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/eodal/core/scene.py b/eodal/core/scene.py index c0704a65..ec6d00da 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -282,16 +282,24 @@ def is_sorted(self, value: bool) -> None: self._is_sorted = value @classmethod - def from_pickle(cls, stream: bytes): + def from_pickle(cls, stream: bytes | Path): """ Load SceneCollection from pickled binary stream. :param stream: - pickled binary stream to load into a SceneCollection. + pickled binary stream to load into a SceneCollection or + file-path to pickled binary on disk. :returns: `SceneCollection` instance. 
""" - reloaded = pickle.loads(stream) + if isinstance(stream, Path): + with open(stream, 'rb') as f: + reloaded = pickle.load(f) + elif isinstance(stream, bytes): + reloaded = pickle.loads(stream) + else: + raise TypeError(f'{type(stream)} is not a supported data type') + # open empty scene collection and add scenes one by one scoll_out = cls() for _, scene in reloaded['collection'].items(): scoll_out.add_scene(scene) From c5694261846d0a9bbe8c4bfd98df66d80a8b0c7f Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 08:25:20 +0100 Subject: [PATCH 095/125] minor changes in import statements --- eodal/operational/mapping/merging.py | 3 +-- eodal/operational/mapping/sentinel2.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/eodal/operational/mapping/merging.py b/eodal/operational/mapping/merging.py index 34658085..102e301d 100644 --- a/eodal/operational/mapping/merging.py +++ b/eodal/operational/mapping/merging.py @@ -29,8 +29,7 @@ import eodal from eodal.config import get_settings from eodal.core.band import Band, GeoInfo -from eodal.core.raster import RasterCollection -from eodal.core.scene import SceneProperties +from eodal.core.raster import RasterCollection, SceneProperties Settings = get_settings() diff --git a/eodal/operational/mapping/sentinel2.py b/eodal/operational/mapping/sentinel2.py index 1133c65b..dbcf88f0 100644 --- a/eodal/operational/mapping/sentinel2.py +++ b/eodal/operational/mapping/sentinel2.py @@ -38,7 +38,7 @@ DataNotFoundError ) from eodal.metadata.sentinel2.utils import identify_updated_scenes -from eodal.core.scene import SceneProperties +from eodal.core.raster import SceneProperties settings = get_settings() logger = settings.logger From c9ad391233aca28719f4a8aa9169e5043c978d30 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 12:36:21 +0100 Subject: [PATCH 096/125] adding tests for SceneCollection.plot() method --- tests/core/test_scene_collection.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff 
--git a/tests/core/test_scene_collection.py b/tests/core/test_scene_collection.py index 6ef21f18..dc9a8c1e 100644 --- a/tests/core/test_scene_collection.py +++ b/tests/core/test_scene_collection.py @@ -7,6 +7,7 @@ import pytest import datetime import geopandas as gpd +import matplotlib.pyplot as plt import random import xarray as xr @@ -228,3 +229,13 @@ def test_dump_and_load(get_scene_collection, datadir): scoll_reloaded_from_file = SceneCollection.from_pickle(fpath) assert scoll_reloaded_from_file.collection == scoll.collection, \ 'data in collection should be the same' + +def test_plot_scene_collection(get_scene_collection): + """plot scenes in collection""" + scoll = get_scene_collection() + # plot multiple bands + f = scoll.plot(band_selection=['B02', 'B04', 'B05']) + assert isinstance(f, plt.Figure), 'expected a matplotlib figure' + # plot single band + f = scoll.plot(band_selection=['B8A'], eodal_plot_kwargs={'colormap': 'viridis'}) + assert isinstance(f, plt.Figure), 'expected a matplotlib figure' From bfe1df0706e461bcd84b7c46f70bcb3f5c4b865b Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 12:37:01 +0100 Subject: [PATCH 097/125] added SceneCollection.plot() to plot scenes in collection instance --- eodal/core/scene.py | 113 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 103 insertions(+), 10 deletions(-) diff --git a/eodal/core/scene.py b/eodal/core/scene.py index ec6d00da..5fa966e3 100644 --- a/eodal/core/scene.py +++ b/eodal/core/scene.py @@ -22,6 +22,7 @@ import datetime import dateutil.parser import geopandas as gpd +import matplotlib.pyplot as plt import numpy as np import pandas as pd import pickle @@ -30,7 +31,7 @@ from collections.abc import MutableMapping from copy import deepcopy from pathlib import Path -from typing import Any, Callable, List, Optional, Tuple +from typing import Any, Callable, Dict, List, Optional, Tuple from eodal.core.raster import RasterCollection from eodal.utils.exceptions import SceneNotFoundError 
@@ -407,15 +408,6 @@ def copy(self): """returns a true copy of the SceneCollection""" return deepcopy(self) - def to_pickle(self) -> bytes: - """ - Dumps a scene collection as pickled object - - :returns: - pickled binary object - """ - return pickle.dumps(self.__dict__.copy()) - def get_feature_timeseries( self, vector_features: Path | gpd.GeoDataFrame | str, @@ -479,6 +471,98 @@ def get_feature_timeseries( else: return pd.concat(gdf_list) + def plot( + self, + band_selection: str | List[str], + max_scenes_in_row: Optional[int] = 6, + eodal_plot_kwargs: Optional[Dict] = {}, + **kwargs + ) -> plt.Figure: + """ + Plots scenes in a `SceneCollection` + + :param band_selection: + selection of band(s) to use for plotting. Must be either a single + band or a set of three bands + :param max_scenes_in_row: + number of scenes in a row. Set to 6 by default. + :param eodal_plot_kwargs: + optional keyword arguments to pass on to `eodal.core.band.Band.plot()` + :param kwargs: + optional keyword arguments to pass to `matplotlib.subplots()`. 
+ :returns: + `Figure` object + """ + # check number of passed bands + if isinstance(band_selection, str): + band_selection = [band_selection] + if not len(band_selection) == 1 and not len(band_selection) == 3: + raise ValueError('You must pass a single band name or three band names') + + plot_multiple_bands = True + if len(band_selection) == 1: + plot_multiple_bands = False + + # check number of mapper in feature_scenes and determine figure size + n_scenes = len(self) + nrows = 1 + ncols = 1 + if self.empty: + raise ValueError('No scenes available for plotting') + elif n_scenes == 1: + f, ax = plt.subplots(**kwargs) + # cast to array to allow indexing + ax = np.array([ax]).reshape(1,1) + else: + if n_scenes <= max_scenes_in_row: + ncols = n_scenes + f, ax = plt.subplots(ncols=ncols, nrows=nrows, **kwargs) + # reshape to match the shape of ax array with >1 rows + ax = ax.reshape(1, ax.size) + else: + nrows = int(np.ceil(n_scenes / max_scenes_in_row)) + ncols = max_scenes_in_row + f, ax = plt.subplots(ncols=ncols, nrows=nrows, **kwargs) + # get scene labels + scene_labels = list(self.collection.keys()) + + row_idx, col_idx = 0, 0 + idx = 0 + for idx, _scene in enumerate(self): + scene = _scene[1] + if plot_multiple_bands: + scene.plot_multiple_bands( + band_selection=band_selection, + ax=ax[row_idx, col_idx], + **eodal_plot_kwargs + ) + else: + scene[band_selection[0]].plot( + ax=ax[row_idx, col_idx], + **eodal_plot_kwargs + ) + ax[row_idx, col_idx].set_title(scene_labels[idx]) + idx += 1 + + # switch off axes labels if sharex == True and sharey=True + if kwargs.get('sharex', False) and kwargs.get('sharey', False) \ + and n_scenes > 1: + if nrows > 1: + if row_idx < (nrows - 1): ax[row_idx, col_idx].set_xlabel('') + if nrows > 1: + if col_idx > 0: ax[row_idx, col_idx].set_ylabel('') + + # increase column (and row) counter accordingly + col_idx += 1 + # begin a new row when all columns are filled + if col_idx == max_scenes_in_row: + col_idx = 0 + row_idx += 1 + + # 
make sure sub-plot labels do not overlap + f.tight_layout() + return f + def sort( self, sort_direction: Optional[str] = 'asc' @@ -503,6 +587,15 @@ def sort( scoll.add_scene(scenes[idx].copy()) return scoll + def to_pickle(self) -> bytes: + """ + Dumps a scene collection as pickled object + + :returns: + pickled binary object + """ + return pickle.dumps(self.__dict__.copy()) + def to_xarray(self, **kwargs) -> xr.DataArray: """ Converts all scenes in a SceneCollection to a single `xarray.DataArray`. From fb7637f059f77da5b0de5a4a5c3232b634e3c222 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 12:37:13 +0100 Subject: [PATCH 098/125] marked future to-do --- eodal/operational/mapping/merging.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eodal/operational/mapping/merging.py b/eodal/operational/mapping/merging.py index 102e301d..f752d2ce 100644 --- a/eodal/operational/mapping/merging.py +++ b/eodal/operational/mapping/merging.py @@ -17,6 +17,8 @@ along with this program. If not, see . 
""" +# TODO: move this to eodal algorithms + import os import geopandas as gpd import uuid From dad642947edc6400e464d762c2a311c040852075 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 12:52:50 +0100 Subject: [PATCH 099/125] updated test to meet latest changes in core module --- tests/core/test_raster_collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/core/test_raster_collection.py b/tests/core/test_raster_collection.py index 21607476..63f7d99e 100644 --- a/tests/core/test_raster_collection.py +++ b/tests/core/test_raster_collection.py @@ -266,7 +266,7 @@ def test_band_summaries(get_bandstack, get_polygons): polys = get_polygons() band_stats = rcoll.band_summaries(by=polys) assert isinstance(band_stats, gpd.GeoDataFrame), 'expected a GeoDataFrame' - assert 'nanmean' in band_stats.columns, 'expected the mean value' + assert 'mean' in band_stats.columns, 'expected the mean value' assert 'band_name' in band_stats.columns, 'expected the band name as column' assert band_stats.crs == rcoll[rcoll.band_names[0]].crs, 'mis-match of CRS' From 7be960453d3687b5d0459632f3c16f315074673f Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 12:53:05 +0100 Subject: [PATCH 100/125] removed inappropriate line of test code --- tests/core/test_raster_copy.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/core/test_raster_copy.py b/tests/core/test_raster_copy.py index 93875dc0..ce134f46 100644 --- a/tests/core/test_raster_copy.py +++ b/tests/core/test_raster_copy.py @@ -21,8 +21,6 @@ def test_raster_copy(get_bandstack): assert rcoll_copy.band_aliases == rcoll.band_aliases, 'band aliases differ' assert (rcoll_copy.band_summaries() == rcoll.band_summaries()).all().all(), \ 'band statistics differ' - assert rcoll_copy.scene_properties.acquisition_time == rcoll.scene_properties.acquisition_time, \ - 'timestamps differ' assert rcoll_copy['a'].crs == rcoll['a'].crs, 'Band CRS differ' assert rcoll_copy['b'].get_attributes() == 
rcoll['b'].get_attributes(), \ 'Band attributes differ' From c0052d044fd9ecef3175369b19dd7e1cb0849438 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 12:53:27 +0100 Subject: [PATCH 101/125] fixed: datatype checking now considers more int and float data types --- eodal/core/sensors/sentinel2.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eodal/core/sensors/sentinel2.py b/eodal/core/sensors/sentinel2.py index 1df29d5e..119e6b21 100644 --- a/eodal/core/sensors/sentinel2.py +++ b/eodal/core/sensors/sentinel2.py @@ -583,10 +583,10 @@ def read_pixels_from_safe( # skip all pixels with zero reflectance (either blackfilled or outside of the # scene extent); in case of dtype float check for NaNs band_names = gdf.columns[gdf.columns.str.startswith("B")] - if (gdf.dtypes[band_names] == "float64").all(): + if gdf.dtypes[band_names].unique() in ['float32', 'float64']: gdf[band_names] = gdf[band_names].replace({0., np.nan}) gdf.dropna(axis=0, inplace=True) - elif (gdf.dtypes[band_names] == "int16").all(): + elif gdf.dtypes[band_names].unique() in ['int16', 'int32', 'int64']: gdf = gdf.loc[~(gdf[band_df_safe.band_name] == 0).all(axis=1)] return gdf From 534f1685c48e7fa6260f2d2614ae90aca43ebe92 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 12:54:49 +0100 Subject: [PATCH 102/125] removed wrong import statement --- tests/core/test_raster_apply.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/core/test_raster_apply.py b/tests/core/test_raster_apply.py index cf207665..eeefe0f8 100644 --- a/tests/core/test_raster_apply.py +++ b/tests/core/test_raster_apply.py @@ -12,7 +12,6 @@ from eodal.core.band import Band from eodal.core.raster import RasterCollection -from build.lib.eodal.core import raster def sqrt_per_band( raster_collection: RasterCollection, From 445dd5e9c3ddf378ed996e705a6e13616724378b Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 13:17:24 +0100 Subject: [PATCH 103/125] moving fixture to global conftest 
module --- tests/conftest.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 9f13eaa4..950eb2d2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,7 @@ from distutils import dir_util from pathlib import Path +from eodal.core.band import Band from eodal.core.raster import RasterCollection from eodal.core.scene import SceneCollection from eodal.downloader.utils import unzip_datasets @@ -47,6 +48,24 @@ def get_project_root_path() -> Path: """ return Path(os.path.dirname(os.path.abspath(__file__))).parent +@pytest.fixture +def get_test_band(get_bandstack, get_polygons): + """Fixture returning Band object from rasterio""" + def _get_test_band(): + fpath_raster = get_bandstack() + vector_features = get_polygons() + + band = Band.from_rasterio( + fpath_raster=fpath_raster, + band_idx=1, + band_name_dst='B02', + vector_features=vector_features, + full_bounding_box_only=False, + nodata=0 + ) + return band + return _get_test_band + @pytest.fixture() def get_s2_safe_l2a(get_project_root_path): """ From d518df19bbec75e5708347a8a7e3fd80467f0f5d Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 13:17:38 +0100 Subject: [PATCH 104/125] moved fixture to conftest --- tests/core/test_band.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/tests/core/test_band.py b/tests/core/test_band.py index 6aab6b38..a418d2f0 100644 --- a/tests/core/test_band.py +++ b/tests/core/test_band.py @@ -15,24 +15,6 @@ from eodal.core.band import GeoInfo from eodal.core.band import WavelengthInfo -@pytest.fixture -def get_test_band(get_bandstack, get_polygons): - """Fixture returning Band object from rasterio""" - def _get_test_band(): - fpath_raster = get_bandstack() - vector_features = get_polygons() - - band = Band.from_rasterio( - fpath_raster=fpath_raster, - band_idx=1, - band_name_dst='B02', - vector_features=vector_features, - full_bounding_box_only=False, - nodata=0 - ) - return band - 
return _get_test_band - def test_base_constructors(): """ test base constructor calls From bba37d7f38f2752d369a38463bee79ad456a77c4 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 13:17:55 +0100 Subject: [PATCH 105/125] testing band algebra on scalars and band <-> band --- tests/core/test_band_algebra.py | 62 ++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/tests/core/test_band_algebra.py b/tests/core/test_band_algebra.py index e1e2cc17..af95d5c8 100644 --- a/tests/core/test_band_algebra.py +++ b/tests/core/test_band_algebra.py @@ -1 +1,61 @@ -# TODO: test all operators available for single numbers, arrays and band objects + +import pytest +import numpy as np + +def test_band_algrebra_scalar(get_test_band): + """test algebraic operations using scalar values on Bands""" + band = get_test_band() + scalar = 2. + + # test different operators using scalar on the left-hand side + band_add = band + scalar + assert (band_add.values - band.values == scalar).all(), 'wrong result' + band_sub = band - scalar + assert (band_sub.values - band.values == -scalar).all(), 'wrong result' + band_mul = band * scalar + assert (band_mul.values / band.values == scalar).all(), 'wrong result' + band_div = band / scalar + assert (band_div.values * scalar == band.values).all(), 'wrong result' + band_pow = band**scalar + assert (np.sqrt(band_pow.values) == band.values).all(), 'wrong results' + + # test comparison operators (band <-> band) + band_eq = band == scalar + assert not band_eq.values.all(), 'wrong result' + band_gt = band > scalar + assert band_gt.values.all(), 'wrong result' + band_ge = band >= scalar + assert band_ge.values.all(), 'wrong_result' + band_lt = band < scalar + assert not band_lt.values.all(), 'wrong result' + band_le = band <= scalar + assert not band_le.values.all(), 'wrong result' + +def test_band_algebra_band(get_test_band): + """test algebraic operations using Band values on Bands""" + band = get_test_band() + 
other = get_test_band() + + band_add = band + other + assert (band_add.values - band.values == band.values).all(), 'wrong result' + band_sub = band - other + assert (band_sub.values == 0).all(), 'wrong result' + band_mul = band * other + assert (band_mul.values == band.values * other.values).all(), 'wrong result' + band_div = band / other + assert band_div.values.max() == band_div.values.min() == band_div.values.mean() == 1., \ + 'wrong result' + band_pow = band**other + assert (band_pow.values == band.values**other.values).all(), 'wrong results' + + # test comparison operators (band <-> band) + band_eq = band == other + assert band_eq.values.all(), 'wrong result' + band_gt = band > band_sub + assert band_gt.values.all(), 'wrong result' + band_ge = band >= band + assert band_ge.values.all(), 'wrong_result' + band_lt = band < band_sub + assert not band_lt.values.all(), 'wrong result' + band_le = band <= band_sub + assert not band_le.values.all(), 'wrong result' \ No newline at end of file From 7ea53e690fa48f40fd1cad42b3925fc560b573dc Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 14:09:48 +0100 Subject: [PATCH 106/125] adding "!=" operator --- eodal/core/operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eodal/core/operators.py b/eodal/core/operators.py index c46bb152..5745579c 100644 --- a/eodal/core/operators.py +++ b/eodal/core/operators.py @@ -25,7 +25,7 @@ class Operator: Band operator supporting basic algebraic operations """ - operators: List[str] = ["+", "-", "*", "/", "**", "<", ">", "==", "<=", ">="] + operators: List[str] = ["+", "-", "*", "/", "**", "<", ">", "==", "<=", ">=", "!="] class BandMathError(Exception): pass From f2792ec6ea5e1ce0969e616736cdc7e7e567c292 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 14:10:22 +0100 Subject: [PATCH 107/125] adding "!=" operator for pixelwise check if not-equal --- eodal/core/band.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/eodal/core/band.py 
b/eodal/core/band.py index ba9933bd..8615c44a 100644 --- a/eodal/core/band.py +++ b/eodal/core/band.py @@ -482,6 +482,9 @@ def __truediv__(self, other): def __mul__(self, other): return BandOperator.calc(a=self, other=other, operator="*") + def __ne__(self, other): + return BandOperator.calc(a=self, other=other, operator="!=") + def __eq__(self, other): return BandOperator.calc(a=self, other=other, operator="==") From ce1edd7d60ce659013271971946a37218a2b94c7 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 14:10:47 +0100 Subject: [PATCH 108/125] making deepcopy of raster values before apply operators --- eodal/core/raster.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/eodal/core/raster.py b/eodal/core/raster.py index a6854825..4d28615f 100644 --- a/eodal/core/raster.py +++ b/eodal/core/raster.py @@ -282,12 +282,14 @@ def calc( `numpy.ndarray` if inplace is False, None instead """ cls.check_operator(operator=operator) + # make a copy of a to avoid overwriting the original values + _a = deepcopy(a) # if `other` is a Band object get its values if isinstance(other, Band): - _other = other.copy() - _other = _other.values() + _other = deepcopy(other) + _other = _other.values # check if `other` matches the shape - if isinstance(other, np.ndarray) or isinstance(other, np.ma.MaskedArray): + elif isinstance(other, np.ndarray) or isinstance(other, np.ma.MaskedArray): # check if passed array is 2-d if len(other.shape) == 2: if other.shape != a.get_values(band_selection).shape[1::]: @@ -310,7 +312,7 @@ def calc( ) _other = other.copy() elif isinstance(other, RasterCollection): - _other = other.copy() + _other = deepcopy(other) _other = other.get_values(band_selection=band_selection) elif (isinstance(other, int) or isinstance(other, float)): _other = other @@ -319,7 +321,7 @@ def calc( # perform the operation try: - expr = f"a.get_values(band_selection) {operator} _other" + expr = f"_a.get_values(band_selection) {operator} 
_other" res = eval(expr) except Exception as e: raise cls.BandMathError(f"Could not execute {expr}: {e}") @@ -332,7 +334,7 @@ def calc( if inplace: object.__setattr__(a.collection[band_name], "values", res[idx,:,:]) else: - attrs = a.collection[band_name].__dict__ + attrs = _a.collection[band_name].__dict__ attrs.update({'values': res[idx,:,:]}) rcoll_out.add_band(band_constructor=Band, **attrs) if not inplace: @@ -513,6 +515,9 @@ def __truediv__(self, other): def __mul__(self, other): return RasterOperator.calc(a=self, other=other, operator="*") + def __ne__(self, other): + return RasterOperator.calc(a=self, other=other, operator="!=") + def __eq__(self, other): return RasterOperator.calc(a=self, other=other, operator="==") From 6fe966ebe6992506e4e962253840894d01bea813 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 14:11:06 +0100 Subject: [PATCH 109/125] defining tests for operations and band objects --- tests/core/test_band_algebra.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/core/test_band_algebra.py b/tests/core/test_band_algebra.py index af95d5c8..6324cdaa 100644 --- a/tests/core/test_band_algebra.py +++ b/tests/core/test_band_algebra.py @@ -22,6 +22,8 @@ def test_band_algrebra_scalar(get_test_band): # test comparison operators (band <-> band) band_eq = band == scalar assert not band_eq.values.all(), 'wrong result' + band_ne = band != scalar + assert band_ne.values.any(), 'wrong result' band_gt = band > scalar assert band_gt.values.all(), 'wrong result' band_ge = band >= scalar From c02fd8c06be4bfa2fa890971eb92fa031198eedd Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 14:11:27 +0100 Subject: [PATCH 110/125] defining tests for applying operations on raster objects --- tests/core/test_raster_algebra.py | 79 ++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/tests/core/test_raster_algebra.py b/tests/core/test_raster_algebra.py index 20724b03..1facfb13 100644 --- 
a/tests/core/test_raster_algebra.py +++ b/tests/core/test_raster_algebra.py @@ -1 +1,78 @@ -# TODO: test all operators available for single numbers, arrays, band objects and RasterCollection objects + +import pytest +import numpy + +from eodal.core.raster import RasterCollection + +def test_raster_algebra_scalar(get_bandstack): + """test algebraic operations using scalar values on RasterCollections""" + fpath = get_bandstack() + rcoll = RasterCollection.from_multi_band_raster(fpath) + scalar = 2 + + rcoll_add = rcoll + scalar + assert (rcoll_add.get_values() == rcoll.get_values() + scalar).all(), 'wrong result' + rcoll_sub = rcoll - scalar + assert (rcoll_sub.get_values() == rcoll.get_values() - scalar).all(), 'wrong result' + rcoll_mul = rcoll * scalar + assert (rcoll_mul.get_values() == rcoll.get_values() * scalar).all(), 'wrong result' + rcoll_div = rcoll / scalar + assert (rcoll_div.get_values() == rcoll.get_values() / scalar).all(), 'wrong result' + rcoll_pow = rcoll**scalar + assert (rcoll_pow.get_values() == rcoll.get_values() ** scalar).all(), 'wrong result' + + rcoll_eq = rcoll == scalar + assert not rcoll_eq.get_values().all(), 'wrong results' + rcoll_gt = rcoll > scalar + assert rcoll_gt.get_values().any(), 'wrong results' + rcoll_ge = rcoll >= scalar + assert rcoll_ge.get_values().any(), 'wrong results' + rcoll_lt = rcoll < scalar + assert rcoll_lt.get_values().any(), 'wrong results' + rcoll_le = rcoll <= scalar + assert rcoll_le.get_values().any(), 'wrong results' + +def test_raster_algebra_band_and_raster(get_bandstack): + """test algebraic operations using Bands and Rasters on RasterCollections""" + fpath = get_bandstack() + rcoll = RasterCollection.from_multi_band_raster(fpath) + band = rcoll['B02'].copy() + + # RasterCollection <-> Band + rcoll_add = rcoll + band + assert (rcoll_add.get_values() == rcoll.get_values() + band.values).all(), 'wrong result' + rcoll_sub = rcoll - band + assert (rcoll_sub.get_values() == rcoll.get_values() - 
band.values).all(), 'wrong result' + rcoll_mul = rcoll * band + assert (rcoll_mul.get_values() == rcoll.get_values() * band.values).all(), 'wrong result' + rcoll_pow = rcoll**band + assert (rcoll_pow.get_values() == rcoll.get_values() ** band.values).all(), 'wrong result' + + rcoll_eq = rcoll == band + assert rcoll_eq.get_values().any(), 'wrong results' + assert rcoll_eq['B02'].values.all(), 'wrong results' + rcoll_gt = rcoll > band + assert rcoll_gt.get_values().any(), 'wrong results' + rcoll_ge = rcoll >= band + assert rcoll_ge.get_values().any(), 'wrong results' + rcoll_lt = rcoll < band + assert rcoll_lt.get_values().any(), 'wrong results' + rcoll_le = rcoll <= band + assert rcoll_le.get_values().any(), 'wrong results' + + # RasterCollection <-> RasterCollection + other = RasterCollection.from_multi_band_raster(fpath) + rcoll_add = rcoll + other + assert (rcoll_add.get_values() == rcoll.get_values() + other.get_values()).all(), 'wrong result' + rcoll_sub = rcoll - other + assert (rcoll_sub.get_values() == rcoll.get_values() - other.get_values()).all(), 'wrong result' + rcoll_mul = rcoll * other + assert (rcoll_mul.get_values() == rcoll.get_values() * other.get_values()).all(), 'wrong result' + rcoll_pow = rcoll**other + assert (rcoll_pow.get_values() == rcoll.get_values() ** other.get_values()).all(), 'wrong result' + + rcoll_eq = rcoll == other + assert rcoll_eq.get_values().all(), 'wrong result' + rcoll_ne = rcoll != other + assert not rcoll_ne.get_values().any(), 'wrong result' + \ No newline at end of file From 958d5eeabd565ce84e2a03d99c31e308b048412f Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 16:23:00 +0100 Subject: [PATCH 111/125] adopted to latest changes in function interfaces --- tests/metadata/test_stac_client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/metadata/test_stac_client.py b/tests/metadata/test_stac_client.py index 34cfd50e..35989ee6 100644 --- a/tests/metadata/test_stac_client.py +++ 
b/tests/metadata/test_stac_client.py @@ -22,7 +22,7 @@ def test_mspc_sentinel1(get_polygons): res_s1 = sentinel1( date_start=date_start, date_end=date_end, - vector_features=polys + bounding_box=polys ) assert not res_s1.empty, 'no mapper found' @@ -35,7 +35,7 @@ def test_mspc_sentinel1(get_polygons): res_grd_s1 = sentinel1( date_start=date_start, date_end=date_end, - vector_features=polys, + bounding_box=polys, collection='sentinel-1-grd' ) @@ -65,7 +65,7 @@ def test_mspc_sentinel2(get_polygons): date_end=date_end, processing_level=processing_level, cloud_cover_threshold=cloud_cover_threshold, - vector_features=polys, + bounding_box=polys ) assert not res_s2.empty, 'no results found' assert 'assets' in res_s2.columns, 'no assets provided' From ad7cf1ed3f4fdd58c85d6d052054777c49ea5464 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 17:28:52 +0100 Subject: [PATCH 112/125] updated tests following latest changes in source code --- tests/operational/test_sentinel2_mapper.py | 33 ++++++++++++---------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/tests/operational/test_sentinel2_mapper.py b/tests/operational/test_sentinel2_mapper.py index f4187aa2..cb6d19a1 100644 --- a/tests/operational/test_sentinel2_mapper.py +++ b/tests/operational/test_sentinel2_mapper.py @@ -1,19 +1,26 @@ import pytest import geopandas as gpd +import pandas as pd from datetime import date from pathlib import Path +from eodal.config import get_settings from eodal.operational.mapping import MapperConfigs, Sentinel2Mapper from eodal.utils.sentinel2 import ProcessingLevels from eodal.core.raster import RasterCollection from eodal.core.sensors import Sentinel2 +# use STAC catalog for testing (thus, it should run everywhere) +settings = get_settings() +settings.USE_STAC = True + +@pytest.mark.skip(reason='the mapper will be re-build in the next EOdal version') @pytest.mark.parametrize( 'date_start, date_end, processing_level', - [(date(2016,12,1), date(2017,1,31), 
ProcessingLevels.L1C), - (date(2016,12,1), date(2017,1,31), ProcessingLevels.L2A)] + [(date(2021,12,1), date(2022,1,31), ProcessingLevels.L1C), + (date(2021,12,1), date(2022,1,31), ProcessingLevels.L2A)] ) def test_point_extraction(get_points, date_start, date_end, processing_level): """Extraction of points from Sentinel-2 mapper""" @@ -29,7 +36,7 @@ def test_point_extraction(get_points, date_start, date_end, processing_level): ) assert isinstance(mapper.feature_collection, Path), 'expected a path-like object' - # query the DB to get all S2 mapper available for the points + # query STAC to get all S2 mapper available for the points mapper.get_scenes() assert isinstance(mapper.feature_collection, dict), 'expected a dict-like object' assert len(mapper.get_feature_ids()) == 12, 'wrong number of point features' @@ -57,8 +64,7 @@ def test_point_extraction(get_points, date_start, date_end, processing_level): @pytest.mark.parametrize( 'date_start, date_end, processing_level', - [(date(2016,12,1), date(2017,1,31), ProcessingLevels.L2A), - (date(2016,12,1), date(2017,1,31), ProcessingLevels.L1C)] + [(date(2021,12,1), date(2021,12,10), ProcessingLevels.L2A),] ) def test_field_parcel_extraction(get_polygons_3, date_start, date_end, processing_level): """Extraction of a polygon from multiple Sentinel-2 tiles""" @@ -73,27 +79,24 @@ def test_field_parcel_extraction(get_polygons_3, date_start, date_end, processin mapper_configs=mapping_config ) assert isinstance(mapper.feature_collection, Path), 'expected a path-like object' - # query the DB to get all S2 mapper available for the Polygon + # query the metadata catalogue to get all S2 mapper available for the Polygon mapper.get_scenes() assert len(mapper.observations) == 1, 'expected a single feature' feature_id = mapper.get_feature_ids()[0] obs = mapper.observations[feature_id] - # the polygon covers three different S2 tiles - assert set(obs.tile_id.unique()) == {'T32TLT', 'T31TGN', 'T32TLS'}, \ - 'expected three different tiles 
here' + assert isinstance(obs, pd.DataFrame), 'expected a DataFrame' + # the polygon covers two different S2 tiles + assert set(obs.tile_id.unique()) == {'32TMS', '32TMT'}, \ + 'expected two different tiles here' # the target CRS should be 32632 (UTM Zone 32N) because the majority of the # mapper is in that projection assert (obs.target_crs == 32632).all(), 'wrong target CRS' - if processing_level == ProcessingLevels.L1C: - assert set(obs.sensing_date.unique()) == {date(2016,12,1), date(2017,1,3)}, \ - 'expected two different dates' - assert obs.is_split.all(), 'all mapper must be flagged as "split"' + assert obs.is_split.any(), 'there must be scenes marked as split' # get single observation res = mapper.get_observation( feature_id=feature_id, - # sensing_date=date(2016,12,10) - sensing_date=date(2017,1,17) + sensing_date=date(2021,12,17) ) assert isinstance(res, Sentinel2), 'expected a raster collection for Sentinel-2 data' assert res.is_bandstack(), 'all bands must have the same extent, CRS and pixel size' From 6fb428f4bcdfc7387d2c973b013e2387f17b720d Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 17:29:00 +0100 Subject: [PATCH 113/125] corrected path --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 950eb2d2..1c13bbe3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -234,7 +234,7 @@ def _get_polygons(): testdata_dir = get_project_root_path.joinpath('data') testdata_polys = testdata_dir.joinpath( - Path('sample_polygons').joinpath('western_switzerland.gpkg') + Path('sample_polygons').joinpath('lake_lucerne.gpkg') ) return testdata_polys return _get_polygons From 687fede7997df41439bbbdccd8c57111df9504e9 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 17:29:11 +0100 Subject: [PATCH 114/125] testing for Point geometries --- eodal/utils/geometry.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/eodal/utils/geometry.py 
b/eodal/utils/geometry.py index 755c38fb..a49babd4 100644 --- a/eodal/utils/geometry.py +++ b/eodal/utils/geometry.py @@ -20,7 +20,8 @@ import geopandas as gpd import json -from shapely.geometry import box, Polygon +from copy import deepcopy +from shapely.geometry import box, Point, Polygon def box_to_geojson(gdf: gpd.GeoDataFrame | Polygon) -> str: """ @@ -34,10 +35,11 @@ def box_to_geojson(gdf: gpd.GeoDataFrame | Polygon) -> str: """ # GeoJSON should be in geographic coordinates if isinstance(gdf, gpd.GeoDataFrame): + _gdf = deepcopy(gdf) gdf_wgs84 = gdf.to_crs(epsg=4326) bbox = gdf_wgs84.total_bounds bbox_poly = box(*bbox) - elif isinstance(gdf, Polygon): - bbox_poly = gdf + elif (isinstance(gdf, Polygon) or isinstance(gdf, Point)): + bbox_poly = deepcopy(gdf) bbox_json = gpd.GeoSeries([bbox_poly]).to_json() return json.loads(bbox_json)['features'][0]['geometry'] From 6edbf44cb4ee3ba6f86661041069d5163038e2ab Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 17:30:11 +0100 Subject: [PATCH 115/125] removed blank line --- eodal/operational/mapping/mapper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/eodal/operational/mapping/mapper.py b/eodal/operational/mapping/mapper.py index ede85bbc..078bb634 100644 --- a/eodal/operational/mapping/mapper.py +++ b/eodal/operational/mapping/mapper.py @@ -388,7 +388,7 @@ def _prepare_features(self) -> pd.DataFrame: else: aoi_features = self.feature_collection.copy() - # for the DB query, the geometries are required in geographic coordinates + # for the query, the geometries are required in geographic coordinates # however, we keep the original coordinates as well to avoid to many reprojections aoi_features["geometry_wgs84"] = aoi_features["geometry"].to_crs(4326) @@ -404,6 +404,7 @@ def _prepare_features(self) -> pd.DataFrame: aoi_features[self.unique_id_attribute] = [ str(uuid.uuid4()) for _ in aoi_features.iterrows() ] + return aoi_features def get_feature_ids(self) -> List: From 
2173777cee0a620552a3db8967f3373c9c3859e9 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 17:43:44 +0100 Subject: [PATCH 116/125] set USE_STAC to False to enable local testing --- tests/operational/test_sentinel2_resampling_pipeline.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/operational/test_sentinel2_resampling_pipeline.py b/tests/operational/test_sentinel2_resampling_pipeline.py index 485e9ebd..7a768ce3 100644 --- a/tests/operational/test_sentinel2_resampling_pipeline.py +++ b/tests/operational/test_sentinel2_resampling_pipeline.py @@ -3,9 +3,14 @@ import pytest import rasterio as rio +from eodal.config import get_settings from eodal.core.sensors import Sentinel2 from eodal.operational.resampling.sentinel2 import resample_and_stack_s2 +# testing with local files requires USE_STAC to be set to False +settings = get_settings() +settings.USE_STAC = False + @pytest.mark.parametrize('interpolation_method', [(cv2.INTER_CUBIC), (cv2.INTER_NEAREST_EXACT)]) def test_resample_and_stack_s2(datadir, get_s2_safe_l2a, interpolation_method): """Tests the resample and band stack module from the pipeline""" From 4309ddf27a96d257d0c8a0aac9e64b21fa801ff0 Mon Sep 17 00:00:00 2001 From: lukas Date: Wed, 7 Dec 2022 17:46:28 +0100 Subject: [PATCH 117/125] removing Python 3.8 and 3.9 support --- .pre-commit-config.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0f7a0e2d..b07d0dca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -38,8 +38,6 @@ repos: args: ["--profile=black"] - id: nbqa-black args: - - "--target-version=py38" - - "--target-version=py39" - "--target-version=py310" additional_dependencies: ["black==22.3.0"] - id: nbqa-flake8 From 16069830b11e74836eedadc11c112699aaf7c6df Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 8 Dec 2022 09:33:21 +0100 Subject: [PATCH 118/125] updating README (removing some redundant parts) --- README.rst | 29 
+++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/README.rst b/README.rst index 04fb298c..63f67dcc 100644 --- a/README.rst +++ b/README.rst @@ -3,26 +3,15 @@ E:earth_africa:dal Earth Observation Data Analysis Library ========================================================== - -The E:earth_africa:dal Python library to load, modify, analyze, modify and write Earth Observation data within an unified framework. - -**E:earth_africa:dal enables open-source, reproducible geo-spatial data science while lowering the burden of data handling on the user-side** - - -Quick start ------------ - -coming soon ... - -Why eodal ---------- - -**E:earth_africa:dal enables open-source, reproducible geo-spatial data science while lowering the burden of data handling on the user-side** - -E:earth_africa:dal is a lightweight ``Python3`` package to **organize**, -**explore** and **process** **Earth Observation data** in -an easy and intuitive manner. It supports the processing of **gridded datasets**, -**vector files** and the **spatial intersection** of these. +E:earth_africa:dal is a Python library enabling the acquisition, organization, and analysis of Earth observation data in a completely open-source manner. + +E:earth_africa:dal Python allows to +* load +* modify +* analyze +* modify +* write +Earth Observation data within an unified framework. E:earth_africa:dal thus enables open-source, reproducible geo-spatial data science while lowering the burden of data handling on the user-side. The roots of E:earth_africa:dal lay in **agricultural remote sensing applications** with **Sentinel-2**. 
However, due to its **modular and object-oriented programming structure**, it allows the From efa89f0a91b1428e4656c74f848c2b1e7b13c7fa Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 8 Dec 2022 09:35:58 +0100 Subject: [PATCH 119/125] removing repeated statements --- README.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.rst b/README.rst index 63f67dcc..bd8ccfbb 100644 --- a/README.rst +++ b/README.rst @@ -6,11 +6,11 @@ E:earth_africa:dal Earth Observation Data Analysis Library E:earth_africa:dal is a Python library enabling the acquisition, organization, and analysis of Earth observation data in a completely open-source manner. E:earth_africa:dal Python allows to -* load -* modify -* analyze -* modify -* write + * load + * modify + * analyze + * write + * and interface Earth Observation data within an unified framework. E:earth_africa:dal thus enables open-source, reproducible geo-spatial data science while lowering the burden of data handling on the user-side. The roots of E:earth_africa:dal lay in **agricultural remote sensing applications** with **Sentinel-2**. From 5f2f7a741a3be136dc0954c6b9bf8fe897ec919f Mon Sep 17 00:00:00 2001 From: lukas Date: Thu, 8 Dec 2022 09:38:16 +0100 Subject: [PATCH 120/125] adding blank lines --- README.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.rst b/README.rst index bd8ccfbb..49b6e8f8 100644 --- a/README.rst +++ b/README.rst @@ -6,11 +6,13 @@ E:earth_africa:dal Earth Observation Data Analysis Library E:earth_africa:dal is a Python library enabling the acquisition, organization, and analysis of Earth observation data in a completely open-source manner. E:earth_africa:dal Python allows to + * load * modify * analyze * write * and interface + Earth Observation data within an unified framework. E:earth_africa:dal thus enables open-source, reproducible geo-spatial data science while lowering the burden of data handling on the user-side. 
The roots of E:earth_africa:dal lay in **agricultural remote sensing applications** with **Sentinel-2**. From 6d84b4b85c94e149894019f0aed775d70a12efc8 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 9 Dec 2022 09:14:16 +0100 Subject: [PATCH 121/125] removal of blank lines --- examples/digital_terrain_model.py | 7 +- examples/random_sentinel2_pixels.py | 139 -------------------------- examples/satellite_archive_query.py | 1 - examples/sentinel1_mapping_example.py | 1 - 4 files changed, 6 insertions(+), 142 deletions(-) delete mode 100644 examples/random_sentinel2_pixels.py diff --git a/examples/digital_terrain_model.py b/examples/digital_terrain_model.py index c5e8b047..1ce9e88f 100644 --- a/examples/digital_terrain_model.py +++ b/examples/digital_terrain_model.py @@ -25,7 +25,12 @@ # load resource into a Band instance and name it "Elevation" dem = Band.from_rasterio(fpath_raster=dem_file, band_name_dst='Elevation') -print(dem.band_name) +print(dem) +# will display: +# EOdal Band +# ---------. 
+# Name: Elevation +# GeoInfo: {'epsg': 2056, 'ulx': 2618000.0, 'uly': 1093000.0, 'pixres_x': 2.0, 'pixres_y': -2.0} # fast visualization fig = dem.plot( diff --git a/examples/random_sentinel2_pixels.py b/examples/random_sentinel2_pixels.py deleted file mode 100644 index 643b35b3..00000000 --- a/examples/random_sentinel2_pixels.py +++ /dev/null @@ -1,139 +0,0 @@ -''' -Created on Oct 1, 2022 - -@author: graflu -''' - -import cv2 -import geopandas as gpd -import numpy as np -import pandas as pd - -from datetime import date -from pathlib import Path -from typing import Optional - -from eodal.operational.mapping import MapperConfigs, Sentinel2Mapper -from eodal.utils.constants import ProcessingLevels - -def assign_pixel_ids(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: - """ - Assigns unique pixel IDs based on the pixel geometries - - :param gdf: - GeoDataFrame with point geometries (pixels) - :returns: - the input GeoDataFrame with a new column containing unique - pixel IDs - """ - # we need the well-known-binary representation of the geometries - # to allow for fast assignment of pixel IDs (pandas cannot handle - # the actual geometries) - gdf['wkb'] = gdf.geometry.apply(lambda x: x.wkb) - unique_geoms = list(gdf.wkb.unique()) - pixel_ids = [x for x in range(len(unique_geoms))] - pixel_id_mapper = dict(zip(unique_geoms, pixel_ids)) - gdf['pixel_id'] = gdf.wkb.map(pixel_id_mapper) - return gdf - -def random_choice(pixel_series: gpd.GeoSeries, n: Optional[int] = 5) -> gpd.GeoSeries: - """ - Selects `n` observations from a pixel time series (all bands) - - :param pixel_series: - pixel time series - :param n: - number of observations to sample from the series - :returns: - randomly selected observations - """ - # get sensing dates available - dates = list(pixel_series.sensing_date.unique) - n_dates = len(dates) - # update (lower) n if required - if n_dates < n: - n = n_dates - # TODO select n dates and return the corresponding pixel values - return - -def 
get_pixels(date_start: date, date_end: date, scene_cloud_cover_threshold: int, - aois: gpd.GeoDataFrame | Path, **kwargs): - """ - Random selection of pixel observations from time series within one or more areas - of interest (AOIS, aka features). - - :param date_start: - start date for extracting Sentinel-2 data (inclusive) - :param date_end: - end date for extracting Sentinel-2 data (inclusive) - :param scene_cloud_cover_threshold: - scene-wide cloud cover threshold in percent [0-100]. Scenes with a cloud-cover - higher than the threshold are not considered - :param aois: - areas of interest (1 to N) for which to extract random pixel observations - """ - # setup Sentinel-2 mapper to get the relevant mapper - mapper_configs = MapperConfigs( - spatial_resolution=10., - resampling_method=cv2.INTER_NEAREST_EXACT, - band_names=['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A', 'B09', 'B11', 'B12'] - ) - - # get a new mapper instance - mapper = Sentinel2Mapper( - date_start=date_start, - date_end=date_end, - processing_level=ProcessingLevels.L2A, - cloud_cover_threshold=scene_cloud_cover_threshold, - mapper_configs=mapper_configs, - feature_collection=aois - ) - # query the available mapper (spatio-temporal query in the metadata catalog) - mapper.get_scenes() - # extract the actual S2 data - s2_data = mapper.get_complete_timeseries() - - # extraction is based on features (1 to N geometries) - features = mapper.feature_collection['features'] - - # loop over features and extract scene data - for idx, feature in enumerate(features): - feature_id = mapper.get_feature_ids()[idx] - # mapper of the actual feature - feature_scenes = s2_data[feature_id] - # loop over mapper, drop non-cloudfree observations and save spectral values to GeoDataFrame - feature_refl_list = [] - for feature_scene in feature_scenes: - # drop all observations but SCL classes 4 and 5 - feature_scene.mask_clouds_and_shadows(inplace=True) - # save spectral values as GeoDataFrame - refl_df = 
feature_scene.to_dataframe() - # drop nans (results from the masking of clouds) - refl_df.drop_nan(inplace=True) - # save the sensing date - refl_df['sensing_date'] = pd.to_datetime(feature_scene.scene_properties.sensing_date) - feature_refl_list.append(refl_df) - # create a single data frame per feature - feature_refl_df = pd.concat(feature_refl_list) - feature_refl_df.sort_values(by='sensing_date', inplace=True) - # assign pixel ids based on the coordinates so that sampling per pixel time series is possible - feature_refl_df_pid = assign_pixel_ids(gdf=feature_refl_df) - # select 5 observations per pixel (or less if there are not enough) by random choice - feature_refl_grouped = feature_refl_df_pid.groupby(by='pixel_id') - # apply random choice on each pixel - - -if __name__ == '__main__': - - date_start = date(2022,3,1) - date_end = date(2022,3,31) - aois = Path('../data/sample_polygons/BY_AOI_2019_MNI_EPSG32632.shp') - scene_cloud_cover_threshold = 50 - - get_pixels( - date_start=date_start, - date_end=date_end, - scene_cloud_cover_threshold=scene_cloud_cover_threshold, - aois=aois - ) - \ No newline at end of file diff --git a/examples/satellite_archive_query.py b/examples/satellite_archive_query.py index 48885d8b..253d118e 100644 --- a/examples/satellite_archive_query.py +++ b/examples/satellite_archive_query.py @@ -94,4 +94,3 @@ ax.set_xlim(-1,12) ax.legend(fontsize=16) f.savefig(out_dir.joinpath(f'monthly_cloudy_pixel_percentage_{date_start}-{date_end}.png'), bbox_inches='tight') - diff --git a/examples/sentinel1_mapping_example.py b/examples/sentinel1_mapping_example.py index 90767bab..d0d7468c 100644 --- a/examples/sentinel1_mapping_example.py +++ b/examples/sentinel1_mapping_example.py @@ -71,4 +71,3 @@ # retrieve metadata of mapper found (no reading) mapper.get_scenes() mapper.observations - From 1aaa846c295e5914b1eb825e52e72b93be536659 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 9 Dec 2022 09:25:08 +0100 Subject: [PATCH 122/125] updating content of 
README.rst --- README.rst | 56 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/README.rst b/README.rst index 49b6e8f8..7164e7dd 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,19 @@ |GHA tests| |Codecov report| |pre-commit| |black| +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""" E:earth_africa:dal Earth Observation Data Analysis Library -========================================================== +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""" + +................................................................................ +A truely open-source package for unified analysis of Earth Observation (EO) data +................................................................................ + +.. contents:: Overview + :depth: 3 + +======================== +About E:earth_africa:dal +======================== E:earth_africa:dal is a Python library enabling the acquisition, organization, and analysis of Earth observation data in a completely open-source manner. @@ -13,19 +25,37 @@ E:earth_africa:dal Python allows to * write * and interface -Earth Observation data within an unified framework. E:earth_africa:dal thus enables open-source, reproducible geo-spatial data science while lowering the burden of data handling on the user-side. - -The roots of E:earth_africa:dal lay in **agricultural remote sensing applications** with **Sentinel-2**. -However, due to its **modular and object-oriented programming structure**, it allows the -**processing of (nearly) any type of Earth Observation data** and can be **adapted** to -**other remote sensing platforms** or **Earth Observation data sources** (e.g., Digital Elevation -Models, Soil Maps, Land Cover Maps etc.). - +EO data within an unified framework. E:earth_africa:dal thus enables open-source, reproducible geo-spatial data science while lowering the burden of data handling on the user-side. 
E:earth_africa:dal supports working in **cloud-environments** using [STAC catalogs](https://stacspec.org/) ("online" mode) and on **local premises** using a spatial PostgreSQL/PostGIS database to organize metadata ("offline" mode). -Examples --------- +Read more about E:earth_africa:dal in [this peer reviewed article](https://doi.org/10.1016/j.compag.2022.107487). + +======================== +Citing E:earth_africa:dal +======================== + +When using EOdal not only refer to our [license agreement](LICENSE) but also cite us properly: + +.. code::latex + + @article{GRAF2022107487, + title = {EOdal: An open-source Python package for large-scale agroecological research using Earth Observation and gridded environmental data}, + journal = {Computers and Electronics in Agriculture}, + volume = {203}, + pages = {107487}, + year = {2022}, + issn = {0168-1699}, + doi = {https://doi.org/10.1016/j.compag.2022.107487}, + url = {https://www.sciencedirect.com/science/article/pii/S0168169922007955}, + author = {Lukas Valentin Graf and Gregor Perich and Helge Aasen}, + keywords = {Satellite data, Python, Open-source, Earth Observation, Ecophysiology}, + abstract = {Earth Observation by means of remote sensing imagery and gridded environmental data opens tremendous opportunities for systematic capture, quantification and interpretation of plant–environment interactions through space and time. The acquisition, maintenance and processing of these data sources, however, requires a unified software framework for efficient and scalable integrated spatio-temporal analysis taking away the burden of data and file handling from the user. Existing software products either cover only parts of these requirements, exhibit a high degree of complexity, or are closed-source, which limits reproducibility of research. 
With the open-source Python library EOdal (Earth Observation Data Analysis Library) we propose a novel software that enables the development of fully reproducible spatial data science chains through the strict use of open-source developments. Thanks to its modular design, EOdal enables advanced data warehousing especially for remote sensing data, sophisticated spatio-temporal analysis and intersection of different data sources, as well as nearly unlimited expandability through application programming interfaces (APIs).} + } + +============================== +Examples of E:earth_africa:dal +============================== The following code snippet reads spectral bands from a Sentinel-2 scene organized in .SAFE folder structure acquired over Southern Germany in @@ -94,8 +124,8 @@ Output :alt: Sentinel-2 Scene classification layer +============ Contributing ------------- +============ Contributions to E:earth_africa:dal are welcome. Please make sure to read the [contribution guidelines](Contributing.rst) first. 
- From dcac5265574df5a88498a278a84a487cf9847ec2 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 9 Dec 2022 09:29:13 +0100 Subject: [PATCH 123/125] removing Python 3.8 and 3.9 support in lack --- .pre-commit-config.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b07d0dca..0c69340a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,8 +15,6 @@ repos: hooks: - id: black args: - - "--target-version=py38" - - "--target-version=py39" - "--target-version=py310" types: [python] From ac99bdb04cfff5f00d0b83e06484648bb6611cc5 Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 9 Dec 2022 09:29:28 +0100 Subject: [PATCH 124/125] updated version and author information+ --- eodal/__meta__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/eodal/__meta__.py b/eodal/__meta__.py index b2ee8da1..32d5ce1a 100644 --- a/eodal/__meta__.py +++ b/eodal/__meta__.py @@ -8,10 +8,10 @@ # version = "0.1.dev0" author = ( "Crop Science, Institute of Agricultural Sciences, D-USYS, ETH Zurich, Zurich, Switzerland;\n" - "Remote Sensing Team, Division Agroecology and Environment, Agroscope, Zurich, Switzerland" + "Earth Observation of Agroecosystems Team, Division Agroecology and Environment, Agroscope, Zurich, Switzerland" ) author_email = "" description = "Earth Observation Data Analysis Library" # One-liner -url = "https://github.com/remote-sensing-team/eodal" # your project home-page +url = "https://github.com/EOA-team/eodal" # your project home-page license = "GNU General Public License version 3" # See https://choosealicense.com -version = "0.0" +version = "0.1.0" From 5a92ffe450a3642961c0b144b2710ae2f1310cae Mon Sep 17 00:00:00 2001 From: lukas Date: Fri, 9 Dec 2022 09:31:01 +0100 Subject: [PATCH 125/125] updating release date --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 58b33192..17e97095 100644 --- a/CHANGELOG.rst 
+++ b/CHANGELOG.rst @@ -13,7 +13,7 @@ Categories for changes are: Added, Changed, Deprecated, Removed, Fixed, Security Version `0.1.0 < https://github.com/EOA-team/eodal/releases/tag/v0.1.0>`__ -------------------------------------------------------------------------------- -Release date: YYYY-MM-DD +Release date: 2022-12-08 - Added: RasterCollection objects are now iterable (iterate over bands in collection) - Added: RasterCollection now have a "apply" method allowing to pass custom functions to RasterCollection objects