Skip to content

Commit

Permalink
Merge branch 'issue573-stac-item-assets-support'
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Jun 18, 2024
2 parents 14854ce + 84a9f5b commit 62477b6
Show file tree
Hide file tree
Showing 7 changed files with 247 additions and 3 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Document PEP 723 based Python UDF dependency declarations ([Open-EO/openeo-geopyspark-driver#237](https://github.com/Open-EO/openeo-geopyspark-driver/issues/237))
- Added more `openeo.api.process.Parameter` helpers to easily create "bounding_box", "date", "datetime", "geojson" and "temporal_interval" parameters for UDP construction.
- Added convenience method `Connection.load_stac_from_job(job)` to easily load the results of a batch job with the `load_stac` process ([#566](https://github.com/Open-EO/openeo-python-client/issues/566))
- `load_stac`/`metadata_from_stac`: add support for extracting band info from "item_assets" in collection metadata ([#573](https://github.com/Open-EO/openeo-python-client/issues/573))
- Added initial `openeo.testing` submodule for reusable test utilities

### Changed

Expand Down
86 changes: 85 additions & 1 deletion openeo/metadata.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from __future__ import annotations

import functools
import logging
import warnings
from typing import Any, Callable, List, NamedTuple, Optional, Tuple, Union
from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set, Tuple, Union

import pystac
import pystac.extensions.eo
import pystac.extensions.item_assets

from openeo.internal.jupyter import render_component
from openeo.util import deep_get
Expand Down Expand Up @@ -107,6 +110,7 @@ class Band(NamedTuple):


class BandDimension(Dimension):
# TODO #575 support unordered bands and avoid assumption that band order is known.
def __init__(self, name: str, bands: List[Band]):
super().__init__(type="bands", name=name)
self.bands = bands
Expand Down Expand Up @@ -534,6 +538,8 @@ def metadata_from_stac(url: str) -> CubeMetadata:
:return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url.
"""

# TODO move these nested functions and other logic to _StacMetadataParser

def get_band_metadata(eo_bands_location: dict) -> List[Band]:
# TODO: return None iso empty list when no metadata?
return [
Expand Down Expand Up @@ -573,6 +579,10 @@ def is_band_asset(asset: pystac.Asset) -> bool:
for asset_band in asset_bands:
if asset_band.name not in get_band_names(bands):
bands.append(asset_band)
if _PYSTAC_1_9_EXTENSION_INTERFACE and collection.ext.has("item_assets"):
# TODO #575 support unordered band names and avoid conversion to a list.
bands = list(_StacMetadataParser().get_bands_from_item_assets(collection.ext.item_assets))

elif isinstance(stac_object, pystac.Catalog):
catalog = stac_object
bands = get_band_metadata(catalog.extra_fields.get("summaries", {}))
Expand All @@ -586,3 +596,77 @@ def is_band_asset(asset: pystac.Asset) -> bool:
temporal_dimension = TemporalDimension(name="t", extent=[None, None])
metadata = CubeMetadata(dimensions=[band_dimension, temporal_dimension])
return metadata


# Sniff for PySTAC extension API since version 1.9.0 (which is not available below Python 3.9)
# TODO: remove this once support for Python 3.7 and 3.8 is dropped
_PYSTAC_1_9_EXTENSION_INTERFACE = hasattr(pystac.Item, "ext")


class _StacMetadataParser:
"""
Helper to extract openEO metadata from STAC metadata resource
"""

def __init__(self):
# TODO: toggles for how to handle strictness, warnings, logging, etc
pass

def _get_band_from_eo_bands_item(self, eo_band: Union[dict, pystac.extensions.eo.Band]) -> Band:
if isinstance(eo_band, pystac.extensions.eo.Band):
return Band(
name=eo_band.name,
common_name=eo_band.common_name,
wavelength_um=eo_band.center_wavelength,
)
elif isinstance(eo_band, dict) and "name" in eo_band:
return Band(
name=eo_band["name"],
common_name=eo_band.get("common_name"),
wavelength_um=eo_band.get("center_wavelength"),
)
else:
raise ValueError(eo_band)

def get_bands_from_eo_bands(self, eo_bands: List[Union[dict, pystac.extensions.eo.Band]]) -> List[Band]:
"""
Extract bands from STAC `eo:bands` array
:param eo_bands: List of band objects, as dict or `pystac.extensions.eo.Band` instances
"""
# TODO: option to skip bands that failed to parse in some way?
return [self._get_band_from_eo_bands_item(band) for band in eo_bands]

def _get_bands_from_item_asset(
self, item_asset: pystac.extensions.item_assets.AssetDefinition, *, _warn: Callable[[str], None] = _log.warning
) -> Union[List[Band], None]:
"""Get bands from a STAC 'item_assets' asset definition."""
if _PYSTAC_1_9_EXTENSION_INTERFACE and item_asset.ext.has("eo"):
if item_asset.ext.eo.bands is not None:
return self.get_bands_from_eo_bands(item_asset.ext.eo.bands)
elif "eo:bands" in item_asset.properties:
# TODO: skip this in strict mode?
if _PYSTAC_1_9_EXTENSION_INTERFACE:
_warn("Extracting band info from 'eo:bands' metadata, but 'eo' STAC extension was not declared.")
return self.get_bands_from_eo_bands(item_asset.properties["eo:bands"])

def get_bands_from_item_assets(
self, item_assets: Dict[str, pystac.extensions.item_assets.AssetDefinition]
) -> Set[Band]:
"""
Get bands extracted from "item_assets" objects (defined by "item-assets" extension,
in combination with "eo" extension) at STAC Collection top-level,
Note that "item_assets" in STAC is a mapping, so the band order is undefined,
which is why we return a set of bands here.
:param item_assets: a STAC `item_assets` mapping
"""
bands = set()
# Trick to just warn once per collection
_warn = functools.lru_cache()(_log.warning)
for item_asset in item_assets.values():
asset_bands = self._get_bands_from_item_asset(item_asset, _warn=_warn)
if asset_bands:
bands.update(asset_bands)
return bands
36 changes: 36 additions & 0 deletions openeo/testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
Utilities for testing of openEO client workflows.
"""

import json
from pathlib import Path
from typing import Callable, Optional, Union


class TestDataLoader:
"""
Helper to resolve paths to test data files, load them as JSON, optionally preprocess them, etc.
It's intended to be used as a pytest fixture, e.g. from conftest.py:
@pytest.fixture
def test_data() -> TestDataLoader:
return TestDataLoader(root=Path(__file__).parent / "data")
.. versionadded:: 0.30.0
"""

def __init__(self, root: Union[str, Path]):
self.data_root = Path(root)

def get_path(self, filename: Union[str, Path]) -> Path:
"""Get absolute path to a test data file"""
return self.data_root / filename

def load_json(self, filename: Union[str, Path], preprocess: Optional[Callable[[str], str]] = None) -> dict:
"""Parse data from a test JSON file"""
data = self.get_path(filename).read_text(encoding="utf8")
if preprocess:
data = preprocess(data)
return json.loads(data)
2 changes: 2 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@


def get_test_resource(relative_path: str) -> Path:
# TODO: migrate to TestDataLoader
dir = Path(os.path.dirname(os.path.realpath(__file__)))
return dir / relative_path


def load_json_resource(relative_path, preprocess: Callable = None) -> dict:
# TODO: migrate to TestDataLoader
with get_test_resource(relative_path).open("r+") as f:
data = f.read()
if preprocess:
Expand Down
6 changes: 6 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pytest

from openeo.testing import TestDataLoader
from openeo.util import ensure_dir

pytest_plugins = "pytester"
Expand All @@ -25,3 +26,8 @@ def tmp_openeo_config_home(tmp_path):
path = ensure_dir(Path(str(tmp_path)) / "openeo-conf")
with mock.patch.dict("os.environ", {"OPENEO_CONFIG_HOME": str(path)}):
yield path


@pytest.fixture
def test_data() -> TestDataLoader:
return TestDataLoader(root=Path(__file__).parent / "data")
82 changes: 82 additions & 0 deletions tests/data/stac/collections/agera5_daily01.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
{
"type": "Collection",
"id": "agera5_daily",
"stac_version": "1.0.0",
"description": "ERA5",
"links": [
{
"rel": "self",
"type": "application/json",
"href": "https://stac.test/collections/agera5_daily"
}
],
"stac_extensions": [
"https://stac-extensions.github.io/item-assets/v1.0.0/schema.json",
"https://stac-extensions.github.io/eo/v1.1.0/schema.json"
],
"item_assets": {
"2m_temperature_min": {
"type": "image/tiff; application=geotiff",
"title": "2m temperature min 24h",
"eo:bands": [
{
"name": "2m_temperature_min",
"description": "temperature 2m above ground (Kelvin)"
}
]
},
"2m_temperature_max": {
"type": "image/tiff; application=geotiff",
"eo:bands": [
{
"name": "2m_temperature_max",
"description": "temperature 2m above ground (Kelvin)"
}
]
},
"dewpoint_temperature_mean": {
"type": "image/tiff; application=geotiff",
"title": "2m dewpoint temperature",
"eo:bands": [
{
"name": "dewpoint_temperature_mean",
"description": "dewpoint temperature 2m above ground (Kelvin)"
}
]
},
"vapour_pressure": {
"eo:bands": [
{
"name": "vapour_pressure"
}
]
}
},
"title": "agERA5 data",
"extent": {
"spatial": {
"bbox": [
[
-180,
-90,
180,
90
]
]
},
"temporal": {
"interval": [
[
"2010-01-01T00:00:00Z",
"2024-06-12T00:00:00Z"
]
]
}
},
"keywords": [
"ERA5"
],
"summaries": {},
"assets": {},
"license": "proprietary"
}
36 changes: 34 additions & 2 deletions tests/test_metadata.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from __future__ import annotations

import json
from typing import List
import re
from typing import List, Union

import pytest

from openeo.metadata import (
_PYSTAC_1_9_EXTENSION_INTERFACE,
Band,
BandDimension,
CollectionMetadata,
Expand Down Expand Up @@ -835,8 +837,38 @@ def filter_bbox(self, bbox):
],
)
def test_metadata_from_stac(tmp_path, test_stac, expected):

path = tmp_path / "stac.json"
path.write_text(json.dumps(test_stac))
metadata = metadata_from_stac(path)
assert metadata.band_names == expected


@pytest.mark.skipif(not _PYSTAC_1_9_EXTENSION_INTERFACE, reason="Requires PySTAC 1.9+ extension interface")
@pytest.mark.parametrize("eo_extension_is_declared", [False, True])
def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_path, eo_extension_is_declared, caplog):
stac_data = test_data.load_json("stac/collections/agera5_daily01.json")
stac_data["stac_extensions"] = [
ext
for ext in stac_data["stac_extensions"]
if (not ext.startswith("https://stac-extensions.github.io/eo/") or eo_extension_is_declared)
]
assert (
any(ext.startswith("https://stac-extensions.github.io/eo/") for ext in stac_data["stac_extensions"])
== eo_extension_is_declared
)
path = tmp_path / "stac.json"
path.write_text(json.dumps(stac_data))

metadata = metadata_from_stac(path)
assert sorted(metadata.band_names) == [
"2m_temperature_max",
"2m_temperature_min",
"dewpoint_temperature_mean",
"vapour_pressure",
]

warn_count = sum(
"Extracting band info from 'eo:bands' metadata, but 'eo' STAC extension was not declared." in m
for m in caplog.messages
)
assert warn_count == (0 if eo_extension_is_declared else 1)

0 comments on commit 62477b6

Please sign in to comment.