Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Changed

- Item IDs no longer contain the production datetime ([#88](https://github.com/stactools-packages/modis/pull/88))
- Make XML metadata optional - extract metadata from HDF file if XML is not available ([#XX](https://github.com/stactools-packages/modis/pull/XX))

### Fixed

Expand Down
8 changes: 7 additions & 1 deletion src/stactools/modis/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,13 @@ def add_hdf_or_xml_href(
xml_href = f"{href}.xml"
else:
raise ValueError(f"Invalid HDF or XML href: {href}")
self.add_xml_asset(xml_href)

# Add XML asset if it exists, otherwise extract metadata from HDF
if os.path.exists(xml_href):
self.add_xml_asset(xml_href)
else:
self.metadata = Metadata.from_hdf_href(hdf_href, self.read_href_modifier)

self.add_hdf_asset(
hdf_href, cog_directory=cog_directory, create_cogs=create_cogs
)
Expand Down
29 changes: 28 additions & 1 deletion src/stactools/modis/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import fsspec
import numpy as np
import rasterio
from lxml import etree
from rasterio import Affine
from rasterio.crs import CRS
Expand Down Expand Up @@ -232,6 +233,8 @@ def from_cog_tags(cls, cog_tags: Dict[str, str]) -> "Metadata":
geometry, bbox = cls._geometry_and_bbox(
collection, horizontal_tile, vertical_tile
)
qa_percent = cog_tags.get("QAPERCENTNOTPRODUCEDCLOUD")
qa_percent_not_produced_cloud = int(qa_percent) if qa_percent else None
return Metadata(
id=os.path.splitext(cog_tags["LOCALGRANULEID"])[0],
product=product,
Expand All @@ -242,7 +245,7 @@ def from_cog_tags(cls, cog_tags: Dict[str, str]) -> "Metadata":
end_datetime=end_datetime,
created=None,
updated=None,
qa_percent_not_produced_cloud=int(cog_tags["QAPERCENTNOTPRODUCEDCLOUD"]),
qa_percent_not_produced_cloud=qa_percent_not_produced_cloud,
qa_percent_cloud_cover=None,
horizontal_tile=horizontal_tile,
vertical_tile=vertical_tile,
Expand All @@ -252,6 +255,30 @@ def from_cog_tags(cls, cog_tags: Dict[str, str]) -> "Metadata":
collection=collection,
)

@classmethod
def from_hdf_href(
    cls, href: str, read_href_modifier: Optional[ReadHrefModifier] = None
) -> "Metadata":
    """Builds metadata from the tags stored in an HDF file.

    Intended for the case where no XML metadata is available: the HDF file
    is opened with rasterio and its tags are passed to ``from_cog_tags``.

    Args:
        href (str): The href of the HDF file
        read_href_modifier (Optional[Callable[[str], str]]): Optional
            function applied to the href before the file is opened

    Returns:
        Metadata: Information that will map to Item attributes.
    """
    # Only rewrite the href when a modifier was actually supplied.
    read_href = read_href_modifier(href) if read_href_modifier else href
    with rasterio.open(read_href) as hdf:
        tags = hdf.tags()
    return cls.from_cog_tags(tags)

@property
def datetime(self) -> Optional[datetime.datetime]:
"""Returns a single nominal datetime for this metadata file.
Expand Down
19 changes: 19 additions & 0 deletions tests/test_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,25 @@ def test_raster_footprint_geometry() -> None:
item.validate()


def test_create_item_from_hdf_without_xml(tmp_path: Path) -> None:
    """Creates an item from an HDF file that has no XML file next to it."""
    hdf_file = "MOD10A2.A2022033.h09v05.061.2022042050729.hdf"

    # Work on a copy in a scratch directory so that only the HDF file is
    # present there.
    hdf_copy = tmp_path / hdf_file
    shutil.copyfile(test_data.get_path(f"data-files/{hdf_file}"), hdf_copy)

    xml_sidecar = tmp_path / f"{hdf_file}.xml"
    assert not xml_sidecar.exists()

    item = stactools.modis.stac.create_item(str(hdf_copy))

    assert item is not None
    assert item.id.startswith("MOD10A2.A2022033.h09v05")
    assert "hdf" in item.assets
    assert "metadata" not in item.assets
    item.validate()


@pytest.mark.parametrize("file_name", PROJECTION_EDGE_FILES)
def test_raster_footprint_at_projection_edge(file_name: str) -> None:
path = test_data.get_path(file_name)
Expand Down
Loading