Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Changed

- Item IDs no longer contain the production datetime ([#88](https://github.com/stactools-packages/modis/pull/88))
- Make XML metadata optional - extract metadata from HDF file if XML is not available ([#XX](https://github.com/stactools-packages/modis/pull/XX))

### Fixed

Expand Down
8 changes: 7 additions & 1 deletion src/stactools/modis/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,13 @@ def add_hdf_or_xml_href(
xml_href = f"{href}.xml"
else:
raise ValueError(f"Invalid HDF or XML href: {href}")
self.add_xml_asset(xml_href)

# Add XML asset if it exists, otherwise extract metadata from HDF
if os.path.exists(xml_href):
self.add_xml_asset(xml_href)
else:
self.metadata = Metadata.from_hdf_href(hdf_href, self.read_href_modifier)

self.add_hdf_asset(
hdf_href, cog_directory=cog_directory, create_cogs=create_cogs
)
Expand Down
35 changes: 33 additions & 2 deletions src/stactools/modis/metadata.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import datetime
import os.path
import warnings
from dataclasses import dataclass
from typing import Any, Callable, Dict, List, Optional, Tuple

import fsspec
import numpy as np
import rasterio
from lxml import etree
from rasterio import Affine
from rasterio.crs import CRS
from rasterio.errors import NotGeoreferencedWarning
from shapely.geometry import shape
from stactools.core.io import ReadHrefModifier
from stactools.core.io.xml import XmlElement
Expand Down Expand Up @@ -232,6 +235,8 @@ def from_cog_tags(cls, cog_tags: Dict[str, str]) -> "Metadata":
geometry, bbox = cls._geometry_and_bbox(
collection, horizontal_tile, vertical_tile
)
qa_percent = cog_tags.get("QAPERCENTNOTPRODUCEDCLOUD")
qa_percent_not_produced_cloud = int(qa_percent) if qa_percent else None
return Metadata(
id=os.path.splitext(cog_tags["LOCALGRANULEID"])[0],
product=product,
Expand All @@ -242,16 +247,42 @@ def from_cog_tags(cls, cog_tags: Dict[str, str]) -> "Metadata":
end_datetime=end_datetime,
created=None,
updated=None,
qa_percent_not_produced_cloud=int(cog_tags["QAPERCENTNOTPRODUCEDCLOUD"]),
qa_percent_not_produced_cloud=qa_percent_not_produced_cloud,
qa_percent_cloud_cover=None,
horizontal_tile=horizontal_tile,
vertical_tile=vertical_tile,
tile_id=cog_tags["TileID"],
tile_id=cog_tags.get("TileID", ""),
platforms=sorted(list(platforms)),
instruments=sorted(list(instruments)),
collection=collection,
)

@classmethod
def from_hdf_href(
cls, href: str, read_href_modifier: Optional[ReadHrefModifier] = None
) -> "Metadata":
"""Reads metadata from an HDF file when XML is not available.

Args:
href (str): The href of the HDF file
read_href_modifier (Optional[Callable[[str], str]]): Optional
function to modify the read href

Returns:
Metadata: Information that will map to Item attributes.
"""
if read_href_modifier:
read_href = read_href_modifier(href)
else:
read_href = href

with warnings.catch_warnings():
warnings.simplefilter("ignore", category=NotGeoreferencedWarning)
with rasterio.open(read_href) as dataset:
hdf_tags = dataset.tags()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason to catch this warning? Feels like we do want to propagate this up to the user.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the suppression. It is now visible in the output.


return cls.from_cog_tags(hdf_tags)

@property
def datetime(self) -> Optional[datetime.datetime]:
"""Returns a single nominal datetime for this metadata file.
Expand Down
29 changes: 29 additions & 0 deletions tests/test_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,35 @@ def test_raster_footprint_geometry() -> None:
item.validate()


def test_create_item_from_hdf_without_xml() -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pytest provides a fixture for a temporary directory, let's use that instead.

Suggested change
def test_create_item_from_hdf_without_xml() -> None:
def test_create_item_from_hdf_without_xml(tmp_path: Path) -> None:

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

"""Test that an item can be created from an HDF file when XML is not available.

This tests the fallback to extracting metadata directly from the HDF file
when the accompanying XML metadata file is not present.
"""
Copy link
Contributor

@gadomski gadomski Dec 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm generally 👎🏼 on docstrings for tests, as I prefer to let the test function name and content speak for themselves.

Suggested change
"""Test that an item can be created from an HDF file when XML is not available.
This tests the fallback to extracting metadata directly from the HDF file
when the accompanying XML metadata file is not present.
"""

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, they're auto-generated comments and I have removed them.

hdf_file = "MOD10A2.A2022033.h09v05.061.2022042050729.hdf"
source_hdf_path = test_data.get_path(f"data-files/{hdf_file}")

with TemporaryDirectory() as temporary_directory:
# Copy only the HDF file (not the XML) to ensure XML is not available
temp_hdf_path = os.path.join(temporary_directory, hdf_file)
shutil.copyfile(source_hdf_path, temp_hdf_path)

# Verify XML does not exist in temp directory
temp_xml_path = f"{temp_hdf_path}.xml"
assert not os.path.exists(temp_xml_path), "XML file should not exist"

# Create item from HDF only - should extract metadata from HDF
item = stactools.modis.stac.create_item(temp_hdf_path)

# Verify item was created with correct metadata
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These look like auto-gen comments, again I generally prefer to let the code speak for itself.

Suggested change
# Copy only the HDF file (not the XML) to ensure XML is not available
temp_hdf_path = os.path.join(temporary_directory, hdf_file)
shutil.copyfile(source_hdf_path, temp_hdf_path)
# Verify XML does not exist in temp directory
temp_xml_path = f"{temp_hdf_path}.xml"
assert not os.path.exists(temp_xml_path), "XML file should not exist"
# Create item from HDF only - should extract metadata from HDF
item = stactools.modis.stac.create_item(temp_hdf_path)
# Verify item was created with correct metadata
temp_hdf_path = os.path.join(temporary_directory, hdf_file)
shutil.copyfile(source_hdf_path, temp_hdf_path)
temp_xml_path = f"{temp_hdf_path}.xml"
assert not os.path.exists(temp_xml_path), "XML file should not exist"
item = stactools.modis.stac.create_item(temp_hdf_path)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

assert item is not None
assert item.id.startswith("MOD10A2.A2022033.h09v05")
assert "hdf" in item.assets
assert "metadata" not in item.assets # XML asset should not be present
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
assert "metadata" not in item.assets # XML asset should not be present
assert "metadata" not in item.assets

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

item.validate()


@pytest.mark.parametrize("file_name", PROJECTION_EDGE_FILES)
def test_raster_footprint_at_projection_edge(file_name: str) -> None:
path = test_data.get_path(file_name)
Expand Down
Loading