-
Notifications
You must be signed in to change notification settings - Fork 4
Make XML metadata optional, extract from HDF if XML not found #97
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,13 +1,16 @@ | ||
| import datetime | ||
| import os.path | ||
| import warnings | ||
| from dataclasses import dataclass | ||
| from typing import Any, Callable, Dict, List, Optional, Tuple | ||
|
|
||
| import fsspec | ||
| import numpy as np | ||
| import rasterio | ||
| from lxml import etree | ||
| from rasterio import Affine | ||
| from rasterio.crs import CRS | ||
| from rasterio.errors import NotGeoreferencedWarning | ||
| from shapely.geometry import shape | ||
| from stactools.core.io import ReadHrefModifier | ||
| from stactools.core.io.xml import XmlElement | ||
|
|
@@ -232,6 +235,8 @@ def from_cog_tags(cls, cog_tags: Dict[str, str]) -> "Metadata": | |
| geometry, bbox = cls._geometry_and_bbox( | ||
| collection, horizontal_tile, vertical_tile | ||
| ) | ||
| qa_percent = cog_tags.get("QAPERCENTNOTPRODUCEDCLOUD") | ||
| qa_percent_not_produced_cloud = int(qa_percent) if qa_percent else None | ||
| return Metadata( | ||
| id=os.path.splitext(cog_tags["LOCALGRANULEID"])[0], | ||
| product=product, | ||
|
|
@@ -242,16 +247,42 @@ def from_cog_tags(cls, cog_tags: Dict[str, str]) -> "Metadata": | |
| end_datetime=end_datetime, | ||
| created=None, | ||
| updated=None, | ||
| qa_percent_not_produced_cloud=int(cog_tags["QAPERCENTNOTPRODUCEDCLOUD"]), | ||
| qa_percent_not_produced_cloud=qa_percent_not_produced_cloud, | ||
| qa_percent_cloud_cover=None, | ||
| horizontal_tile=horizontal_tile, | ||
| vertical_tile=vertical_tile, | ||
| tile_id=cog_tags["TileID"], | ||
| tile_id=cog_tags.get("TileID", ""), | ||
| platforms=sorted(list(platforms)), | ||
| instruments=sorted(list(instruments)), | ||
| collection=collection, | ||
| ) | ||
|
|
||
| @classmethod | ||
| def from_hdf_href( | ||
| cls, href: str, read_href_modifier: Optional[ReadHrefModifier] = None | ||
| ) -> "Metadata": | ||
| """Reads metadata from an HDF file when XML is not available. | ||
|
|
||
| Args: | ||
| href (str): The href of the HDF file | ||
| read_href_modifier (Optional[Callable[[str], str]]): Optional | ||
| function to modify the read href | ||
|
|
||
| Returns: | ||
| Metadata: Information that will map to Item attributes. | ||
| """ | ||
| if read_href_modifier: | ||
| read_href = read_href_modifier(href) | ||
| else: | ||
| read_href = href | ||
|
|
||
| with warnings.catch_warnings(): | ||
| warnings.simplefilter("ignore", category=NotGeoreferencedWarning) | ||
| with rasterio.open(read_href) as dataset: | ||
| hdf_tags = dataset.tags() | ||
|
||
|
|
||
| return cls.from_cog_tags(hdf_tags) | ||
|
|
||
| @property | ||
| def datetime(self) -> Optional[datetime.datetime]: | ||
| """Returns a single nominal datetime for this metadata file. | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -207,6 +207,35 @@ def test_raster_footprint_geometry() -> None: | |||||||||||||||||||||||||||||||||||
| item.validate() | ||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||
| def test_create_item_from_hdf_without_xml() -> None: | ||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||
| def test_create_item_from_hdf_without_xml() -> None: | |
| def test_create_item_from_hdf_without_xml(tmp_path: Path) -> None: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm generally 👎🏼 on docstrings for tests, as I prefer to let the test function name and content speak for themselves.
| """Test that an item can be created from an HDF file when XML is not available. | |
| This tests the fallback to extracting metadata directly from the HDF file | |
| when the accompanying XML metadata file is not present. | |
| """ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, they're auto-generated comments and I have removed them.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These look like auto-generated comments; again, I generally prefer to let the code speak for itself.
| # Copy only the HDF file (not the XML) to ensure XML is not available | |
| temp_hdf_path = os.path.join(temporary_directory, hdf_file) | |
| shutil.copyfile(source_hdf_path, temp_hdf_path) | |
| # Verify XML does not exist in temp directory | |
| temp_xml_path = f"{temp_hdf_path}.xml" | |
| assert not os.path.exists(temp_xml_path), "XML file should not exist" | |
| # Create item from HDF only - should extract metadata from HDF | |
| item = stactools.modis.stac.create_item(temp_hdf_path) | |
| # Verify item was created with correct metadata | |
| temp_hdf_path = os.path.join(temporary_directory, hdf_file) | |
| shutil.copyfile(source_hdf_path, temp_hdf_path) | |
| temp_xml_path = f"{temp_hdf_path}.xml" | |
| assert not os.path.exists(temp_xml_path), "XML file should not exist" | |
| item = stactools.modis.stac.create_item(temp_hdf_path) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| assert "metadata" not in item.assets # XML asset should not be present | |
| assert "metadata" not in item.assets |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
Uh oh!
There was an error while loading. Please reload this page.