hubmapconsortium · sunset666 · Aug 22, 2024 · Aug 22, 2024 · Aug 22, 2024 · Aug 22, 2024
diff --git a/.gitignore b/.gitignore
@@ -130,3 +130,7 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# PyCharm
+.idea/
+
diff --git a/src/ingest_validation_tests/segmentation_mask_imagesize_validation.py b/src/ingest_validation_tests/segmentation_mask_imagesize_validation.py
@@ -0,0 +1,87 @@
+from pathlib import Path
+from typing import List, Optional, Union
+
+import tifffile
+import xmlschema
+from ingest_validation_tools.plugin_validator import Validator
+from utils import GetParentData
+
+
+def get_ometiff_size(file) -> Union[str, dict]:
+    """Read the physical pixel size recorded in an OME-TIFF file's OME-XML metadata.
+
+    Args:
+        file: The file to inspect; passed unchanged to ``tifffile.TiffFile``.
+
+    Returns:
+        On success, a dict with the keys ``X``, ``XUnits``, ``Y``, ``YUnits``,
+        ``Z`` and ``ZUnits`` holding the ``PhysicalSize*`` attributes of the
+        ``Pixels`` element of the first ``Image`` (a value is ``None`` when the
+        attribute is absent). On any failure to open, parse, validate or
+        decode the file, an error message string instead; no exception is
+        propagated to the caller.
+    """
+    try:
+        # The context manager closes the file handle even when parsing fails.
+        with tifffile.TiffFile(file) as tf:
+            ome_xml = tf.ome_metadata
+        xml_document = xmlschema.XmlDocument(ome_xml)
+        if xml_document.schema and not xml_document.schema.is_valid(xml_document):
+            return f"{file} is not a valid OME.TIFF file"
+        # Decoding stays inside the try block: a missing schema or a document
+        # without an Image/Pixels element must yield the error string, not an
+        # uncaught AttributeError/TypeError.
+        decoded = xml_document.schema.to_dict(xml_document)
+        xml_image_data = decoded.get("Image")[0].get("Pixels")
+        return {
+            "X": xml_image_data.get("@PhysicalSizeX"),
+            "XUnits": xml_image_data.get("@PhysicalSizeXUnits"),
+            "Y": xml_image_data.get("@PhysicalSizeY"),
+            "YUnits": xml_image_data.get("@PhysicalSizeYUnits"),
+            "Z": xml_image_data.get("@PhysicalSizeZ"),
+            "ZUnits": xml_image_data.get("@PhysicalSizeZUnits"),
+        }
+    except Exception as excp:
+        # Deliberately broad: every failure is reported to the caller as text.
+        return f"{file} is not a valid OME.TIFF file: {excp}"
+
+
+class ImageSizeValidator(Validator):
+    """Check that a segmentation mask and its parent base image report the same size.
+
+    For every metadata row, the single OME-TIFF found under the row's
+    ``data_path`` is compared with the single OME-TIFF found in the parent
+    dataset's directory, using the values returned by ``get_ometiff_size``.
+
+    NOTE(review): ``contains``, ``assay_type``, ``metadata_tsv``, ``paths``,
+    ``globus_token`` and ``app_context`` are read here but not defined in this
+    class; presumably the ``Validator`` base class provides them - confirm.
+    """
+
+    description = "Check dataset and parent image size so they can be matched in the visualization"
+    cost = 1.0
+    version = "1.0"
+    required = "segmentation_mask"
+    # Glob patterns (relative to a dataset directory) that select OME-TIFF files.
+    files_to_find = [
+        "**/*.ome.tif",
+        "**/*.ome.tiff",
+        "**/*.OME.TIFF",
+        "**/*.OME.TIF",
+    ]
+
+    def _find_files(self, directory: Path) -> List[Path]:
+        """Return the sorted, de-duplicated paths under *directory* matching files_to_find."""
+        # A set is used because one file may match more than one pattern
+        # (presumably on case-insensitive file systems - confirm).
+        found = set()
+        for glob_expr in self.files_to_find:
+            found.update(directory.glob(glob_expr))
+        return sorted(found)
+
+    def collect_errors(self, **kwargs) -> List[Optional[str]]:
+        """Compare mask and base image sizes for every row of the metadata TSV.
+
+        Returns:
+            A list of error messages when at least one problem was found;
+            ``[None]`` when at least one mask/base pair was compared and no
+            problem was found; ``[]`` when the validator does not apply or
+            there was nothing to compare.
+        """
+        del kwargs
+        if self.required not in self.contains and self.assay_type.lower() != self.required:
+            return []  # We only test Segmentation Masks
+        files_tested = False
+        output: List[Optional[str]] = []
+        for row in self.metadata_tsv.rows:
+            data_path = Path(row["data_path"])
+            if not data_path.is_absolute():
+                data_path = Path(self.paths[0]).parent / data_path
+
+            # Resolve the parent directory once per row; every lookup costs
+            # remote requests, so it must not be repeated per glob pattern.
+            parent_id = row["parent_dataset_id"]
+            parent_path = GetParentData(
+                parent_id, self.globus_token, self.app_context
+            ).get_path()
+            if not parent_path:
+                # An empty path would make Path("") glob the working directory.
+                output.append(f"Could not find the path of parent dataset {parent_id}")
+                continue
+
+            # The lists are rebuilt for each row so that counts never leak
+            # from one row into the next.
+            filenames_to_test = self._find_files(data_path)
+            parent_filenames_to_test = self._find_files(Path(parent_path))
+
+            # Explicit checks instead of assert, which is stripped under -O.
+            if len(filenames_to_test) != 1:
+                output.append("Too many or too few files Mask")
+                continue
+            if len(parent_filenames_to_test) != 1:
+                output.append("Too many or too few files Base Images")
+                continue
+
+            files_tested = True
+            segmentation_mask_size = get_ometiff_size(filenames_to_test[0])
+            base_image_size = get_ometiff_size(parent_filenames_to_test[0])
+
+            # get_ometiff_size reports failures as strings; surface them
+            # as-is rather than as a misleading size mismatch.
+            read_errors = [
+                size for size in (segmentation_mask_size, base_image_size) if isinstance(size, str)
+            ]
+            if read_errors:
+                output.extend(read_errors)
+            elif segmentation_mask_size != base_image_size:
+                output.append("Files and base image size do not match")
+
+        if output:
+            return output
+        if files_tested:
+            return [None]
+        return []
diff --git a/src/ingest_validation_tests/utils.py b/src/ingest_validation_tests/utils.py
@@ -0,0 +1,38 @@
+import requests
+
+
+class GetParentData:
+    """Look up the file-system path of a dataset from its HuBMAP ID.
+
+    The lookup takes two HTTP GET requests: one to ``app_context["uuid_url"]``
+    to read the ``uuid`` field of the response, and one to
+    ``app_context["ingest_url"]`` to read the ``path`` field of the response.
+    Request failures are printed and reported as an empty path; they are not
+    raised.
+    """
+
+    def __init__(self, hubmap_id, globus_token, app_context):
+        """Store the lookup inputs.
+
+        Args:
+            hubmap_id: Identifier appended to ``app_context["uuid_url"]``.
+            globus_token: Token sent as ``Authorization: Bearer <token>``.
+            app_context: Mapping read for ``uuid_url``, ``ingest_url`` and the
+                optional ``request_headers``.
+        """
+        self.hubmap_id = hubmap_id
+        self.token = globus_token
+        self.app_context = app_context
+        # Filled in by __get_uuid(); stays None until a lookup succeeds.
+        self.uuid = None
+
+    def __auth_headers(self) -> dict:
+        """Return a copy of the configured request headers plus the bearer token."""
+        # Copy so the shared app_context mapping is never mutated.
+        headers = dict(self.app_context.get("request_headers") or {})
+        headers["Authorization"] = "Bearer " + self.token
+        return headers
+
+    def __get_uuid(self) -> None:
+        """Set ``self.uuid`` from the UUID service, or to None when the request fails."""
+        url = self.app_context.get("uuid_url") + self.hubmap_id
+        try:
+            response = requests.get(url, headers=self.__auth_headers())
+            response.raise_for_status()
+            self.uuid = response.json().get("uuid")
+        except requests.exceptions.RequestException as err:
+            # RequestException also covers connection errors and timeouts,
+            # not only HTTP error statuses.
+            self.uuid = None
+            print(f"Error: {err}")
+
+    def get_path(self) -> str:
+        """Return the dataset's path, or "" when it cannot be determined."""
+        self.__get_uuid()
+        if self.uuid is None:
+            return ""
+        url = (
+            self.app_context.get("ingest_url")
+            + "datasets/"
+            + self.uuid
+            + "/file-system-abs-path"
+        )
+        try:
+            # NOTE(review): the bearer token is sent here as well, on the
+            # assumption that this endpoint needs the same credentials as
+            # the UUID lookup - confirm.
+            response = requests.get(url, headers=self.__auth_headers())
+            response.raise_for_status()
+            # Honour the declared str return type when "path" is missing.
+            return response.json().get("path") or ""
+        except requests.exceptions.RequestException as err:
+            print(f"Error: {err}")
+        return ""