diff --git a/.gitignore b/.gitignore
index da9fde3..0cd82b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -130,3 +130,7 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# PyCharm
+.idea/
+
diff --git a/src/ingest_validation_tests/segmentation_mask_imagesize_validation.py b/src/ingest_validation_tests/segmentation_mask_imagesize_validation.py
new file mode 100644
index 0000000..682ce73
--- /dev/null
+++ b/src/ingest_validation_tests/segmentation_mask_imagesize_validation.py
@@ -0,0 +1,111 @@
+from pathlib import Path
+from typing import List, Optional, Union
+
+import tifffile
+import xmlschema
+from ingest_validation_tools.plugin_validator import Validator
+from tests_utils import GetParentData
+
+
+def get_ometiff_size(file) -> Union[str, dict]:
+    """Return the physical pixel sizes recorded in an OME-TIFF file.
+
+    On success the result is a dict with the keys "X", "XUnits", "Y",
+    "YUnits", "Z" and "ZUnits", read from the Pixels element of the first
+    Image in the file's OME-XML metadata. If the file cannot be opened or
+    its metadata cannot be parsed or validated, an error message is
+    returned as a str instead of raising.
+    """
+    try:
+        # The context manager releases the file handle even when parsing fails.
+        with tifffile.TiffFile(file) as tf:
+            xml_document = xmlschema.XmlDocument(tf.ome_metadata)
+        if xml_document.schema and not xml_document.schema.is_valid(xml_document):
+            return f"{file} is not a valid OME.TIFF file"
+        # Kept inside the try block: a missing schema, Image or Pixels entry
+        # raises here and must be reported as a message like any other failure.
+        xml_image_data = xml_document.schema.to_dict(xml_document).get("Image")[0].get("Pixels")
+        return {
+            "X": xml_image_data.get("@PhysicalSizeX"),
+            "XUnits": xml_image_data.get("@PhysicalSizeXUnits"),
+            "Y": xml_image_data.get("@PhysicalSizeY"),
+            "YUnits": xml_image_data.get("@PhysicalSizeYUnits"),
+            "Z": xml_image_data.get("@PhysicalSizeZ"),
+            "ZUnits": xml_image_data.get("@PhysicalSizeZUnits"),
+        }
+    except Exception as excp:
+        return f"{file} is not a valid OME.TIFF file: {excp}"
+
+
+class ImageSizeValidator(Validator):
+    """Compare the pixel sizes of a segmentation mask and its parent image."""
+
+    description = "Check dataset and parent image size so they can be matched in the visualization"
+    cost = 1.0
+    version = "1.0"
+    required = "segmentation mask"
+    files_to_find = [
+        "**/segmentation_masks/*.ome.tif",
+        "**/segmentation_masks/*.ome.tiff",
+        "**/segmentation_masks/*.OME.TIFF",
+        "**/segmentation_masks/*.OME.TIF",
+    ]
+
+    def collect_errors(self, **kwargs) -> List[Optional[str]]:
+        """Check that the mask and the parent base image report the same size.
+
+        Returns a list of error messages, ``[None]`` when the files were
+        compared and no problem was found, or ``[]`` when nothing was tested.
+        """
+        del kwargs
+        print("Validating Image/SegMask sizes")
+        if self.required not in self.contains and self.assay_type.lower() != self.required:
+            return []  # We only test Segmentation Masks
+        files_tested = False
+        output = []
+        filenames_to_test = []
+        parent_filenames_to_test = []
+        for row in self.metadata_tsv.rows:
+            data_path = Path(row["data_path"])
+            if not data_path.is_absolute():
+                data_path = Path(self.paths[0]).parent / data_path
+
+            # One lookup per row: resolving the parent path issues HTTP requests.
+            parent_path = GetParentData(
+                row["parent_dataset_id"], self.token, self.app_context
+            ).get_path()
+
+            for glob_expr in self.files_to_find:
+                filenames_to_test.extend(data_path.glob(glob_expr))
+                # An empty path means the lookup failed; Path("") would
+                # silently search the current working directory instead.
+                if parent_path:
+                    parent_filenames_to_test.extend(Path(parent_path).glob(glob_expr))
+
+        # Explicit checks rather than assert statements, which are stripped
+        # when Python runs with -O.
+        if len(filenames_to_test) != 1:
+            output.append("Too many or too few files Mask")
+        elif len(parent_filenames_to_test) != 1:
+            output.append("Too many or too few files Base Images")
+        else:
+            files_tested = True
+            segmentation_mask_size = get_ometiff_size(filenames_to_test[0])
+            base_image_size = get_ometiff_size(parent_filenames_to_test[0])
+            # get_ometiff_size returns a str only when a file could not be read.
+            read_errors = [
+                size
+                for size in (segmentation_mask_size, base_image_size)
+                if isinstance(size, str)
+            ]
+            if read_errors:
+                output.extend(read_errors)
+            elif segmentation_mask_size != base_image_size:
+                output.append("Files and base image size do not match")
+
+        if output:
+            return output
+        elif files_tested:
+            return [None]
+        else:
+            return []
diff --git a/src/ingest_validation_tests/tests_utils.py b/src/ingest_validation_tests/tests_utils.py
new file mode 100644
index 0000000..f367f54
--- /dev/null
+++ b/src/ingest_validation_tests/tests_utils.py
@@ -0,0 +1,53 @@
+import requests
+
+
+class GetParentData:
+    """Resolve the filesystem path of a parent dataset.
+
+    The lookup uses the "uuid_url" and "ingest_url" endpoints configured in
+    app_context and sends the supplied token as a bearer credential.
+    """
+
+    def __init__(self, hubmap_id, globus_token, app_context):
+        self.hubmap_id = hubmap_id
+        self.token = globus_token
+        self.app_context = app_context
+        self.uuid = None
+
+    def __get_headers(self) -> dict:
+        """Return the request headers with the bearer token added."""
+        # Copy first: updating the dict stored in app_context would leak the
+        # token into the caller's app_context.
+        headers = dict(self.app_context.get("request_headers", {}))
+        headers["Authorization"] = "Bearer " + self.token
+        return headers
+
+    def __get_uuid(self) -> None:
+        """Store the UUID for hubmap_id in self.uuid, or None on an HTTP error."""
+        url = self.app_context.get("uuid_url") + self.hubmap_id
+        try:
+            response = requests.get(url, headers=self.__get_headers())
+            response.raise_for_status()
+            self.uuid = response.json().get("uuid")
+        except requests.exceptions.HTTPError as err:
+            self.uuid = None
+            print(f"Error: {err}")
+
+    def get_path(self) -> str:
+        """Return the dataset's absolute path, or "" if it cannot be resolved."""
+        self.__get_uuid()
+        if self.uuid is not None:
+            url = (
+                self.app_context.get("ingest_url")
+                + "datasets/"
+                + self.uuid
+                + "/file-system-abs-path"
+            )
+            try:
+                # Headers are built fresh so this request carries the token too.
+                response = requests.get(url, headers=self.__get_headers())
+                response.raise_for_status()
+                return response.json().get("path") or ""
+            except requests.exceptions.HTTPError as err:
+                print(f"Error: {err}")
+        return ""